Moonworm crawler optimized to work with access id

2022-05-16 11:53:35 +00:00 · 2022-05-16 11:53:35 +00:00 · 7a3b7c15d7
commit 7a3b7c15d7
--- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py
+++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py
@ -1,13 +1,14 @@
 import argparse
 import logging
 from typing import Optional
+from uuid import UUID

 from moonstreamdb.db import yield_db_session_ctx
 from web3 import Web3
 from web3.middleware import geth_poa_middleware

 from ..blockchain import AvailableBlockchainType
-from ..settings import MOONSTREAM_MOONWORM_TASKS_JOURNAL, bugout_client
+from ..settings import MOONSTREAM_MOONWORM_TASKS_JOURNAL, NB_CONTROLLER_ACCESS_ID
 from .continuous_crawler import _retry_connect_web3, continuous_crawler
 from .crawler import (
    SubscriptionTypes,
@ -54,7 +55,7 @@ def handle_crawl(args: argparse.Namespace) -> None:
            logger.info(
                "No web3 provider URL provided, using default (blockchan.py: connect())"
            )
-            web3 = _retry_connect_web3(blockchain_type)
+            web3 = _retry_connect_web3(blockchain_type, access_id=args.access_id)
        else:
            logger.info(f"Using web3 provider URL: {args.web3}")
            web3 = Web3(
@ -109,11 +110,21 @@ def handle_crawl(args: argparse.Namespace) -> None:
            args.min_sleep_time,
            args.heartbeat_interval,
            args.new_jobs_refetch_interval,
+            access_id=args.access_id,
        )


 def main() -> None:
    parser = argparse.ArgumentParser()
+    parser.set_defaults(func=lambda _: parser.print_help())
+
+    parser.add_argument(
+        "--access-id",
+        default=NB_CONTROLLER_ACCESS_ID,
+        type=UUID,
+        help="User access ID",
+    )
+
    subparsers = parser.add_subparsers()

    crawl_parser = subparsers.add_parser("crawl")
--- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py
+++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py
@ -3,6 +3,7 @@ import time
 import traceback
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional, Tuple
+from uuid import UUID

 from moonworm.crawler.moonstream_ethereum_state_provider import (  # type: ignore
    MoonstreamEthereumStateProvider,
@ -85,6 +86,7 @@ def _retry_connect_web3(
    blockchain_type: AvailableBlockchainType,
    retry_count: int = 10,
    sleep_time: float = 5,
+    access_id: Optional[UUID] = None,
 ) -> Web3:
    """
    Retry connecting to the blockchain.
@ -92,7 +94,7 @@ def _retry_connect_web3(
    while retry_count > 0:
        retry_count -= 1
        try:
-            web3 = connect(blockchain_type)
+            web3 = connect(blockchain_type, access_id=access_id)
            web3.eth.block_number
            logger.info(f"Connected to {blockchain_type}")
            return web3
@ -121,6 +123,7 @@ def continuous_crawler(
    min_sleep_time: float = 0.1,
    heartbeat_interval: float = 60,
    new_jobs_refetch_interval: float = 120,
+    access_id: Optional[UUID] = None,
 ):
    crawler_type = "continuous"
    assert (
@ -139,7 +142,7 @@ def continuous_crawler(

    jobs_refetchet_time = crawl_start_time
    if web3 is None:
-        web3 = _retry_connect_web3(blockchain_type)
+        web3 = _retry_connect_web3(blockchain_type, access_id=access_id)

    network = (
        Network.ethereum
@ -281,7 +284,7 @@ def continuous_crawler(
                    logger.error("Too many failures, exiting")
                    raise e
                try:
-                    web3 = _retry_connect_web3(blockchain_type)
+                    web3 = _retry_connect_web3(blockchain_type, access_id=access_id)
                except Exception as err:
                    logger.error(f"Failed to reconnect: {err}")
                    logger.exception(err)