diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py index 1a1cf83d..bb3ac99a 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py @@ -11,6 +11,7 @@ from ..settings import MOONSTREAM_MOONWORM_TASKS_JOURNAL, bugout_client from .continuous_crawler import _retry_connect_web3, continuous_crawler from .crawler import ( SubscriptionTypes, + blockchain_type_to_subscription_type, get_crawl_job_entries, make_event_crawl_jobs, make_function_call_crawl_jobs, @@ -23,9 +24,13 @@ logger = logging.getLogger(__name__) def handle_crawl(args: argparse.Namespace) -> None: + blockchain_type = AvailableBlockchainType(args.blockchain_type) + + logger.info(f"Blockchain type: {blockchain_type.value}") + initial_event_jobs = make_event_crawl_jobs( get_crawl_job_entries( - SubscriptionTypes.POLYGON_BLOCKCHAIN, + blockchain_type_to_subscription_type(blockchain_type), "event", MOONSTREAM_MOONWORM_TASKS_JOURNAL, ) @@ -34,7 +39,7 @@ def handle_crawl(args: argparse.Namespace) -> None: initial_function_call_jobs = make_function_call_crawl_jobs( get_crawl_job_entries( - SubscriptionTypes.POLYGON_BLOCKCHAIN, + blockchain_type_to_subscription_type(blockchain_type), "function", MOONSTREAM_MOONWORM_TASKS_JOURNAL, ) @@ -42,12 +47,6 @@ def handle_crawl(args: argparse.Namespace) -> None: logger.info( f"Initial function call crawl jobs count: {len(initial_function_call_jobs)}" ) - - # Couldn't figure out how to convert from string to AvailableBlockchainType - # AvailableBlockchainType(args.blockchain_type) is not working - blockchain_type = AvailableBlockchainType(args.blockchain_type) - - logger.info(f"Blockchain type: {blockchain_type.value}") with yield_db_session_ctx() as db_session: web3: Optional[Web3] = None if args.web3 is None: diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py index 06fd5b3a..7e9cf70e 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py @@ -154,12 +154,14 @@ def get_crawl_job_entries( offset=current_offset, limit=limit, ) + entries.extend(search_result.results) # if len(entries) >= search_result.total_results: if len(search_result.results) == 0: break current_offset += limit + return entries @@ -217,7 +219,15 @@ def make_function_call_crawl_jobs( created_at=int(datetime.fromisoformat(entry.created_at).timestamp()), ) else: - crawl_job_by_address[contract_address].contract_abi.append(json.loads(abi)) + old_selectors = [ + encode_function_signature(function_abi) + for function_abi in crawl_job_by_address[contract_address].contract_abi + ] + new_selector = encode_function_signature(json.loads(abi)) + if new_selector not in old_selectors: + crawl_job_by_address[contract_address].contract_abi.append( + json.loads(abi) + ) return [crawl_job for crawl_job in crawl_job_by_address.values()] @@ -283,6 +293,9 @@ def merge_function_call_crawl_jobs( ) break else: + # No old job with new job address was found + # This else is intended for `for` + # https://book.pythontips.com/en/latest/for_-_else.html old_crawl_jobs.append(new_crawl_job) return old_crawl_jobs