diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py index 1a1cf83d..05540cfa 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py @@ -175,7 +175,7 @@ def main() -> None: "--min-sleep-time", "-t", type=float, - default=0.01, + default=0.1, help="Minimum time to sleep between crawl step", ) diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py index e2c70742..17084515 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/continuous_crawler.py @@ -172,14 +172,14 @@ def continuous_crawler( ) last_heartbeat_time = datetime.utcnow() blocks_cache: Dict[int, int] = {} - + current_sleep_time = min_sleep_time failed_count = 0 try: while True: try: # query db with limit 1, to avoid session closing db_session.execute("SELECT 1") - time.sleep(min_sleep_time) + time.sleep(current_sleep_time) end_block = min( web3.eth.blockNumber - confirmations, @@ -187,12 +187,12 @@ def continuous_crawler( ) if start_block + min_blocks_batch > end_block: - min_sleep_time += 0.1 + current_sleep_time += 0.1 logger.info( - f"Sleeping for {min_sleep_time} seconds because of low block count" + f"Sleeping for {current_sleep_time} seconds because of low block count" ) continue - min_sleep_time = max(0, min_sleep_time - 0.1) + current_sleep_time = max(min_sleep_time, current_sleep_time - 0.1) logger.info(f"Crawling events from {start_block} to {end_block}") all_events = _crawl_events( diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py index 06fd5b3a..aee81838 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/crawler.py @@ -206,18 +206,26 @@ def make_function_call_crawl_jobs( """ crawl_job_by_address: Dict[str, FunctionCallCrawlJob] = {} + method_signature_by_address: Dict[str, List[str]] = {} for entry in entries: contract_address = Web3().toChecksumAddress(_get_tag(entry, "address")) - abi = cast(str, entry.content) + abi = json.loads(cast(str, entry.content)) + method_signature = encode_function_signature(abi) + if contract_address not in crawl_job_by_address: crawl_job_by_address[contract_address] = FunctionCallCrawlJob( - contract_abi=[json.loads(abi)], + contract_abi=[abi], contract_address=contract_address, created_at=int(datetime.fromisoformat(entry.created_at).timestamp()), ) + method_signature_by_address[contract_address] = [method_signature] + else: - crawl_job_by_address[contract_address].contract_abi.append(json.loads(abi)) + + if method_signature not in method_signature_by_address[contract_address]: + crawl_job_by_address[contract_address].contract_abi.append(abi) + method_signature_by_address[contract_address].append(method_signature) return [crawl_job for crawl_job in crawl_job_by_address.values()] diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/function_call_crawler.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/function_call_crawler.py index a98e057f..ebcd3ae7 100644 --- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/function_call_crawler.py +++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/function_call_crawler.py @@ -91,6 +91,3 @@ def function_call_crawler( i, min(i + batch_size - 1, end_block), ) - logger.info(f"Crawled {len(crawled_functions)} functions") - for function_call in crawled_functions: - print(function_call) diff --git a/crawlers/mooncrawl/mooncrawl/settings.py b/crawlers/mooncrawl/mooncrawl/settings.py index 13494c55..c282140b 100644 --- a/crawlers/mooncrawl/mooncrawl/settings.py +++ b/crawlers/mooncrawl/mooncrawl/settings.py @@ -28,7 +28,7 @@ DOCS_TARGET_PATH = "docs" # Crawler label -CRAWLER_LABEL = "moonworm-alpha" +CRAWLER_LABEL = "test-moonworm-alpha" # Geth connection address MOONSTREAM_ETHEREUM_WEB3_PROVIDER_URI = os.environ.get(