2022-06-16 12:53:19 +00:00
|
|
|
import logging
|
|
|
|
import time
|
2023-08-29 17:33:38 +00:00
|
|
|
from typing import Dict, List, Optional
|
2022-06-16 12:53:19 +00:00
|
|
|
from uuid import UUID
|
|
|
|
|
2023-05-23 10:56:38 +00:00
|
|
|
from eth_typing.evm import ChecksumAddress
|
2022-08-10 16:55:49 +00:00
|
|
|
from moonstreamdb.blockchain import AvailableBlockchainType
|
2024-01-31 10:09:04 +00:00
|
|
|
from moonstreamdb.networks import Network # type: ignore
|
2022-06-16 12:53:19 +00:00
|
|
|
from moonworm.crawler.moonstream_ethereum_state_provider import ( # type: ignore
|
|
|
|
MoonstreamEthereumStateProvider,
|
|
|
|
)
|
|
|
|
from sqlalchemy.orm.session import Session
|
|
|
|
from web3 import Web3
|
|
|
|
|
2023-05-23 10:56:38 +00:00
|
|
|
from .crawler import (
|
|
|
|
EventCrawlJob,
|
|
|
|
FunctionCallCrawlJob,
|
|
|
|
_retry_connect_web3,
|
2023-05-25 13:06:33 +00:00
|
|
|
update_entries_status_and_progress,
|
2023-05-23 10:56:38 +00:00
|
|
|
)
|
2022-06-16 12:53:19 +00:00
|
|
|
from .db import add_events_to_session, add_function_calls_to_session, commit_session
|
2024-01-31 10:09:04 +00:00
|
|
|
from .event_crawler import _autoscale_crawl_events, _crawl_events
|
2022-06-16 12:53:19 +00:00
|
|
|
from .function_call_crawler import _crawl_functions
|
|
|
|
|
|
|
|
# Configure root logging at INFO level on import and create this module's
# logger, which all crawler progress and error messages below go through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
def _network_for_blockchain(blockchain_type: AvailableBlockchainType) -> Network:
    """Map a blockchain type to the matching moonstreamdb ``Network`` member.

    Raises:
        Exception: if ``blockchain_type`` is not one of the supported chains.
    """
    if blockchain_type == AvailableBlockchainType.ETHEREUM:
        return Network.ethereum
    elif blockchain_type == AvailableBlockchainType.POLYGON:
        return Network.polygon
    elif blockchain_type == AvailableBlockchainType.MUMBAI:
        return Network.mumbai
    elif blockchain_type == AvailableBlockchainType.XDAI:
        return Network.xdai
    elif blockchain_type == AvailableBlockchainType.WYRM:
        return Network.wyrm
    elif blockchain_type == AvailableBlockchainType.ZKSYNC_ERA_TESTNET:
        return Network.zksync_era_testnet
    elif blockchain_type == AvailableBlockchainType.ZKSYNC_ERA:
        return Network.zksync_era
    elif blockchain_type == AvailableBlockchainType.ARBITRUM_NOVA:
        return Network.arbitrum_nova
    elif blockchain_type == AvailableBlockchainType.ARBITRUM_SEPOLIA:
        return Network.arbitrum_sepolia
    elif blockchain_type == AvailableBlockchainType.XAI:
        return Network.xai
    raise Exception("Unsupported blockchain type provided")


def _add_to_session_in_batches(
    add_to_session,
    db_session: Session,
    items,
    blockchain_type: AvailableBlockchainType,
    max_insert_batch: int,
) -> None:
    """Call ``add_to_session`` on ``items``, split into chunks of ``max_insert_batch``.

    When ``items`` fits in a single batch (including when it is empty) the
    whole list is passed through in one call.
    """
    if len(items) <= max_insert_batch:
        add_to_session(db_session, items, blockchain_type)
        return

    for offset in range(0, len(items), max_insert_batch):
        add_to_session(
            db_session,
            items[offset : offset + max_insert_batch],
            blockchain_type,
        )


def historical_crawler(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    web3: Optional[Web3],
    event_crawl_jobs: List[EventCrawlJob],
    function_call_crawl_jobs: List[FunctionCallCrawlJob],
    start_block: int,
    end_block: int,
    max_blocks_batch: int = 100,
    min_sleep_time: float = 0.1,
    access_id: Optional[UUID] = None,
    addresses_deployment_blocks: Optional[Dict[ChecksumAddress, int]] = None,
    max_insert_batch: int = 10000,
):
    """Crawl events and function calls backwards from start_block to end_block.

    The range is walked from the highest block down to the lowest in batches
    of at most ``max_blocks_batch`` blocks. Each batch is added to the
    session, optionally used to update per-address progress, and committed
    before the next batch starts.

    Args:
        db_session: SQLAlchemy session used for inserts, commits and rollbacks.
        blockchain_type: Chain being crawled; must map to a known ``Network``.
        web3: Connection to use. When ``None`` one is created with
            ``_retry_connect_web3``.
        event_crawl_jobs: Event jobs passed to the event crawler.
        function_call_crawl_jobs: Function-call jobs. When non-empty, events
            are crawled with a fixed batch size; when empty, the autoscaling
            event crawler is used and may change ``max_blocks_batch``.
        start_block: Highest block to crawl (inclusive).
        end_block: Lowest block to crawl (inclusive).
        max_blocks_batch: Maximum number of blocks per batch.
        min_sleep_time: Seconds to sleep before every batch.
        access_id: Forwarded to ``_retry_connect_web3``.
        addresses_deployment_blocks: Optional map of contract address to its
            deployment block, used to compute crawl progress per address.
        max_insert_batch: Maximum number of rows handed to a single
            ``add_*_to_session`` call.

    Raises:
        AssertionError: if an argument fails validation.
        Exception: if the blockchain type is unsupported, if a batch fails
            more than 10 times in a row, or if reconnecting fails.
    """
    assert max_blocks_batch > 0, "max_blocks_batch must be greater than 0"
    assert min_sleep_time > 0, "min_sleep_time must be greater than 0"
    assert (
        start_block >= end_block
    ), "start_block must be greater than or equal to end_block"
    assert end_block > 0, "end_block must be greater than 0"
    # A non-positive insert batch would make range() fail inside the retry loop.
    assert max_insert_batch > 0, "max_insert_batch must be greater than 0"

    if web3 is None:
        web3 = _retry_connect_web3(blockchain_type, access_id=access_id)

    assert (
        web3.eth.block_number >= start_block
    ), "start_block must be less than or equal to current block"

    network = _network_for_blockchain(blockchain_type)

    ethereum_state_provider = MoonstreamEthereumStateProvider(
        web3,
        network,
        db_session,
    )

    logger.info(f"Starting historical event crawler start_block={start_block}")

    blocks_cache: Dict[int, int] = {}
    failed_count = 0

    # Remember where the crawl began; start_block moves down as batches finish.
    original_start_block = start_block

    progess_map: Dict[ChecksumAddress, float] = {}

    while start_block >= end_block:
        try:
            time.sleep(min_sleep_time)

            # Lowest block of this batch, never going below end_block.
            batch_end_block = max(
                start_block - max_blocks_batch,
                end_block,
            )

            logger.info(f"Crawling events from {start_block} to {batch_end_block}")

            if function_call_crawl_jobs:
                all_events = _crawl_events(
                    db_session=db_session,
                    blockchain_type=blockchain_type,
                    web3=web3,
                    jobs=event_crawl_jobs,
                    from_block=batch_end_block,
                    to_block=start_block,
                    blocks_cache=blocks_cache,
                    db_block_query_batch=max_blocks_batch,
                )
            else:
                # The autoscaling crawler also returns the batch size to use
                # for the following iterations.
                all_events, max_blocks_batch = _autoscale_crawl_events(
                    db_session=db_session,
                    blockchain_type=blockchain_type,
                    web3=web3,
                    jobs=event_crawl_jobs,
                    from_block=batch_end_block,
                    to_block=start_block,
                    blocks_cache=blocks_cache,
                    db_block_query_batch=max_blocks_batch,
                )
            logger.info(
                f"Crawled {len(all_events)} events from {start_block} to {batch_end_block}."
            )

            _add_to_session_in_batches(
                add_events_to_session,
                db_session,
                all_events,
                blockchain_type,
                max_insert_batch,
            )

            if function_call_crawl_jobs:
                logger.info(
                    f"Crawling function calls from {start_block} to {batch_end_block}"
                )
                all_function_calls = _crawl_functions(
                    blockchain_type,
                    ethereum_state_provider,
                    function_call_crawl_jobs,
                    batch_end_block,
                    start_block,
                )
                logger.info(
                    f"Crawled {len(all_function_calls)} function calls from {start_block} to {batch_end_block}."
                )

                _add_to_session_in_batches(
                    add_function_calls_to_session,
                    db_session,
                    all_function_calls,
                    blockchain_type,
                    max_insert_batch,
                )

            if addresses_deployment_blocks:
                blocks_done = original_start_block - batch_end_block
                for address, deployment_block in addresses_deployment_blocks.items():
                    blocks_to_cover = original_start_block - deployment_block
                    if blocks_to_cover <= 0:
                        # Deployed at or after the starting block: there is no
                        # older history to crawl, and dividing would either
                        # raise ZeroDivisionError or yield a negative value.
                        progess_map[address] = 1.0
                    else:
                        progess_map[address] = blocks_done / blocks_to_cover

                # NOTE: "progess_map" is the keyword name used at this call
                # site in the original code, so the spelling is kept.
                if function_call_crawl_jobs:
                    function_call_crawl_jobs = update_entries_status_and_progress(  # type: ignore
                        events=function_call_crawl_jobs,
                        progess_map=progess_map,
                    )

                if event_crawl_jobs:
                    event_crawl_jobs = update_entries_status_and_progress(  # type: ignore
                        events=event_crawl_jobs,
                        progess_map=progess_map,
                    )

            # Committing to db
            commit_session(db_session)

            # The batch covered batch_end_block, so continue just below it.
            start_block = batch_end_block - 1
            failed_count = 0
        except Exception as e:
            db_session.rollback()
            logger.error(f"Internal error: {e}")
            logger.exception(e)
            failed_count += 1
            if failed_count > 10:
                logger.error("Too many failures, exiting")
                raise
            try:
                web3 = _retry_connect_web3(blockchain_type, access_id=access_id)
                # Rebuild the state provider so function-call crawling uses
                # the fresh connection instead of the one that just failed.
                ethereum_state_provider = MoonstreamEthereumStateProvider(
                    web3,
                    network,
                    db_session,
                )
            except Exception as err:
                logger.error(f"Failed to reconnect: {err}")
                logger.exception(err)
                raise
|