moonstream/crawlers/mooncrawl/mooncrawl/moonworm_crawler/event_crawler.py

import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from moonstreamdb.blockchain import AvailableBlockchainType, get_block_model
from moonworm.crawler.log_scanner import (
    _crawl_events as moonworm_autoscale_crawl_events,  # type: ignore
)
from moonworm.crawler.log_scanner import _fetch_events_chunk
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import and_
from web3 import Web3
from .crawler import EventCrawlJob

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Event:
    event_name: str
    args: Dict[str, Any]
    address: str
    block_number: int
    block_timestamp: int
    transaction_hash: str
    log_index: int

def _get_block_timestamp_from_web3(
    web3: Web3,
    block_number: int,
) -> int:
    """
    Gets the timestamp of a block from the blockchain.
    Raises an exception if the block is not found.
    """
    return web3.eth.getBlock(block_number).timestamp

# blocks_cache is passed in as an argument so that this function can be reused
# by the tx_call crawler, sharing one cache between tx_call and event crawling.
def get_block_timestamp(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    block_number: int,
    blocks_cache: Dict[int, int],
    max_blocks_batch: int = 30,
) -> int:
    """
    Get the timestamp of a block.
    First tries the cache, then the database, then the blockchain itself.
    The cache is updated after the call; if it grows too large, it is cleared.

    :param block_number: The block number.
    :param max_blocks_batch: The maximum number of blocks to fetch from the db in a single query.
    :param blocks_cache: The cache of block timestamps, keyed by block number.
    :return: The timestamp of the block.
    """
    assert max_blocks_batch > 0

    if block_number in blocks_cache:
        return blocks_cache[block_number]

    block_model = get_block_model(blockchain_type)

    # Fetch a window of blocks around the target so the cache is warmed
    # for neighboring lookups as well.
    blocks = (
        db_session.query(block_model.block_number, block_model.timestamp)
        .filter(
            and_(
                block_model.block_number >= block_number - max_blocks_batch - 1,
                block_model.block_number <= block_number + max_blocks_batch + 1,
            )
        )
        .order_by(block_model.block_number.asc())
        .all()
    )

    # The query returns the surrounding window in ascending order; look for
    # the target block anywhere in it.
    target_block_timestamp: Optional[int] = None
    for block in blocks:
        if block.block_number == block_number:
            target_block_timestamp = block.timestamp
            break

    if target_block_timestamp is None:
        target_block_timestamp = _get_block_timestamp_from_web3(web3, block_number)

    if len(blocks_cache) > (max_blocks_batch * 3 + 2):
        blocks_cache.clear()

    blocks_cache[block_number] = target_block_timestamp
    for block in blocks:
        blocks_cache[block.block_number] = block.timestamp

    return target_block_timestamp

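# Example usage (a minimal sketch; "session" and "w3" are illustrative
# assumptions supplied by the caller, as is the ETHEREUM chain choice):
#
#   cache: Dict[int, int] = {}
#   ts = get_block_timestamp(
#       session, w3, AvailableBlockchainType.ETHEREUM, 15_000_000, cache
#   )
#   # A follow-up call for a nearby block is usually served straight from
#   # `cache`, which was warmed with every row the db query returned.
#   ts_next = get_block_timestamp(
#       session, w3, AvailableBlockchainType.ETHEREUM, 15_000_001, cache
#   )
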
def _crawl_events(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    web3: Web3,
    jobs: List[EventCrawlJob],
    from_block: int,
    to_block: int,
    blocks_cache: Optional[Dict[int, int]] = None,
    db_block_query_batch: int = 10,
) -> List[Event]:
    # Avoid a shared mutable default: create a fresh cache per call unless
    # the caller supplies one.
    if blocks_cache is None:
        blocks_cache = {}

    all_events: List[Event] = []
    for job in jobs:
        raw_events = _fetch_events_chunk(
            web3,
            job.event_abi,
            from_block,
            to_block,
            job.contracts,
            on_decode_error=lambda e: logger.error(
                f"Error decoding event: {e}"
            ),  # TODO report via humbug
        )
        for raw_event in raw_events:
            raw_event["blockTimestamp"] = get_block_timestamp(
                db_session,
                web3,
                blockchain_type,
                raw_event["blockNumber"],
                blocks_cache,
                db_block_query_batch,
            )
            event = Event(
                event_name=raw_event["event"],
                args=raw_event["args"],
                address=raw_event["address"],
                block_number=raw_event["blockNumber"],
                block_timestamp=raw_event["blockTimestamp"],
                transaction_hash=raw_event["transactionHash"],
                log_index=raw_event["logIndex"],
            )
            all_events.append(event)
    return all_events

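# Example usage (a minimal sketch; "session", "w3", and "jobs" are illustrative
# assumptions, e.g. jobs built by the crawler module):
#
#   events = _crawl_events(
#       session,
#       AvailableBlockchainType.ETHEREUM,
#       w3,
#       jobs,
#       from_block=15_000_000,
#       to_block=15_000_100,
#   )
#   for e in events:
#       logger.info(f"{e.event_name} at block {e.block_number}")
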
def _autoscale_crawl_events(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    web3: Web3,
    jobs: List[EventCrawlJob],
    from_block: int,
    to_block: int,
    blocks_cache: Optional[Dict[int, int]] = None,
    batch_size: int = 1000,
    db_block_query_batch: int = 10,
) -> Tuple[List[Event], int]:
    """
    Crawl events with an auto-regulated batch_size.
    Returns the crawled events together with the adjusted batch_size, so the
    caller can carry it over to the next window.
    """
    if blocks_cache is None:
        blocks_cache = {}

    all_events: List[Event] = []
    for job in jobs:
        # Note: only the first contract address of each job is crawled here.
        raw_events, batch_size = moonworm_autoscale_crawl_events(
            web3=web3,
            event_abi=job.event_abi,
            from_block=from_block,
            to_block=to_block,
            batch_size=batch_size,
            contract_address=job.contracts[0],
            max_blocks_batch=3000,
        )
        for raw_event in raw_events:
            raw_event["blockTimestamp"] = get_block_timestamp(
                db_session,
                web3,
                blockchain_type,
                raw_event["blockNumber"],
                blocks_cache,
                db_block_query_batch,
            )
            event = Event(
                event_name=raw_event["event"],
                args=raw_event["args"],
                address=raw_event["address"],
                block_number=raw_event["blockNumber"],
                block_timestamp=raw_event["blockTimestamp"],
                transaction_hash=raw_event["transactionHash"],
                log_index=raw_event["logIndex"],
            )
            all_events.append(event)
    return all_events, batch_size
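
# Example of carrying the adapted batch size across windows (a minimal sketch;
# "session", "w3", and "jobs" are illustrative assumptions):
#
#   batch_size = 1000
#   for start in range(15_000_000, 15_030_000, 3000):
#       events, batch_size = _autoscale_crawl_events(
#           session,
#           AvailableBlockchainType.ETHEREUM,
#           w3,
#           jobs,
#           from_block=start,
#           to_block=start + 2999,
#           batch_size=batch_size,
#       )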