kopia lustrzana https://github.com/bugout-dev/moonstream
193 wiersze
5.7 KiB
Python
193 wiersze
5.7 KiB
Python
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from moonstreamdb.blockchain import AvailableBlockchainType, get_block_model
|
|
from moonworm.crawler.log_scanner import (
|
|
_crawl_events as moonworm_autoscale_crawl_events, # type: ignore
|
|
)
|
|
from moonworm.crawler.log_scanner import _fetch_events_chunk
|
|
from sqlalchemy.orm.session import Session
|
|
from sqlalchemy.sql.expression import and_
|
|
from web3 import Web3
|
|
|
|
from .crawler import EventCrawlJob
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Root logging is configured at INFO level as a side effect of importing this module.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
@dataclass
class Event:
    """
    A decoded contract event together with the timestamp of its block.

    Instances are built by the crawl functions in this module from the raw
    event dictionaries returned by the log scanner.
    """

    # Taken from the raw event's "event" key.
    event_name: str
    # Taken from the raw event's "args" key.
    args: Dict[str, Any]
    # Taken from the raw event's "address" key.
    address: str
    # Taken from the raw event's "blockNumber" key.
    block_number: int
    # Timestamp resolved for block_number (cache, database or chain).
    block_timestamp: int
    # Taken from the raw event's "transactionHash" key.
    transaction_hash: str
    # Taken from the raw event's "logIndex" key.
    log_index: int
|
|
|
|
|
|
def _get_block_timestamp_from_web3(
    web3: Web3,
    block_number: int,
) -> int:
    """
    Gets the timestamp of a block from the blockchain.

    Any exception raised by the underlying web3 call (for example when the
    block is not found) is propagated to the caller.

    :param web3: The web3 client used to query the node.
    :param block_number: The block number.
    :return: The ``timestamp`` attribute of the fetched block.
    """
    eth = web3.eth
    # web3.py exposes the block getter as `get_block`; the camelCase `getBlock`
    # only exists in older releases. Prefer the new name, fall back to the old
    # one so the crawler works with either.
    fetch_block = getattr(eth, "get_block", None)
    if fetch_block is None:
        fetch_block = eth.getBlock
    return fetch_block(block_number).timestamp
|
|
|
|
|
|
# I am using blocks_cache as the argument, to reuse this function in tx_call crawler
|
|
# and support one cache for both tx_call and event_crawler
|
|
def get_block_timestamp(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    block_number: int,
    blocks_cache: Dict[int, int],
    max_blocks_batch: int = 30,
) -> int:
    """
    Get the timestamp of a block.

    First tries to get the block from the cache,
    then tries to get the block from the db,
    then tries to get it from the blockchain.

    After the call cache is updated with the requested block and with every
    neighbouring block returned by the db query.
    If the cache grows too large (more than ``max_blocks_batch * 3 + 2``
    entries), it is cleared before the new entries are added.

    :param db_session: The database session used to query stored blocks.
    :param web3: The web3 client used when the block is not in the db.
    :param blockchain_type: Selects which block model is queried.
    :param block_number: The block number.
    :param blocks_cache: The cache of blocks (block number -> timestamp); mutated in place.
    :param max_blocks_batch: The maximum number of blocks to fetch in a single batch from db query.
    :return: The timestamp of the block.
    """
    assert max_blocks_batch > 0

    if block_number in blocks_cache:
        return blocks_cache[block_number]

    block_model = get_block_model(blockchain_type)

    # Fetch a window of blocks around the requested one so that nearby
    # lookups can be served from the cache afterwards.
    blocks = (
        db_session.query(block_model.block_number, block_model.timestamp)
        .filter(
            and_(
                block_model.block_number >= block_number - max_blocks_batch - 1,
                block_model.block_number <= block_number + max_blocks_batch + 1,
            )
        )
        .order_by(block_model.block_number.asc())
        .all()
    )

    # The rows are sorted ascending over the whole window, so the requested
    # block is generally NOT the first row; look it up by its number instead
    # of only inspecting blocks[0].
    db_timestamps = {block.block_number: block.timestamp for block in blocks}
    target_block_timestamp: Optional[int] = db_timestamps.get(block_number)

    if target_block_timestamp is None:
        target_block_timestamp = _get_block_timestamp_from_web3(web3, block_number)

    if len(blocks_cache) > (max_blocks_batch * 3 + 2):
        blocks_cache.clear()

    blocks_cache.update(db_timestamps)
    # Written last so the cached value always matches what is returned.
    blocks_cache[block_number] = target_block_timestamp

    return target_block_timestamp
|
|
|
|
|
|
def _crawl_events(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    web3: Web3,
    jobs: List[EventCrawlJob],
    from_block: int,
    to_block: int,
    blocks_cache: Optional[Dict[int, int]] = None,
    db_block_query_batch=10,
) -> List[Event]:
    """
    Crawl events for every job in the given block range.

    For each job the events are fetched in a single chunk for all of the
    job's contracts, and every raw event is enriched with its block timestamp.

    :param db_session: The database session used for block timestamp lookups.
    :param blockchain_type: Selects which block model is queried for timestamps.
    :param web3: The web3 client used to fetch events and missing blocks.
    :param jobs: The crawl jobs (event ABI plus contracts) to process.
    :param from_block: First block passed to the event fetcher.
    :param to_block: Last block passed to the event fetcher.
    :param blocks_cache: Cache of block timestamps; a fresh one is created per call when omitted.
    :param db_block_query_batch: Passed to get_block_timestamp as its max_blocks_batch.
    :return: The crawled events, in job order.
    """
    # A `{}` default would be created once and silently shared by every call
    # that omits the argument, so the cache is created per call instead.
    if blocks_cache is None:
        blocks_cache = {}

    all_events = []
    for job in jobs:
        raw_events = _fetch_events_chunk(
            web3,
            job.event_abi,
            from_block,
            to_block,
            job.contracts,
            # Decode failures are logged and skipped rather than aborting the crawl.
            on_decode_error=lambda e: logger.error(
                f"Error decoding event: {e}"
            ),  # TODO report via humbug
        )
        for raw_event in raw_events:
            raw_event["blockTimestamp"] = get_block_timestamp(
                db_session,
                web3,
                blockchain_type,
                raw_event["blockNumber"],
                blocks_cache,
                db_block_query_batch,
            )
            event = Event(
                event_name=raw_event["event"],
                args=raw_event["args"],
                address=raw_event["address"],
                block_number=raw_event["blockNumber"],
                block_timestamp=raw_event["blockTimestamp"],
                transaction_hash=raw_event["transactionHash"],
                log_index=raw_event["logIndex"],
            )
            all_events.append(event)

    return all_events
|
|
|
|
|
|
def _autoscale_crawl_events(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    web3: Web3,
    jobs: List[EventCrawlJob],
    from_block: int,
    to_block: int,
    blocks_cache: Optional[Dict[int, int]] = None,
    batch_size: int = 1000,
    db_block_query_batch=10,
) -> Tuple[List[Event], int]:
    """
    Crawl events with auto regulated batch_size.

    The batch size returned by the crawler for one job is fed into the call
    for the next job, and the last value is returned to the caller.

    :param db_session: The database session used for block timestamp lookups.
    :param blockchain_type: Selects which block model is queried for timestamps.
    :param web3: The web3 client used to fetch events and missing blocks.
    :param jobs: The crawl jobs (event ABI plus contracts) to process.
    :param from_block: First block passed to the event crawler.
    :param to_block: Last block passed to the event crawler.
    :param blocks_cache: Cache of block timestamps; a fresh one is created per call when omitted.
    :param batch_size: Initial batch size handed to the crawler.
    :param db_block_query_batch: Passed to get_block_timestamp as its max_blocks_batch.
    :return: The crawled events and the batch size reported by the last crawl.
    """
    # A `{}` default would be created once and silently shared by every call
    # that omits the argument, so the cache is created per call instead.
    if blocks_cache is None:
        blocks_cache = {}

    all_events = []
    for job in jobs:
        # NOTE(review): only the first contract of the job is crawled here,
        # whereas _crawl_events passes the whole job.contracts list, and an
        # empty job.contracts raises IndexError — confirm this is intended.
        raw_events, batch_size = moonworm_autoscale_crawl_events(
            web3=web3,
            event_abi=job.event_abi,
            from_block=from_block,
            to_block=to_block,
            batch_size=batch_size,
            contract_address=job.contracts[0],
            max_blocks_batch=3000,
        )
        for raw_event in raw_events:
            raw_event["blockTimestamp"] = get_block_timestamp(
                db_session,
                web3,
                blockchain_type,
                raw_event["blockNumber"],
                blocks_cache,
                db_block_query_batch,
            )
            event = Event(
                event_name=raw_event["event"],
                args=raw_event["args"],
                address=raw_event["address"],
                block_number=raw_event["blockNumber"],
                block_timestamp=raw_event["blockTimestamp"],
                transaction_hash=raw_event["transactionHash"],
                log_index=raw_event["logIndex"],
            )
            all_events.append(event)

    return all_events, batch_size
|