# Mirror of https://github.com/bugout-dev/moonstream
# 435 lines, 14 KiB, Python
import json
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, List, Optional, Set, Union
|
|
|
|
from eth_typing import ChecksumAddress
|
|
from hexbytes.main import HexBytes
|
|
from moonstreamdb.blockchain import (
|
|
AvailableBlockchainType,
|
|
get_label_model,
|
|
get_transaction_model,
|
|
)
|
|
from moonstreamdb.models import Base, EthereumTransaction, PolygonTransaction
|
|
from moonworm.crawler.function_call_crawler import ( # type: ignore
|
|
ContractFunctionCall,
|
|
utfy_dict,
|
|
)
|
|
from moonworm.crawler.log_scanner import _fetch_events_chunk # type: ignore
|
|
from sqlalchemy.orm.session import Session
|
|
from tqdm import tqdm
|
|
from web3 import Web3
|
|
from web3._utils.events import get_event_data
|
|
|
|
from ..moonworm_crawler.db import add_events_to_session, commit_session
|
|
from ..moonworm_crawler.event_crawler import Event, get_block_timestamp
|
|
|
|
# Configure logging at INFO level. NOTE: this runs as a side effect of
# importing this module.
logging.basicConfig(level=logging.INFO)
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# TODO: ADD VALUE!!!
|
|
@dataclass
class ExtededFunctionCall(ContractFunctionCall):
    """
    ContractFunctionCall extended with the gas pricing fields and the value
    of the transaction that carried the call.
    """

    # Gas price of the transaction; the only added field without a default.
    gas_price: int
    # Fee-cap fields; left as None unless the caller supplies them.
    max_fee_per_gas: Optional[int] = None
    max_priority_fee_per_gas: Optional[int] = None
    # Value sent with the transaction; defaults to 0.
    value: int = 0
|
|
|
|
|
|
def _function_call_with_gas_price_to_label(
    blockchain_type: AvailableBlockchainType,
    function_call: ExtededFunctionCall,
    label_name: str,
) -> Base:
    """
    Build a "tx_call" label object for the given function call.

    The label model class is resolved from blockchain_type. The returned
    object is only constructed here; it is not added to any session.
    """
    model_cls = get_label_model(blockchain_type)

    # JSON payload stored on the label: the decoded call plus gas/value data.
    payload = {
        "type": "tx_call",
        "name": function_call.function_name,
        "caller": function_call.caller_address,
        "args": function_call.function_args,
        "status": function_call.status,
        "gasUsed": function_call.gas_used,
        "gasPrice": function_call.gas_price,
        "maxFeePerGas": function_call.max_fee_per_gas,
        "maxPriorityFeePerGas": function_call.max_priority_fee_per_gas,
        "value": function_call.value,
    }

    return model_cls(
        label=label_name,
        label_data=payload,
        address=function_call.contract_address,
        block_number=function_call.block_number,
        transaction_hash=function_call.transaction_hash,
        block_timestamp=function_call.block_timestamp,
    )
|
|
|
|
|
|
def add_function_calls_with_gas_price_to_session(
    db_session: Session,
    function_calls: List[ExtededFunctionCall],
    blockchain_type: AvailableBlockchainType,
    label_name: str,
) -> None:
    """
    Add "tx_call" labels for the given function calls to the session.

    Function calls whose transaction hash already has a label with the same
    label name and no log index are skipped. Nothing is committed here; the
    new labels are only added to db_session.
    """
    label_model = get_label_model(blockchain_type)
    transactions_hashes_to_save = [
        function_call.transaction_hash for function_call in function_calls
    ]

    # A set keeps the per-call membership test below O(1).
    already_labeled = set()
    # Skip the lookup entirely when there is nothing to check, which avoids
    # sending a query with an empty IN () predicate.
    if transactions_hashes_to_save:
        existing_labels = (
            db_session.query(label_model.transaction_hash)
            .filter(
                label_model.label == label_name,
                label_model.log_index.is_(None),
                label_model.transaction_hash.in_(transactions_hashes_to_save),
            )
            .all()
        )
        already_labeled = {row[0] for row in existing_labels}

    labels_to_save = [
        _function_call_with_gas_price_to_label(
            blockchain_type, function_call, label_name
        )
        for function_call in function_calls
        if function_call.transaction_hash not in already_labeled
    ]

    logger.info(f"Saving {len(labels_to_save)} labels to session")
    db_session.add_all(labels_to_save)
|
|
|
|
|
|
def _transform_to_w3_tx(
    tx_raw: Union[EthereumTransaction, PolygonTransaction],
) -> Dict[str, Any]:
    """
    Convert a database transaction row into a web3-style transaction dict.

    The hash is wrapped in HexBytes, and the fee/value fields are cast to int
    when they are not None.
    """
    w3_tx = {
        "blockNumber": tx_raw.block_number,
        "from": tx_raw.from_address,
        "gas": tx_raw.gas,
        "gasPrice": tx_raw.gas_price,
        "hash": HexBytes(tx_raw.hash),
        "input": tx_raw.input,
        "maxFeePerGas": tx_raw.max_fee_per_gas,
        "maxPriorityFeePerGas": tx_raw.max_priority_fee_per_gas,
        "nonce": tx_raw.nonce,
        "to": tx_raw.to_address,
        "transactionIndex": tx_raw.transaction_index,
        "value": tx_raw.value,
    }

    # Cast the numeric fields to int, leaving missing (None) values untouched.
    for numeric_key in ("maxFeePerGas", "maxPriorityFeePerGas", "gasPrice", "value"):
        stored = w3_tx[numeric_key]
        if stored is not None:
            w3_tx[numeric_key] = int(stored)

    return w3_tx
|
|
|
|
|
|
def process_transaction(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    contract: Any,
    secondary_abi: List[Dict[str, Any]],
    transaction: Dict[str, Any],
    blocks_cache: Dict[int, int],
):
    """
    Decode a transaction into a function call and its secondary events.

    The transaction input is decoded with `contract`. If decoding fails, the
    first 10 characters of the input are used as the function name and the
    arguments are recorded as "unknown". The transaction receipt is then
    fetched from the node, and every log in it is tried against each ABI in
    `secondary_abi`; the first ABI that decodes a log produces an Event.

    Returns a tuple (function_call, secondary_logs), where function_call is
    an ExtededFunctionCall and secondary_logs is a list of Event objects.
    """
    try:
        raw_function_call = contract.decode_function_input(transaction["input"])
        function_name = raw_function_call[0].fn_name
        function_args = utfy_dict(raw_function_call[1])
    except Exception:
        # The input could not be decoded with the primary ABI. Keep the call
        # anyway, identified by the leading 10 characters of its input (for a
        # 0x-prefixed hex string this is the 4-byte function selector).
        function_name = transaction["input"][:10]
        function_args = "unknown"

    transaction_receipt = web3.eth.getTransactionReceipt(transaction["hash"])
    block_timestamp = get_block_timestamp(
        db_session,
        web3,
        blockchain_type,
        transaction["blockNumber"],
        blocks_cache,
        100,
    )

    function_call = ExtededFunctionCall(
        block_number=transaction["blockNumber"],
        block_timestamp=block_timestamp,
        transaction_hash=transaction["hash"].hex(),
        contract_address=transaction["to"],
        caller_address=transaction["from"],
        function_name=function_name,
        function_args=function_args,
        status=transaction_receipt["status"],
        gas_used=transaction_receipt["gasUsed"],
        gas_price=transaction["gasPrice"],
        max_fee_per_gas=transaction.get("maxFeePerGas"),
        max_priority_fee_per_gas=transaction.get("maxPriorityFeePerGas"),
        value=transaction["value"],
    )

    secondary_logs = []
    for log in transaction_receipt["logs"]:
        for abi in secondary_abi:
            try:
                raw_event = get_event_data(web3.codec, abi, log)
                event = {
                    "event": raw_event["event"],
                    "args": json.loads(
                        Web3.toJSON(utfy_dict(dict(raw_event["args"])))
                    ),
                    "address": raw_event["address"],
                    "blockNumber": raw_event["blockNumber"],
                    "transactionHash": raw_event["transactionHash"].hex(),
                    "logIndex": raw_event["logIndex"],
                    "blockTimestamp": block_timestamp,
                }
                secondary_logs.append(_processEvent(event))
            except Exception:
                # Best effort: a log that does not decode with this ABI is the
                # normal case, so move on to the next ABI. Catching Exception
                # rather than using a bare except keeps KeyboardInterrupt and
                # SystemExit propagating.
                continue
            # The first ABI that decodes the log wins.
            break

    return function_call, secondary_logs
|
|
|
|
|
|
def _get_transactions(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    transaction_hashes: Set[str],
):
    """
    Load the transactions with the given hashes as web3-style dicts.

    Transactions found in the database are converted with
    _transform_to_w3_tx. Hashes that are missing from the database are
    fetched from the node and appended after the database results.
    """
    # Nothing requested: skip the database round trip.
    if not transaction_hashes:
        return []

    transaction_model = get_transaction_model(blockchain_type)
    transactions = (
        db_session.query(transaction_model)
        .filter(transaction_model.hash.in_(transaction_hashes))
        .all()
    )

    web3_transactions = [
        _transform_to_w3_tx(transaction) for transaction in transactions
    ]

    # Build the set of found hashes once instead of once per candidate hash.
    found_hashes = {transaction.hash for transaction in transactions}
    not_found_transaction_hashes = [
        transaction_hash
        for transaction_hash in transaction_hashes
        if transaction_hash not in found_hashes
    ]

    for nf_transaction in not_found_transaction_hashes:
        web3_transactions.append(web3.eth.getTransaction(nf_transaction))

    return web3_transactions
|
|
|
|
|
|
def _processEvent(raw_event: Dict[str, Any]):
    """
    Build an Event from a raw event dict.

    Every key listed below must be present in raw_event.
    """
    # Event field name -> key in the raw event dict.
    field_sources = {
        "event_name": "event",
        "args": "args",
        "address": "address",
        "block_number": "blockNumber",
        "block_timestamp": "blockTimestamp",
        "transaction_hash": "transactionHash",
        "log_index": "logIndex",
    }
    return Event(
        **{field: raw_event[source] for field, source in field_sources.items()}
    )
|
|
|
|
|
|
def populate_with_events(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    label_name: str,
    populate_from_label: str,
    abi: List[Dict[str, Any]],
    from_block: int,
    to_block: int,
    batch_size: int = 100,
):
    """
    Add events for transactions that are already labeled.

    The range from_block..to_block is processed in batches. For each batch,
    the transactions labeled `populate_from_label` are read from the
    database. Events matching the event entries of `abi` are then fetched
    for the batch, and those belonging to the labeled transactions are saved
    under `label_name` and committed. Event timestamps are taken from the
    existing labels rather than from the node.
    """
    current_block = from_block

    events_abi = [event for event in abi if event["type"] == "event"]
    label_model = get_label_model(blockchain_type)

    # The context manager closes the progress bar on normal exit and on error.
    with tqdm(total=(to_block - from_block + 1)) as pbar:
        pbar.set_description(f"Populating events for blocks {from_block}-{to_block}")

        while current_block <= to_block:
            # Both ends of the batch are inclusive.
            batch_end = min(current_block + batch_size, to_block)
            logger.info("Fetching events")
            txs = (
                db_session.query(
                    label_model.transaction_hash,
                    label_model.block_number,
                    label_model.block_timestamp,
                )
                .filter(
                    label_model.label == populate_from_label,
                    label_model.block_number >= current_block,
                    label_model.block_number <= batch_end,
                )
                .distinct()
                .all()
            )

            txs_to_populate = {tx[0] for tx in txs}
            block_timestamps = {tx[1]: tx[2] for tx in txs}

            logger.info(f"Theoretically {len(txs_to_populate)} transactions to populate")

            # Only query the node when the batch has labeled transactions.
            if txs_to_populate:
                events = []
                for event_abi in events_abi:
                    raw_events = _fetch_events_chunk(
                        web3,
                        event_abi,
                        current_block,
                        batch_end,
                    )
                    logger.info(f"Fetched {len(raw_events)} events")
                    for raw_event in raw_events:
                        if raw_event["transactionHash"] not in txs_to_populate:
                            continue
                        raw_event["blockTimestamp"] = block_timestamps[
                            raw_event["blockNumber"]
                        ]
                        events.append(_processEvent(raw_event))

                logger.info(f"Found {len(events)} events for populate")
                add_events_to_session(db_session, events, blockchain_type, label_name)
                commit_session(db_session)

            pbar.update(batch_end - current_block + 1)
            current_block = batch_end + 1
|
|
|
|
|
|
def crawl(
    db_session: Session,
    web3: Web3,
    blockchain_type: AvailableBlockchainType,
    label_name: str,
    abi: Dict[str, Any],
    secondary_abi: List[Dict[str, Any]],
    from_block: int,
    to_block: int,
    crawl_transactions: bool = True,
    addresses: Optional[List[ChecksumAddress]] = None,
    batch_size: int = 100,
) -> None:
    """
    Crawl events, and optionally their transactions, for a block range.

    The range from_block..to_block is processed in batches. For each batch,
    events matching the event entries of `abi` are fetched (restricted to
    `addresses` when given). When crawl_transactions is True, the
    transactions that emitted those events are decoded into function calls
    and their receipts are scanned for `secondary_abi` events. Everything is
    saved under `label_name` and committed once per batch.

    If the range extends past the current chain head, the crawler waits for
    new blocks and never advances beyond the head.

    NOTE: `abi` is annotated as a dict but is iterated here as a sequence of
    ABI entries, each having a "type" key.
    """
    current_block = from_block

    db_blocks_cache: Dict[int, int] = {}
    contract = web3.eth.contract(abi=abi)
    # TODO(yhtiyar): load checkpoint
    events_abi = [item for item in abi if item["type"] == "event"]  # type: ignore

    # The context manager closes the progress bar on normal exit and on error.
    with tqdm(total=(to_block - from_block + 1)) as pbar:
        pbar.set_description(f"Crawling blocks {from_block}-{to_block}")

        while current_block <= to_block:
            blockchain_block = web3.eth.block_number
            if current_block > blockchain_block:
                logger.info("Current block is greater than blockchain block, sleeping")
                time.sleep(1)
                continue

            # Both ends of the batch are inclusive. Clip to the chain head so
            # that blocks not yet mined are not marked as crawled and skipped.
            batch_end = min(current_block + batch_size, to_block, blockchain_block)
            logger.info(f"Crawling blocks {current_block}-{batch_end}")
            events = []
            logger.info("Fetching events")
            for event_abi in events_abi:
                raw_events = _fetch_events_chunk(
                    web3,
                    event_abi,
                    current_block,
                    batch_end,
                    addresses,
                )
                for raw_event in raw_events:
                    raw_event["blockTimestamp"] = get_block_timestamp(
                        db_session,
                        web3,
                        blockchain_type,
                        raw_event["blockNumber"],
                        blocks_cache=db_blocks_cache,
                        max_blocks_batch=1000,
                    )
                    events.append(_processEvent(raw_event))

            if crawl_transactions:
                transaction_hashes = {event.transaction_hash for event in events}
                logger.info(f"Fetched {len(events)} events")
                logger.info(f"Fetching {len(transaction_hashes)} transactions")

                transactions = _get_transactions(
                    db_session, web3, blockchain_type, transaction_hashes
                )
                logger.info(f"Fetched {len(transactions)} transactions")

                function_calls = []
                for tx in transactions:
                    processed_tx, secondary_logs = process_transaction(
                        db_session,
                        web3,
                        blockchain_type,
                        contract,
                        secondary_abi,
                        tx,
                        db_blocks_cache,
                    )
                    function_calls.append(processed_tx)
                    events.extend(secondary_logs)
                add_function_calls_with_gas_price_to_session(
                    db_session,
                    function_calls,
                    blockchain_type,
                    label_name,
                )

            add_events_to_session(
                db_session,
                events,
                blockchain_type,
                label_name,
            )
            commit_session(db_session)
            pbar.update(batch_end - current_block + 1)
            current_block = batch_end + 1
|
|
|
|
|
|
def get_checkpoint(
    db_session: Session,
    blockchain_type: AvailableBlockchainType,
    from_block: int,
    to_block: int,
    label_name: str,
) -> int:
    """
    Return the block to resume from within from_block..to_block.

    This is one past the highest block in the range that already has a label
    named `label_name`, or from_block when there is no such label.
    """
    label_model = get_label_model(blockchain_type)

    labeled_blocks_in_range = db_session.query(label_model.block_number).filter(
        label_model.label == label_name,
        label_model.block_number <= to_block,
        label_model.block_number >= from_block,
    )
    newest_row = labeled_blocks_in_range.order_by(
        label_model.block_number.desc()
    ).first()

    if newest_row is not None:
        return newest_row[0] + 1
    return from_block
|