added enrich command

pull/304/head
yhtiyar 2021-10-04 19:24:11 +03:00
rodzic f8fb803fd4
commit c543ab92f5
3 zmienionych plików z 159 dodań i 131 usunięć

Wyświetl plik

@ -7,10 +7,11 @@ from typing import Optional
from moonstreamdb.db import yield_db_session_ctx
from .data import event_types, nft_event, BlockBounds
from .data import EventType, event_types, nft_event, BlockBounds
from .datastore import setup_database, import_data
from .derive import current_owners
from .materialize import create_dataset, EthereumBatchloader
from .enrich import EthereumBatchloader, enrich
from .materialize import create_dataset
logging.basicConfig(level=logging.INFO)
@ -50,12 +51,33 @@ def handle_materialize(args: argparse.Namespace) -> None:
moonstream_datastore,
db_session,
event_type,
batch_loader,
bounds,
args.batch_size,
)
def handle_enrich(args: argparse.Namespace) -> None:
batch_loader = EthereumBatchloader(jsonrpc_url=args.jsonrpc)
logger.info(f"Enriching NFT events in datastore: {args.datastore}")
with contextlib.closing(sqlite3.connect(args.datastore)) as moonstream_datastore:
enrich(
moonstream_datastore,
EventType.TRANSFER,
batch_loader,
args.batch_size,
)
enrich(
moonstream_datastore,
EventType.MINT,
batch_loader,
args.batch_size,
)
def handle_derive(args: argparse.Namespace) -> None:
with contextlib.closing(sqlite3.connect(args.datastore)) as moonstream_datastore:
results = current_owners(moonstream_datastore)
@ -166,6 +188,30 @@ def main() -> None:
)
parser_import_data.set_defaults(func=handle_import_data)
parser_enrich = subcommands.add_parser(
"enrich", description="enrich dataset from geth node"
)
parser_enrich.add_argument(
"-d",
"--datastore",
required=True,
help="Path to SQLite database representing the dataset",
)
parser_enrich.add_argument(
"--jsonrpc",
default=default_web3_provider,
type=str,
help=f"Http uri provider to use when collecting data directly from the Ethereum blockchain (default: {default_web3_provider})",
)
parser_enrich.add_argument(
"-n",
"--batch-size",
type=int,
default=1000,
help="Number of events to process per batch",
)
parser_enrich.set_defaults(func=handle_enrich)
args = parser.parse_args()
args.func(args)

Wyświetl plik

@ -83,6 +83,116 @@ FROM {event_tables[event_type]};
return selection_query
def get_events_for_enrich(
conn: sqlite3.Connection, event_type: EventType
) -> List[NFTEvent]:
def select_query(event_type: EventType) -> str:
selection_query = f"""
SELECT
event_id,
transaction_hash,
block_number,
nft_address,
token_id,
from_address,
to_address,
transaction_value,
timestamp
FROM {event_tables[event_type]} WHERE block_number = 'None';
"""
return selection_query
logger.info(f"Loading {event_tables[event_type]} table events for enrich")
cur = conn.cursor()
cur.execute(select_query(event_type))
events: List[NFTEvent] = []
for row in cur:
(
event_id,
transaction_hash,
block_number,
nft_address,
token_id,
from_address,
to_address,
value,
timestamp,
) = cast(
Tuple[
str,
str,
Optional[int],
str,
str,
str,
str,
Optional[int],
Optional[int],
],
row,
)
event = NFTEvent(
event_id=event_id,
event_type=event_type, # Original argument to this function
nft_address=nft_address,
token_id=token_id,
from_address=from_address,
to_address=to_address,
transaction_hash=transaction_hash,
value=value,
block_number=block_number,
timestamp=timestamp,
)
events.append(event)
logger.info(f"Found {len(events)} events to enrich")
return events
def update_events_batch(conn: sqlite3.Connection, events: List[NFTEvent]) -> None:
def replace_query(event_type: EventType) -> str:
query = f"""
REPLACE INTO {event_tables[event_type]}(
event_id,
transaction_hash,
block_number,
nft_address,
token_id,
from_address,
to_address,
transaction_value,
timestamp
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
return query
logger.info("Updating events in sqlite")
cur = conn.cursor()
try:
transfers = [
nft_event_to_tuple(event)
for event in events
if event.event_type == EventType.TRANSFER
]
mints = [
nft_event_to_tuple(event)
for event in events
if event.event_type == EventType.MINT
]
cur.executemany(replace_query(EventType.TRANSFER), transfers)
cur.executemany(replace_query(EventType.MINT), mints)
conn.commit()
except Exception as e:
logger.error(f"FAILED TO replace!!! :{events}")
conn.rollback()
raise e
def setup_database(conn: sqlite3.Connection) -> None:
"""
Sets up the schema of the Moonstream NFTs dataset in the given SQLite database.

Wyświetl plik

@ -1,4 +1,3 @@
from dataclasses import is_dataclass
import logging
import sqlite3
from typing import Any, cast, Iterator, List, Optional, Set
@ -28,135 +27,10 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class EthereumBatchloader:
def __init__(self, jsonrpc_url) -> None:
self.jsonrpc_url = jsonrpc_url
self.message_number = 0
self.commands: List[Any] = []
self.requests_banch: List[Any] = []
def load_blocks(self, block_list: List[int], with_transactions: bool):
"""
Request list of blocks
"""
rpc = [
{
"jsonrpc": "2.0",
"id": index,
"method": "eth_getBlockByNumber",
"params": params_single,
}
for index, params_single in enumerate(
[[hex(block_number), with_transactions] for block_number in block_list]
)
]
response = self.send_json_message(rpc)
return response
def load_transactions(self, transaction_hashes: List[str]):
"""
Request list of transactions
"""
rpc = [
{
"jsonrpc": "2.0",
"method": "eth_getTransactionByHash",
"id": index,
"params": [tx_hash],
}
for index, tx_hash in enumerate(transaction_hashes)
]
response = self.send_json_message(rpc)
return response
def send_message(self, payload):
headers = {"Content-Type": "application/json"}
try:
r = requests.post(
self.jsonrpc_url, headers=headers, data=payload, timeout=300
)
except Exception as e:
print(e)
return r
def send_json_message(self, message):
encoded_json = json.dumps(message)
raw_response = self.send_message(encoded_json.encode("utf8"))
response = raw_response.json()
return response
def enrich_from_web3(
nft_events: List[NFTEvent],
batch_loader: EthereumBatchloader,
bounds: Optional[BlockBounds] = None,
) -> List[NFTEvent]:
"""
Adds block number, value, timestamp from web3 if they are None (because that transaction is missing in db)
"""
transactions_to_query = set()
indices_to_update: List[int] = []
for index, nft_event in enumerate(nft_events):
if (
nft_event.block_number is None
or nft_event.value is None
or nft_event.timestamp is None
):
transactions_to_query.add(nft_event.transaction_hash)
indices_to_update.append(index)
if len(transactions_to_query) == 0:
return nft_events
logger.info("Calling JSON RPC API")
jsonrpc_transactions_response = batch_loader.load_transactions(
list(transactions_to_query)
)
transactions_map = {
result["result"]["hash"]: (
int(result["result"]["value"], 16),
int(result["result"]["blockNumber"], 16),
)
for result in jsonrpc_transactions_response
}
blocks_to_query: Set[int] = set()
for index in indices_to_update:
nft_events[index].value, nft_events[index].block_number = transactions_map[
nft_events[index].transaction_hash
]
blocks_to_query.add(cast(int, nft_events[index].block_number))
if len(blocks_to_query) == 0:
return nft_events
jsonrpc_blocks_response = batch_loader.load_blocks(list(blocks_to_query), False)
blocks_map = {
int(result["result"]["number"], 16): int(result["result"]["timestamp"], 16)
for result in jsonrpc_blocks_response
}
for index in indices_to_update:
nft_events[index].timestamp = blocks_map[cast(int, nft_event.block_number)]
def check_bounds(event: NFTEvent) -> bool:
if bounds is None:
return True
is_admissible = True
if event.block_number < bounds.starting_block:
is_admissible = False
if bounds.ending_block is not None and event.block_number > bounds.ending_block:
is_admissible = False
return is_admissible
admissible_events = [event for event in nft_events if check_bounds(event)]
return admissible_events
def add_events(
datastore_conn: sqlite3.Connection,
db_session: Session,
event_type: EventType,
batch_loader: EthereumBatchloader,
initial_offset=0,
bounds: Optional[BlockBounds] = None,
batch_size: int = 10,
@ -256,7 +130,6 @@ def create_dataset(
datastore_conn: sqlite3.Connection,
db_session: Session,
event_type: EventType,
batch_loader: EthereumBatchloader,
bounds: Optional[BlockBounds] = None,
batch_size: int = 10,
) -> None:
@ -277,7 +150,6 @@ def create_dataset(
datastore_conn,
db_session,
event_type,
batch_loader,
offset,
bounds,
batch_size,