kopia lustrzana https://github.com/bugout-dev/moonstream
dataset generator for contract deployments
rodzic
5610deb367
commit
d832e95f41
|
@ -0,0 +1,81 @@
|
|||
import argparse
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
from shutil import copyfile
|
||||
from typing import Optional
|
||||
|
||||
|
||||
from moonstreamdb.db import yield_db_session_ctx
|
||||
|
||||
from .materialize import add_contract_deployments
|
||||
from .datastore import setup_database
|
||||
from .data import BlockBounds
|
||||
|
||||
|
||||
def handle_initdb(args: argparse.Namespace) -> None:
|
||||
with contextlib.closing(sqlite3.connect(args.datastore)) as conn:
|
||||
setup_database(conn)
|
||||
|
||||
|
||||
def handle_materialize(args: argparse.Namespace) -> None:
|
||||
bounds: Optional[BlockBounds] = None
|
||||
if args.start is not None:
|
||||
bounds = BlockBounds(starting_block=args.start, ending_block=args.end)
|
||||
elif args.end is not None:
|
||||
raise ValueError("You cannot set --end unless you also set --start")
|
||||
with yield_db_session_ctx() as db_session, contextlib.closing(
|
||||
sqlite3.connect(args.datastore)
|
||||
) as datastore:
|
||||
add_contract_deployments(
|
||||
db_session, datastore, batch_size=args.batch_size, bounds=bounds
|
||||
)
|
||||
|
||||
|
||||
def generate_arg_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate a database of contracts deployed on Ethereum."
|
||||
)
|
||||
subcommands = parser.add_subparsers(dest="subcommand", title="subcommands")
|
||||
|
||||
parser_initdb = subcommands.add_parser(
|
||||
"initdb",
|
||||
description="Initialize an SQLite datastore for contract deployments",
|
||||
)
|
||||
parser_initdb.add_argument(
|
||||
"-d",
|
||||
"--datastore",
|
||||
required=True,
|
||||
help="Path to SQLite database representing the dataset",
|
||||
)
|
||||
parser_initdb.set_defaults(func=handle_initdb)
|
||||
|
||||
parser_materialize = subcommands.add_parser(
|
||||
"materialize",
|
||||
description="Materialize the contract deployments database",
|
||||
)
|
||||
|
||||
parser_materialize.add_argument(
|
||||
"-d",
|
||||
"--datastore",
|
||||
required=True,
|
||||
help="Path to SQLite database representing the dataset",
|
||||
)
|
||||
|
||||
parser_materialize.add_argument(
|
||||
"--start", type=int, default=None, help="Starting block number"
|
||||
)
|
||||
parser_materialize.add_argument(
|
||||
"--end", type=int, default=None, help="Ending block number"
|
||||
)
|
||||
parser_materialize.add_argument(
|
||||
"-n",
|
||||
"--batch-size",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Number of events to process per batch",
|
||||
)
|
||||
parser_materialize.set_defaults(func=handle_materialize)
|
||||
|
||||
return parser
|
|
@ -0,0 +1,20 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContractDeployment:
|
||||
address: str
|
||||
block_number: int
|
||||
transaction_hash: str
|
||||
deployer_address: str
|
||||
block_timestamp: int
|
||||
gas_used: int
|
||||
gas_price: int
|
||||
transaction_fee: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class BlockBounds:
|
||||
starting_block: int
|
||||
ending_block: Optional[int] = None
|
|
@ -0,0 +1,90 @@
|
|||
import logging
|
||||
import sqlite3
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
from .data import ContractDeployment
|
||||
|
||||
|
||||
CREATE_CONTRACT_DEPLOYMENTS_TABLE_QUERY = """
|
||||
CREATE TABLE IF NOT EXISTS contract_deployments (
|
||||
id INTEGER PRIMARY KEY,
|
||||
transaction_hash TEXT NOT NULL,
|
||||
block_number INTEGER NOT NULL,
|
||||
timestamp INTEGER NOT NULL,
|
||||
contract_address TEXT NOT NULL,
|
||||
deployer_address TEXT NOT NULL,
|
||||
gas_used INTEGER NOT NULL,
|
||||
gas_price INTEGER NOT NULL,
|
||||
transaction_fee INTEGER NOT NULL,
|
||||
)
|
||||
"""
|
||||
|
||||
CREATE_CHECKPOINT_TABLE_QUERY = """CREATE TABLE IF NOT EXISTS checkpoint
|
||||
(
|
||||
label STRING,
|
||||
offset INTEGER
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
def setup_database(conn: sqlite3.Connection):
|
||||
"""
|
||||
Create the database tables if they don't exist.
|
||||
"""
|
||||
cur = conn.cursor()
|
||||
cur.execute(CREATE_CONTRACT_DEPLOYMENTS_TABLE_QUERY)
|
||||
cur.execute(CREATE_CHECKPOINT_TABLE_QUERY)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def insert_contract_deployments(
|
||||
conn: sqlite3.Connection, contract_deployments: List[ContractDeployment]
|
||||
):
|
||||
"""
|
||||
Insert a list of contract deployments into the database.
|
||||
"""
|
||||
cur = conn.cursor()
|
||||
for contract_deployment in contract_deployments:
|
||||
cur.execute(
|
||||
"INSERT INTO contract_deployments (transaction_hash, block_number, timestamp, contract_address, deployer_address, gas_used, gas_price, transaction_fee) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
contract_deployment.transaction_hash,
|
||||
contract_deployment.block_number,
|
||||
contract_deployment.block_timestamp,
|
||||
contract_deployment.address,
|
||||
contract_deployment.deployer_address,
|
||||
contract_deployment.gas_used,
|
||||
contract_deployment.gas_price,
|
||||
contract_deployment.transaction_fee,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def load_checkpoint(conn: sqlite3.Connection, label: str) -> int:
|
||||
"""
|
||||
Load the checkpoint with the given label.
|
||||
"""
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"SELECT offset FROM checkpoint WHERE label = ?",
|
||||
(label,),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
if row is None:
|
||||
return 0
|
||||
else:
|
||||
return row[0]
|
||||
|
||||
|
||||
def save_checkpoint(conn: sqlite3.Connection, label: str, offset: int):
|
||||
"""
|
||||
Save the checkpoint with the given label.
|
||||
"""
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"INSERT OR REPLACE INTO checkpoint (label, offset) VALUES (?, ?)",
|
||||
(label, offset),
|
||||
)
|
||||
conn.commit()
|
|
@ -0,0 +1,84 @@
|
|||
import logging
|
||||
import sqlite3
|
||||
from typing import Any, cast, Iterator, List, Optional, Set
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
from moonstreamdb.models import EthereumLabel
|
||||
from moonstreamdb.db import yield_db_session_ctx
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import or_, and_
|
||||
|
||||
from .datastore import load_checkpoint, save_checkpoint, insert_contract_deployments
|
||||
from .data import BlockBounds, ContractDeployment
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def add_contract_deployments(
|
||||
datastore_conn: sqlite3.Connection,
|
||||
db_session: Session,
|
||||
initial_offset=0,
|
||||
bounds: Optional[BlockBounds] = None,
|
||||
batch_size: int = 10,
|
||||
):
|
||||
"""
|
||||
Get all contract deployments in a given block bound and add to sqlite3 database
|
||||
"""
|
||||
|
||||
raw_created_at_list = (
|
||||
db_session.query(EthereumLabel.created_at)
|
||||
.filter(EthereumLabel.label == "contract_deployment")
|
||||
.order_by(EthereumLabel.created_at.asc())
|
||||
.distinct(EthereumLabel.created_at)
|
||||
).all()
|
||||
|
||||
created_at_list = [
|
||||
created_at[0] for created_at in raw_created_at_list[initial_offset:]
|
||||
]
|
||||
|
||||
query = db_session.query(EthereumLabel).filter(
|
||||
EthereumLabel.label == "contract_deployment"
|
||||
)
|
||||
|
||||
if bounds is not None:
|
||||
time_filters = [EthereumLabel.block_number >= bounds.starting_block]
|
||||
if bounds.ending_block is not None:
|
||||
time_filters.append(EthereumLabel.block_number <= bounds.ending_block)
|
||||
bounds_filters = [EthereumLabel.hash == None, and_(*time_filters)]
|
||||
|
||||
query = query.filter(or_(*bounds_filters))
|
||||
|
||||
pbar = tqdm(total=(len(raw_created_at_list)))
|
||||
pbar.set_description(f"Processing created ats")
|
||||
pbar.update(initial_offset)
|
||||
batch_start = 0
|
||||
batch_end = batch_start + batch_size
|
||||
while batch_start <= len(created_at_list):
|
||||
labels = query.filter(
|
||||
EthereumLabel.created_at.in_(created_at_list[batch_start : batch_end + 1])
|
||||
).all()
|
||||
if len(labels) == 0:
|
||||
continue
|
||||
contract_deployment_batch: List[ContractDeployment] = []
|
||||
for label in labels:
|
||||
contract_deployment_batch.append(
|
||||
ContractDeployment(
|
||||
address=label.address,
|
||||
transaction_hash=label.transaction_hash,
|
||||
block_number=label.block_number,
|
||||
block_timestamp=label.block_timestamp,
|
||||
deployer_address=label.label_data["deployer"],
|
||||
gas_used=label.label_data["gasUsed"],
|
||||
gas_price=label.label_data["gasPrice"],
|
||||
transaction_fee=label.label_data["transactionFee"],
|
||||
)
|
||||
)
|
||||
logger.info(f"Adding {len(contract_deployment_batch)} contract deployments")
|
||||
insert_contract_deployments(datastore_conn, contract_deployment_batch)
|
||||
pbar.update(batch_end - batch_start + 1)
|
||||
batch_start = batch_end + 1
|
||||
batch_end = min(batch_end + batch_size, len(created_at_list))
|
||||
logger.info("Finished adding contract deployments")
|
|
@ -0,0 +1 @@
|
|||
-e git+https://git@github.com/bugout-dev/moonstream.git@67fe019f1086c435dd3b58f1ade2778acc2167c7#egg=moonstreamdb&subdirectory=db
|
Ładowanie…
Reference in New Issue