kopia lustrzana https://github.com/bugout-dev/moonstream
changed summary schema, now it requires start and end blocks
rodzic
5b6d06cf99
commit
a093946a7a
|
@ -3,13 +3,16 @@ A command line tool to crawl information about NFTs from various sources.
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
import dateutil.parser
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, cast
|
from typing import Any, Dict, cast, Optional
|
||||||
|
|
||||||
|
|
||||||
import dateutil.parser
|
|
||||||
from moonstreamdb.db import yield_db_session_ctx
|
from moonstreamdb.db import yield_db_session_ctx
|
||||||
from moonstreamdb.models import EthereumBlock
|
from moonstreamdb.models import EthereumBlock
|
||||||
from sqlalchemy.orm.session import Session
|
from sqlalchemy.orm.session import Session
|
||||||
|
@ -21,6 +24,11 @@ from ..publish import publish_json
|
||||||
from ..settings import MOONSTREAM_IPC_PATH
|
from ..settings import MOONSTREAM_IPC_PATH
|
||||||
from ..version import MOONCRAWL_VERSION
|
from ..version import MOONCRAWL_VERSION
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
BLOCKS_PER_SUMMARY = 40
|
||||||
|
|
||||||
|
|
||||||
def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3:
|
def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3:
|
||||||
"""
|
"""
|
||||||
|
@ -47,35 +55,86 @@ def get_latest_block_from_db(db_session: Session):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_summary_block() -> Optional[int]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_nft_event_date() -> Optional[datetime]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def sync_labels(
|
||||||
|
db_session: Session, web3_client: Web3, start: Optional[int]
|
||||||
|
) -> EthereumBlock:
|
||||||
|
if start is None:
|
||||||
|
logger.info(
|
||||||
|
"Syncing start block is not given, getting it from latest nft label in db"
|
||||||
|
)
|
||||||
|
start_date = get_latest_nft_event_date()
|
||||||
|
if start_date is None:
|
||||||
|
logger.warning(
|
||||||
|
"Didn't find any nft labels in db, starting sync from 3 month before now"
|
||||||
|
)
|
||||||
|
time_now = datetime.now(timezone.utc)
|
||||||
|
start_date = time_now - relativedelta(months=3)
|
||||||
|
|
||||||
|
start = (
|
||||||
|
db_session.query(EthereumBlock)
|
||||||
|
.filter(EthereumBlock.timestamp >= start_date.timestamp())
|
||||||
|
.order_by(EthereumBlock.timestamp.asc())
|
||||||
|
.limit(1)
|
||||||
|
.one()
|
||||||
|
).block_number
|
||||||
|
logger.info(f"Start block: {start}, date: {start_date}")
|
||||||
|
latest_block = get_latest_block_from_db(db_session)
|
||||||
|
end = latest_block.block_number
|
||||||
|
assert start <= end, f"Start block {start} is greater than latest_block {end} in db"
|
||||||
|
logger.info(f"Labeling blocks {start}-{end}")
|
||||||
|
add_labels(web3_client, db_session, start, end)
|
||||||
|
return latest_block
|
||||||
|
|
||||||
|
|
||||||
|
def sync_summaries(db_session: Session, end: int, start: Optional[int]):
|
||||||
|
if start is None:
|
||||||
|
logger.info(
|
||||||
|
"Syncing start time is not given, getting it from latest nft label in db"
|
||||||
|
)
|
||||||
|
start = get_latest_summary_block()
|
||||||
|
if start is None:
|
||||||
|
time_now = datetime.now(timezone.utc)
|
||||||
|
start_date = time_now - relativedelta(months=3)
|
||||||
|
start = (
|
||||||
|
db_session.query(EthereumBlock)
|
||||||
|
.filter(EthereumBlock.timestamp >= start_date.timestamp())
|
||||||
|
.order_by(EthereumBlock.timestamp.asc())
|
||||||
|
.limit(1)
|
||||||
|
.one()
|
||||||
|
).block_number
|
||||||
|
start += 1
|
||||||
|
|
||||||
|
while start < end:
|
||||||
|
current_end = start + BLOCKS_PER_SUMMARY - 1
|
||||||
|
current_end_time = (
|
||||||
|
db_session.query(EthereumBlock)
|
||||||
|
.filter(EthereumBlock.block_number <= current_end)
|
||||||
|
.order_by(EthereumBlock.block_number.desc())
|
||||||
|
.limit(1)
|
||||||
|
.one()
|
||||||
|
).timestamp
|
||||||
|
summary_result = ethereum_summary(
|
||||||
|
db_session, datetime.fromtimestamp(current_end_time, timezone.utc)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def ethereum_sync_handler(args: argparse.Namespace) -> None:
|
def ethereum_sync_handler(args: argparse.Namespace) -> None:
|
||||||
web3_client = web3_client_from_cli_or_env(args)
|
web3_client = web3_client_from_cli_or_env(args)
|
||||||
|
|
||||||
humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN")
|
humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN")
|
||||||
if humbug_token is None:
|
if humbug_token is None:
|
||||||
raise ValueError("MOONSTREAM_HUMBUG_TOKEN env variable is not set")
|
raise ValueError("MOONSTREAM_HUMBUG_TOKEN env variable is not set")
|
||||||
|
|
||||||
with yield_db_session_ctx() as db_session:
|
with yield_db_session_ctx() as db_session:
|
||||||
start = args.start
|
|
||||||
if start is None:
|
|
||||||
time_now = datetime.now(timezone.utc)
|
|
||||||
week_ago = time_now - timedelta(weeks=1)
|
|
||||||
start = (
|
|
||||||
db_session.query(EthereumBlock)
|
|
||||||
.filter(EthereumBlock.timestamp >= week_ago.timestamp())
|
|
||||||
.order_by(EthereumBlock.timestamp.asc())
|
|
||||||
.limit(1)
|
|
||||||
.one()
|
|
||||||
).block_number
|
|
||||||
|
|
||||||
latest_block = get_latest_block_from_db(db_session)
|
|
||||||
end = latest_block.block_number
|
|
||||||
assert (
|
|
||||||
start <= end
|
|
||||||
), f"Start block {start} is greater than latest_block {end} in db"
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print(f"Labeling blocks {start}-{end}")
|
|
||||||
add_labels(web3_client, db_session, start, end)
|
|
||||||
|
|
||||||
end_time = datetime.fromtimestamp(latest_block.timestamp, timezone.utc)
|
end_time = datetime.fromtimestamp(latest_block.timestamp, timezone.utc)
|
||||||
print(f"Creating summary with endtime={end_time}")
|
print(f"Creating summary with endtime={end_time}")
|
||||||
result = ethereum_summary(db_session, end_time)
|
result = ethereum_summary(db_session, end_time)
|
||||||
|
@ -96,36 +155,30 @@ def ethereum_label_handler(args: argparse.Namespace) -> None:
|
||||||
add_labels(web3_client, db_session, args.start, args.end, args.address)
|
add_labels(web3_client, db_session, args.start, args.end, args.address)
|
||||||
|
|
||||||
|
|
||||||
def push_summary(result: Dict[str, Any], end_block_no: int, humbug_token: str):
|
def push_summary(result: Dict[str, Any], humbug_token: str):
|
||||||
|
|
||||||
title = f"NFT activity on the Ethereum blockchain: end time: {result['crawled_at'] } (block {end_block_no})"
|
title = (
|
||||||
|
f"NFT activity on the Ethereum blockchain: end time: {result['crawled_at'] })"
|
||||||
|
)
|
||||||
publish_json(
|
publish_json(
|
||||||
"nft_ethereum",
|
"nft_ethereum",
|
||||||
humbug_token,
|
humbug_token,
|
||||||
title,
|
title,
|
||||||
result,
|
result,
|
||||||
tags=[f"crawler_version:{MOONCRAWL_VERSION}", f"end_block:{end_block_no}"],
|
tags=[f"crawler_version:{MOONCRAWL_VERSION}"],
|
||||||
wait=False,
|
wait=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def ethereum_summary_handler(args: argparse.Namespace) -> None:
|
def ethereum_summary_handler(args: argparse.Namespace) -> None:
|
||||||
with yield_db_session_ctx() as db_session:
|
|
||||||
result = ethereum_summary(db_session, args.end)
|
|
||||||
|
|
||||||
# humbug_token = args.humbug
|
with yield_db_session_ctx() as db_session:
|
||||||
# if humbug_token is None:
|
result = ethereum_summary(db_session, args.start, args.end)
|
||||||
# humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN")
|
humbug_token = args.humbug
|
||||||
# if humbug_token:
|
if humbug_token is None:
|
||||||
# title = f"NFT activity on the Ethereum blockchain: {start_time} (block {start_block}) to {end_time} (block {end_block})"
|
humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN")
|
||||||
# publish_json(
|
if humbug_token:
|
||||||
# "nft_ethereum",
|
push_summary(result, humbug_token)
|
||||||
# humbug_token,
|
|
||||||
# title,
|
|
||||||
# result,
|
|
||||||
# tags=[f"crawler_version:{MOONCRAWL_VERSION}"],
|
|
||||||
# wait=False,
|
|
||||||
# )
|
|
||||||
with args.outfile as ofp:
|
with args.outfile as ofp:
|
||||||
json.dump(result, ofp)
|
json.dump(result, ofp)
|
||||||
|
|
||||||
|
@ -181,20 +234,18 @@ def main() -> None:
|
||||||
"summary", description="Generate Ethereum NFT summary"
|
"summary", description="Generate Ethereum NFT summary"
|
||||||
)
|
)
|
||||||
parser_ethereum_summary.add_argument(
|
parser_ethereum_summary.add_argument(
|
||||||
"-e",
|
"-s",
|
||||||
"--end",
|
"--start",
|
||||||
type=dateutil.parser.parse,
|
type=int,
|
||||||
default=time_now.isoformat(),
|
required=True,
|
||||||
help=f"End time for window to calculate NFT statistics (default: {time_now.isoformat()})",
|
help=f"Start block for window to calculate NFT statistics",
|
||||||
)
|
)
|
||||||
parser_ethereum_summary.add_argument(
|
parser_ethereum_summary.add_argument(
|
||||||
"--humbug",
|
"-e",
|
||||||
default=None,
|
"--end",
|
||||||
help=(
|
type=int,
|
||||||
"If you would like to write this data to a Moonstream journal, please provide a Humbug "
|
required=True,
|
||||||
"token for that here. (This argument overrides any value set in the "
|
help=f"End block for window to calculate NFT statistics",
|
||||||
"MOONSTREAM_HUMBUG_TOKEN environment variable)"
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
parser_ethereum_summary.add_argument(
|
parser_ethereum_summary.add_argument(
|
||||||
"-o",
|
"-o",
|
||||||
|
@ -203,6 +254,13 @@ def main() -> None:
|
||||||
default=sys.stdout,
|
default=sys.stdout,
|
||||||
help="Optional file to write output to. By default, prints to stdout.",
|
help="Optional file to write output to. By default, prints to stdout.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser_ethereum_summary.add_argument(
|
||||||
|
"-humbug",
|
||||||
|
"--humbug",
|
||||||
|
default=None,
|
||||||
|
help="Humbug token, if given summary will send there",
|
||||||
|
)
|
||||||
parser_ethereum_summary.set_defaults(func=ethereum_summary_handler)
|
parser_ethereum_summary.set_defaults(func=ethereum_summary_handler)
|
||||||
|
|
||||||
parser_ethereum_sync = subparsers_ethereum.add_parser(
|
parser_ethereum_sync = subparsers_ethereum.add_parser(
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta, timezone
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from hexbytes.main import HexBytes
|
from hexbytes.main import HexBytes
|
||||||
|
@ -31,7 +31,9 @@ logger = logging.getLogger(__name__)
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
# Summary keys
|
# Summary keys
|
||||||
SUMMARY_KEY_BLOCKS = "blocks"
|
SUMMARY_KEY_START_BLOCK = "start_block"
|
||||||
|
SUMMARY_KEY_END_BLOCK = "end_block"
|
||||||
|
SUMMARY_KEY_NUM_BLOCKS = "num_blocks"
|
||||||
SUMMARY_KEY_NUM_TRANSACTIONS = "num_transactions"
|
SUMMARY_KEY_NUM_TRANSACTIONS = "num_transactions"
|
||||||
SUMMARY_KEY_TOTAL_VALUE = "total_value"
|
SUMMARY_KEY_TOTAL_VALUE = "total_value"
|
||||||
SUMMARY_KEY_NFT_TRANSFERS = "nft_transfers"
|
SUMMARY_KEY_NFT_TRANSFERS = "nft_transfers"
|
||||||
|
@ -41,7 +43,9 @@ SUMMARY_KEY_NFT_PURCHASERS = "nft_owners"
|
||||||
SUMMARY_KEY_NFT_MINTERS = "nft_minters"
|
SUMMARY_KEY_NFT_MINTERS = "nft_minters"
|
||||||
|
|
||||||
SUMMARY_KEYS = [
|
SUMMARY_KEYS = [
|
||||||
SUMMARY_KEY_BLOCKS,
|
SUMMARY_KEY_START_BLOCK,
|
||||||
|
SUMMARY_KEY_END_BLOCK,
|
||||||
|
SUMMARY_KEY_NUM_BLOCKS,
|
||||||
SUMMARY_KEY_NUM_TRANSACTIONS,
|
SUMMARY_KEY_NUM_TRANSACTIONS,
|
||||||
SUMMARY_KEY_TOTAL_VALUE,
|
SUMMARY_KEY_TOTAL_VALUE,
|
||||||
SUMMARY_KEY_NFT_TRANSFERS,
|
SUMMARY_KEY_NFT_TRANSFERS,
|
||||||
|
@ -485,20 +489,17 @@ def add_labels(
|
||||||
pbar.close()
|
pbar.close()
|
||||||
|
|
||||||
|
|
||||||
def time_bounded_summary(
|
def block_bounded_summary(
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
start_time: datetime,
|
start_block: int,
|
||||||
end_time: datetime,
|
end_block: int,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Produces a summary of Ethereum NFT activity between the given start_time and end_time (inclusive).
|
Produces a summary of Ethereum NFT activity between the given start_time and end_time (inclusive).
|
||||||
"""
|
"""
|
||||||
start_timestamp = int(start_time.timestamp())
|
block_filter = and_(
|
||||||
end_timestamp = int(end_time.timestamp())
|
EthereumBlock.block_number >= start_block,
|
||||||
|
EthereumBlock.block_number <= end_block,
|
||||||
time_filter = and_(
|
|
||||||
EthereumBlock.timestamp >= start_timestamp,
|
|
||||||
EthereumBlock.timestamp <= end_timestamp,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
transactions_query = (
|
transactions_query = (
|
||||||
|
@ -507,7 +508,7 @@ def time_bounded_summary(
|
||||||
EthereumBlock,
|
EthereumBlock,
|
||||||
EthereumTransaction.block_number == EthereumBlock.block_number,
|
EthereumTransaction.block_number == EthereumBlock.block_number,
|
||||||
)
|
)
|
||||||
.filter(time_filter)
|
.filter(block_filter)
|
||||||
)
|
)
|
||||||
|
|
||||||
def nft_query(label: str) -> Query:
|
def nft_query(label: str) -> Query:
|
||||||
|
@ -529,7 +530,7 @@ def time_bounded_summary(
|
||||||
EthereumBlock,
|
EthereumBlock,
|
||||||
EthereumTransaction.block_number == EthereumBlock.block_number,
|
EthereumTransaction.block_number == EthereumBlock.block_number,
|
||||||
)
|
)
|
||||||
.filter(time_filter)
|
.filter(block_filter)
|
||||||
.filter(EthereumLabel.label == label)
|
.filter(EthereumLabel.label == label)
|
||||||
)
|
)
|
||||||
return query
|
return query
|
||||||
|
@ -556,7 +557,7 @@ def time_bounded_summary(
|
||||||
EthereumTransaction.block_number == EthereumBlock.block_number,
|
EthereumTransaction.block_number == EthereumBlock.block_number,
|
||||||
)
|
)
|
||||||
.filter(EthereumLabel.label == label)
|
.filter(EthereumLabel.label == label)
|
||||||
.filter(time_filter)
|
.filter(block_filter)
|
||||||
.order_by(
|
.order_by(
|
||||||
# Without "transfer_value" and "owner_address" as sort keys, the final distinct query
|
# Without "transfer_value" and "owner_address" as sort keys, the final distinct query
|
||||||
# does not seem to be deterministic.
|
# does not seem to be deterministic.
|
||||||
|
@ -572,21 +573,30 @@ def time_bounded_summary(
|
||||||
purchaser_query = holder_query(TRANSFER_LABEL)
|
purchaser_query = holder_query(TRANSFER_LABEL)
|
||||||
minter_query = holder_query(MINT_LABEL)
|
minter_query = holder_query(MINT_LABEL)
|
||||||
|
|
||||||
blocks_result: Dict[str, int] = {}
|
blocks = (
|
||||||
min_block = (
|
db_session.query(EthereumBlock)
|
||||||
db_session.query(func.min(EthereumBlock.block_number))
|
.filter(block_filter)
|
||||||
.filter(time_filter)
|
.order_by(EthereumBlock.block_number.asc())
|
||||||
.scalar()
|
|
||||||
)
|
|
||||||
max_block = (
|
|
||||||
db_session.query(func.max(EthereumBlock.block_number))
|
|
||||||
.filter(time_filter)
|
|
||||||
.scalar()
|
|
||||||
)
|
)
|
||||||
|
first_block = None
|
||||||
|
last_block = None
|
||||||
|
num_blocks = 0
|
||||||
|
for block in blocks:
|
||||||
|
if num_blocks == 0:
|
||||||
|
min_block = block
|
||||||
|
max_block = block
|
||||||
|
num_blocks += 1
|
||||||
|
|
||||||
|
start_time = None
|
||||||
|
end_time = None
|
||||||
if min_block is not None:
|
if min_block is not None:
|
||||||
blocks_result["start"] = min_block
|
first_block = min_block.block_number
|
||||||
|
start_time = datetime.fromtimestamp(
|
||||||
|
min_block.timestamp, timezone.utc
|
||||||
|
).isoformat()
|
||||||
if max_block is not None:
|
if max_block is not None:
|
||||||
blocks_result["end"] = max_block
|
last_block = max_block.block_number
|
||||||
|
end_time = datetime.fromtimestamp(max_block.timestamp, timezone.utc).isoformat()
|
||||||
|
|
||||||
num_transactions = transactions_query.distinct(EthereumTransaction.hash).count()
|
num_transactions = transactions_query.distinct(EthereumTransaction.hash).count()
|
||||||
num_transfers = transfer_query.distinct(EthereumTransaction.hash).count()
|
num_transfers = transfer_query.distinct(EthereumTransaction.hash).count()
|
||||||
|
@ -614,12 +624,14 @@ def time_bounded_summary(
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"date_range": {
|
"date_range": {
|
||||||
"start_time": start_time.isoformat(),
|
"start_time": start_time,
|
||||||
"include_start": True,
|
"include_start": True,
|
||||||
"end_time": end_time.isoformat(),
|
"end_time": end_time,
|
||||||
"include_end": True,
|
"include_end": True,
|
||||||
},
|
},
|
||||||
SUMMARY_KEY_BLOCKS: blocks_result,
|
SUMMARY_KEY_START_BLOCK: first_block,
|
||||||
|
SUMMARY_KEY_END_BLOCK: last_block,
|
||||||
|
SUMMARY_KEY_NUM_BLOCKS: num_blocks,
|
||||||
SUMMARY_KEY_NUM_TRANSACTIONS: f"{num_transactions}",
|
SUMMARY_KEY_NUM_TRANSACTIONS: f"{num_transactions}",
|
||||||
SUMMARY_KEY_TOTAL_VALUE: f"{total_value}",
|
SUMMARY_KEY_TOTAL_VALUE: f"{total_value}",
|
||||||
SUMMARY_KEY_NFT_TRANSFERS: f"{num_transfers}",
|
SUMMARY_KEY_NFT_TRANSFERS: f"{num_transfers}",
|
||||||
|
@ -632,28 +644,12 @@ def time_bounded_summary(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]:
|
def summary(db_session: Session, start_block: int, end_block: int) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Produces a summary of all Ethereum NFT activity:
|
Produces a summary of all Ethereum NFT activity:
|
||||||
1. From 1 hour before end_time to end_time
|
From 1 hour before end_time to end_time
|
||||||
2. From 1 day before end_time to end_time
|
|
||||||
3. From 1 week before end_time to end_time
|
|
||||||
"""
|
"""
|
||||||
start_times = {
|
|
||||||
"hour": end_time - timedelta(hours=1),
|
|
||||||
"day": end_time - timedelta(days=1),
|
|
||||||
"week": end_time - timedelta(weeks=1),
|
|
||||||
}
|
|
||||||
summaries = {
|
|
||||||
period: time_bounded_summary(db_session, start_time, end_time)
|
|
||||||
for period, start_time in start_times.items()
|
|
||||||
}
|
|
||||||
|
|
||||||
def aggregate_summary(key: str) -> Dict[str, Any]:
|
result = block_bounded_summary(db_session, start_block, end_block)
|
||||||
return {period: summary.get(key) for period, summary in summaries.items()}
|
result["crawled_at"] = datetime.utcnow().isoformat()
|
||||||
|
|
||||||
result: Dict[str, Any] = {
|
|
||||||
summary_key: aggregate_summary(summary_key) for summary_key in SUMMARY_KEYS
|
|
||||||
}
|
|
||||||
result["crawled_at"] = end_time.isoformat()
|
|
||||||
return result
|
return result
|
||||||
|
|
Ładowanie…
Reference in New Issue