From a1acc7aaed30206bfd60b9a4cd1c5a93e0069450 Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Wed, 1 Sep 2021 17:33:44 +0300 Subject: [PATCH 01/27] initial version of nft explorer --- .../mooncrawl/mooncrawl/eth_nft_explorer.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py diff --git a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py new file mode 100644 index 00000000..a52d48bb --- /dev/null +++ b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py @@ -0,0 +1,100 @@ +from dataclasses import dataclass +from typing import List, Optional +from web3 import Web3 +from web3.types import FilterParams +from web3._utils.events import get_event_data + + +w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:18375")) + +# First abi is for old NFT's like crypto kitties +# The erc721 standart requieres that Transfer event is indexed for all arguments +# That is how we get distinguished from erc20 transfer events +erc721_transfer_event_abis = [ + { + "anonymous": False, + "inputs": [ + {"indexed": False, "name": "from", "type": "address"}, + {"indexed": False, "name": "to", "type": "address"}, + {"indexed": False, "name": "tokenId", "type": "uint256"}, + ], + "name": "Transfer", + "type": "event", + }, + { + "anonymous": False, + "inputs": [ + {"indexed": True, "name": "from", "type": "address"}, + {"indexed": True, "name": "to", "type": "address"}, + {"indexed": True, "name": "tokenId", "type": "uint256"}, + ], + "name": "Transfer", + "type": "event", + }, +] + +transfer_event_signature = w3.sha3(text="Transfer(address,address,uint256)").hex() + + +@dataclass +class NFT_transfer: + contract_address: str + transfer_from: str + transfer_to: str + tokenId: int + transfer_tx: str + is_mint: bool = False + + +def decode_nft_transfer_data(log) -> Optional[NFT_transfer]: + for abi in erc721_transfer_event_abis: + try: + transfer_data = get_event_data(w3.codec, abi, log) + nft_transfer = NFT_transfer( + contract_address=transfer_data["address"], + transfer_from=transfer_data["args"]["from"], + transfer_to=transfer_data["args"]["to"], + tokenId=transfer_data["args"]["tokenId"], + transfer_tx=transfer_data["transactionHash"].hex(), + ) + if ( + nft_transfer.transfer_from + == "0x0000000000000000000000000000000000000000" # Blackhole address + ): + nft_transfer.is_mint = True + return nft_transfer + except: + continue + return None + + +def get_nft_transfers( + block_number_from: int, contract_address: Optional[str] = None +) -> List[NFT_transfer]: + filter_params = FilterParams( + fromBlock=block_number_from, topics=[transfer_event_signature] + ) + + if contract_address is not None: + filter_params["address"] = w3.toChecksumAddress(contract_address) + + logs = w3.eth.get_logs(filter_params) + nft_transfers: List[NFT_transfer] = [] + for log in logs: + nft_transfer = decode_nft_transfer_data(log) + if nft_transfer is not None: + nft_transfers.append(nft_transfer) + return nft_transfers + + +cryptoKittiesAddress = "0x06012c8cf97BEaD5deAe237070F9587f8E7A266d" +transfesrs = get_nft_transfers( + w3.eth.block_number - 1000, "0x77aa555c8a518b56a1ed57b7b4b85ee2ad479d06" +) + + +print(transfesrs) +print(f"Total nft transfers: {len(transfesrs)}") +minted_count = len(list(filter(lambda transfer: transfer.is_mint == True, transfesrs))) +print(f"Minted count: {minted_count}") +# print(transfesrs[0].transfer_tx) From 8cfbd716b3f2d5ca9a72be9b3163eb0aaa36e94c Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Wed, 1 Sep 2021 22:29:53 +0300 Subject: [PATCH 02/27] not important fix --- .../mooncrawl/mooncrawl/eth_nft_explorer.py | 75 +++++++++++++++++-- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py index a52d48bb..e933e3bd 100644 --- a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py +++ b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py @@ -33,6 +33,72 @@ erc721_transfer_event_abis = [ }, ] +erc721_functions_abi = [ + { + "inputs": [{"internalType": "address", "name": "owner", "type": "address"}], + "name": "balanceOf", + "outputs": [{"internalType": "uint256", "name": "", "type": "uint256"}], + "payable": False, + "stateMutability": "view", + "type": "function", + "constant": True, + }, + { + "inputs": [], + "name": "name", + "outputs": [{"internalType": "string", "name": "", "type": "string"}], + "stateMutability": "view", + "type": "function", + "constant": True, + }, + { + "inputs": [{"internalType": "uint256", "name": "tokenId", "type": "uint256"}], + "name": "ownerOf", + "outputs": [{"internalType": "address", "name": "", "type": "address"}], + "payable": False, + "stateMutability": "view", + "type": "function", + "constant": True, + }, + { + "inputs": [], + "name": "symbol", + "outputs": [{"internalType": "string", "name": "", "type": "string"}], + "stateMutability": "view", + "type": "function", + "constant": True, + }, + { + "inputs": [], + "name": "totalSupply", + "outputs": [{"internalType": "uint256", "name": "", "type": "uint256"}], + "stateMutability": "view", + "type": "function", + "constant": True, + }, +] + + +@dataclass +class NFT_contract: + address: str + name: str + symbol: str + total_supply: str + + +def get_erc721_contract_info(address: str) -> NFT_contract: + contract = w3.eth.contract( + address=w3.toChecksumAddress(address), abi=erc721_functions_abi + ) + return NFT_contract( + address=address, + name=contract.functions.name().call(), + symbol=contract.functions.symbol().call(), + total_supply=contract.functions.totalSupply().call(), + ) + + transfer_event_signature = w3.sha3(text="Transfer(address,address,uint256)").hex() @@ -89,12 +155,11 @@ def get_nft_transfers( cryptoKittiesAddress = "0x06012c8cf97BEaD5deAe237070F9587f8E7A266d" transfesrs = get_nft_transfers( - w3.eth.block_number - 1000, "0x77aa555c8a518b56a1ed57b7b4b85ee2ad479d06" + w3.eth.block_number - 1000, "0x2aea4add166ebf38b63d09a75de1a7b94aa24163" ) - print(transfesrs) print(f"Total nft transfers: {len(transfesrs)}") -minted_count = len(list(filter(lambda transfer: transfer.is_mint == True, transfesrs))) -print(f"Minted count: {minted_count}") -# print(transfesrs[0].transfer_tx) +minted = list(filter(lambda transfer: transfer.is_mint == True, transfesrs)) +# print(minted) +print(f"Minted count: {len(minted)}") From a80898a6a99b6f6e16f3c4c8e80e9a2b13817531 Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Wed, 1 Sep 2021 23:54:17 +0300 Subject: [PATCH 03/27] added raw nft transfer --- .../mooncrawl/mooncrawl/eth_nft_explorer.py | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py index e933e3bd..98f290f8 100644 --- a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py +++ b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py @@ -1,6 +1,8 @@ -from dataclasses import dataclass -from typing import List, Optional +from dataclasses import dataclass, asdict +from collections import defaultdict +from typing import Dict, List, Optional from web3 import Web3 +import web3 from web3.types import FilterParams from web3._utils.events import get_event_data @@ -103,31 +105,43 @@ transfer_event_signature = w3.sha3(text="Transfer(address,address,uint256)").hex @dataclass -class NFT_transfer: +class NFTTransferRaw: + contract_address: str + transfer_from: str + transfer_to: str + tokenId: int + transfer_tx: bytes + + +@dataclass +class NFTTransfer: contract_address: str transfer_from: str transfer_to: str tokenId: int transfer_tx: str + value: Optional[int] = None is_mint: bool = False -def decode_nft_transfer_data(log) -> Optional[NFT_transfer]: +def get_value_by_tx(tx_hash): + print(f"Trying to get tx: {tx_hash.hex()}") + tx = w3.eth.get_transaction(tx_hash) + print("got it") + return tx["value"] + + +def decode_nft_transfer_data(log) -> Optional[NFTTransferRaw]: for abi in erc721_transfer_event_abis: try: transfer_data = get_event_data(w3.codec, abi, log) - nft_transfer = NFT_transfer( + nft_transfer = NFTTransferRaw( contract_address=transfer_data["address"], transfer_from=transfer_data["args"]["from"], transfer_to=transfer_data["args"]["to"], tokenId=transfer_data["args"]["tokenId"], - transfer_tx=transfer_data["transactionHash"].hex(), + transfer_tx=transfer_data["transactionHash"], ) - if ( - nft_transfer.transfer_from - == "0x0000000000000000000000000000000000000000" # Blackhole address - ): - nft_transfer.is_mint = True return nft_transfer except: continue @@ -136,7 +150,7 @@ def decode_nft_transfer_data(log) -> Optional[NFT_transfer]: def get_nft_transfers( block_number_from: int, contract_address: Optional[str] = None -) -> List[NFT_transfer]: +) -> List[NFTTransfer]: filter_params = FilterParams( fromBlock=block_number_from, topics=[transfer_event_signature] ) @@ -145,17 +159,33 @@ def get_nft_transfers( filter_params["address"] = w3.toChecksumAddress(contract_address) logs = w3.eth.get_logs(filter_params) - nft_transfers: List[NFT_transfer] = [] + nft_transfers: List[NFTTransfer] = [] + tx_value: Dict[bytes, List[NFTTransferRaw]] = defaultdict(list) for log in logs: nft_transfer = decode_nft_transfer_data(log) if nft_transfer is not None: - nft_transfers.append(nft_transfer) + tx_value[nft_transfer.transfer_tx].append(nft_transfer) + + for tx_hash, transfers in tx_value.items(): + # value = get_value_by_tx(tx_hash) + value = 0 + for transfer in transfers: + kwargs = { + **asdict(transfer), + "transfer_tx": transfer.transfer_tx.hex(), + "is_mint": transfer.transfer_from + == "0x0000000000000000000000000000000000000000", + "value": value, + } + parsed_transfer = NFTTransfer(**kwargs) + + nft_transfers.append(parsed_transfer) return nft_transfers cryptoKittiesAddress = "0x06012c8cf97BEaD5deAe237070F9587f8E7A266d" transfesrs = get_nft_transfers( - w3.eth.block_number - 1000, "0x2aea4add166ebf38b63d09a75de1a7b94aa24163" + w3.eth.block_number - 120, ) print(transfesrs) From 738bc20b5e8caa03f6c90f71ab3db9f2f8d35541 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Wed, 1 Sep 2021 15:01:24 -0700 Subject: [PATCH 04/27] Cleaned up NFT crawler code a bit Hooked it up to comamnd line --- crawlers/mooncrawl/.gitignore | 2 + .../mooncrawl/mooncrawl/eth_nft_explorer.py | 72 ++++++++----------- crawlers/mooncrawl/mooncrawl/ethcrawler.py | 65 ++++++++++++++++- crawlers/mooncrawl/mypy.ini | 3 + 4 files changed, 99 insertions(+), 43 deletions(-) create mode 100644 crawlers/mooncrawl/.gitignore diff --git a/crawlers/mooncrawl/.gitignore b/crawlers/mooncrawl/.gitignore new file mode 100644 index 00000000..d8fa592b --- /dev/null +++ b/crawlers/mooncrawl/.gitignore @@ -0,0 +1,2 @@ +.venv/ +.mooncrawl/ diff --git a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py index 98f290f8..5b7c74dd 100644 --- a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py +++ b/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py @@ -1,14 +1,14 @@ from dataclasses import dataclass, asdict -from collections import defaultdict -from typing import Dict, List, Optional +from typing import cast, List, Optional +from hexbytes.main import HexBytes + +from eth_typing.encoding import HexStr +from tqdm import tqdm from web3 import Web3 -import web3 -from web3.types import FilterParams +from web3.types import FilterParams, LogReceipt from web3._utils.events import get_event_data -w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:18375")) - # First abi is for old NFT's like crypto kitties # The erc721 standart requieres that Transfer event is indexed for all arguments # That is how we get distinguished from erc20 transfer events @@ -89,7 +89,7 @@ class NFT_contract: total_supply: str -def get_erc721_contract_info(address: str) -> NFT_contract: +def get_erc721_contract_info(w3: Web3, address: str) -> NFT_contract: contract = w3.eth.contract( address=w3.toChecksumAddress(address), abi=erc721_functions_abi ) @@ -101,7 +101,10 @@ def get_erc721_contract_info(address: str) -> NFT_contract: ) -transfer_event_signature = w3.sha3(text="Transfer(address,address,uint256)").hex() +# SHA3 hash of the string "Transfer(address,address,uint256)" +TRANSFER_EVENT_SIGNATURE = HexBytes( + "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef" +) @dataclass @@ -110,7 +113,7 @@ class NFTTransferRaw: transfer_from: str transfer_to: str tokenId: int - transfer_tx: bytes + transfer_tx: HexBytes @dataclass @@ -124,14 +127,14 @@ class NFTTransfer: is_mint: bool = False -def get_value_by_tx(tx_hash): +def get_value_by_tx(w3: Web3, tx_hash: HexBytes): print(f"Trying to get tx: {tx_hash.hex()}") tx = w3.eth.get_transaction(tx_hash) print("got it") return tx["value"] -def decode_nft_transfer_data(log) -> Optional[NFTTransferRaw]: +def decode_nft_transfer_data(w3: Web3, log: LogReceipt) -> Optional[NFTTransferRaw]: for abi in erc721_transfer_event_abis: try: transfer_data = get_event_data(w3.codec, abi, log) @@ -149,47 +152,34 @@ def decode_nft_transfer_data(log) -> Optional[NFTTransferRaw]: def get_nft_transfers( - block_number_from: int, contract_address: Optional[str] = None + w3: Web3, + from_block: Optional[int] = None, + to_block: Optional[int] = None, + contract_address: Optional[str] = None, ) -> List[NFTTransfer]: - filter_params = FilterParams( - fromBlock=block_number_from, topics=[transfer_event_signature] - ) + filter_params = FilterParams(topics=[cast(HexStr, TRANSFER_EVENT_SIGNATURE.hex())]) + + if from_block is not None: + filter_params["fromBlock"] = from_block + + if to_block is not None: + filter_params["toBlock"] = to_block if contract_address is not None: filter_params["address"] = w3.toChecksumAddress(contract_address) logs = w3.eth.get_logs(filter_params) nft_transfers: List[NFTTransfer] = [] - tx_value: Dict[bytes, List[NFTTransferRaw]] = defaultdict(list) - for log in logs: - nft_transfer = decode_nft_transfer_data(log) + for log in tqdm(logs): + nft_transfer = decode_nft_transfer_data(w3, log) if nft_transfer is not None: - tx_value[nft_transfer.transfer_tx].append(nft_transfer) - - for tx_hash, transfers in tx_value.items(): - # value = get_value_by_tx(tx_hash) - value = 0 - for transfer in transfers: kwargs = { - **asdict(transfer), - "transfer_tx": transfer.transfer_tx.hex(), - "is_mint": transfer.transfer_from + **asdict(nft_transfer), + "transfer_tx": nft_transfer.transfer_tx.hex(), + "is_mint": nft_transfer.transfer_from == "0x0000000000000000000000000000000000000000", - "value": value, } - parsed_transfer = NFTTransfer(**kwargs) + parsed_transfer = NFTTransfer(**kwargs) # type: ignore nft_transfers.append(parsed_transfer) return nft_transfers - - -cryptoKittiesAddress = "0x06012c8cf97BEaD5deAe237070F9587f8E7A266d" -transfesrs = get_nft_transfers( - w3.eth.block_number - 120, -) - -print(transfesrs) -print(f"Total nft transfers: {len(transfesrs)}") -minted = list(filter(lambda transfer: transfer.is_mint == True, transfesrs)) -# print(minted) -print(f"Minted count: {len(minted)}") diff --git a/crawlers/mooncrawl/mooncrawl/ethcrawler.py b/crawlers/mooncrawl/mooncrawl/ethcrawler.py index 793ab3ea..c19079a2 100644 --- a/crawlers/mooncrawl/mooncrawl/ethcrawler.py +++ b/crawlers/mooncrawl/mooncrawl/ethcrawler.py @@ -8,11 +8,13 @@ import json import os import sys import time -from typing import Iterator, List +from typing import cast, Iterator, List import dateutil.parser +from web3 import Web3 from .ethereum import ( + connect, crawl_blocks_executor, crawl_blocks, check_missing_blocks, @@ -21,8 +23,9 @@ from .ethereum import ( DateRange, trending, ) +from .eth_nft_explorer import get_nft_transfers from .publish import publish_json -from .settings import MOONSTREAM_CRAWL_WORKERS +from .settings import MOONSTREAM_CRAWL_WORKERS, MOONSTREAM_IPC_PATH from .version import MOONCRAWL_VERSION @@ -31,6 +34,22 @@ class ProcessingOrder(Enum): ASCENDING = 1 +def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: + """ + Returns a web3 client either by parsing "--web3" argument on the given arguments or by looking up + the MOONSTREAM_IPC_PATH environment variable. + """ + web3_connection_string = MOONSTREAM_IPC_PATH + args_web3 = vars(args).get("web3") + if args_web3 is not None: + web3_connection_string = cast(str, args_web3) + if web3_connection_string is None: + raise ValueError( + "Could not find Web3 connection information in arguments or in MOONSTREAM_IPC_PATH environment variable" + ) + return connect(web3_connection_string) + + def yield_blocks_numbers_lists( blocks_range_str: str, order: ProcessingOrder = ProcessingOrder.DESCENDING, @@ -200,6 +219,16 @@ def ethcrawler_trending_handler(args: argparse.Namespace) -> None: json.dump(results, ofp) +def ethcrawler_nft_handler(args: argparse.Namespace) -> None: + web3_client = web3_client_from_cli_or_env(args) + transfers = get_nft_transfers(web3_client, args.start, args.end, args.address) + for transfer in transfers: + print(transfer) + + print("Total transfers:", len(transfers)) + print("Mints:", len([transfer for transfer in transfers if transfer.is_mint])) + + def main() -> None: parser = argparse.ArgumentParser(description="Moonstream crawlers CLI") parser.set_defaults(func=lambda _: parser.print_help()) @@ -389,6 +418,38 @@ def main() -> None: ) parser_ethcrawler_trending.set_defaults(func=ethcrawler_trending_handler) + parser_ethcrawler_nft = subcommands.add_parser( + "nft", description="Collect information about NFTs from Ethereum blockchains" + ) + parser_ethcrawler_nft.add_argument( + "-s", + "--start", + type=int, + default=None, + help="Starting block number (inclusive if block available)", + ) + parser_ethcrawler_nft.add_argument( + "-e", + "--end", + type=int, + default=None, + help="Ending block number (inclusive if block available)", + ) + parser_ethcrawler_nft.add_argument( + "-a", + "--address", + type=str, + default=None, + help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", + ) + parser_ethcrawler_nft.add_argument( + "--web3", + type=str, + default=None, + help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", + ) + parser_ethcrawler_nft.set_defaults(func=ethcrawler_nft_handler) + args = parser.parse_args() args.func(args) diff --git a/crawlers/mooncrawl/mypy.ini b/crawlers/mooncrawl/mypy.ini index 47838c47..45381262 100644 --- a/crawlers/mooncrawl/mypy.ini +++ b/crawlers/mooncrawl/mypy.ini @@ -8,3 +8,6 @@ ignore_missing_imports = True [mypy-pyevmasm.*] ignore_missing_imports = True + +[mypy-tqdm.*] +ignore_missing_imports = True From 6ab99a03b2d943d400fa1fc48b003856f134081a Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 21:53:18 -0700 Subject: [PATCH 05/27] Moved nft crawler into its own submodule --- crawlers/mooncrawl/mooncrawl/ethcrawler.py | 63 +------------- crawlers/mooncrawl/mooncrawl/nft/__init__.py | 0 crawlers/mooncrawl/mooncrawl/nft/cli.py | 83 +++++++++++++++++++ .../{eth_nft_explorer.py => nft/ethereum.py} | 0 4 files changed, 84 insertions(+), 62 deletions(-) create mode 100644 crawlers/mooncrawl/mooncrawl/nft/__init__.py create mode 100644 crawlers/mooncrawl/mooncrawl/nft/cli.py rename crawlers/mooncrawl/mooncrawl/{eth_nft_explorer.py => nft/ethereum.py} (100%) diff --git a/crawlers/mooncrawl/mooncrawl/ethcrawler.py b/crawlers/mooncrawl/mooncrawl/ethcrawler.py index c19079a2..d9d51e5d 100644 --- a/crawlers/mooncrawl/mooncrawl/ethcrawler.py +++ b/crawlers/mooncrawl/mooncrawl/ethcrawler.py @@ -8,13 +8,11 @@ import json import os import sys import time -from typing import cast, Iterator, List +from typing import Iterator, List import dateutil.parser -from web3 import Web3 from .ethereum import ( - connect, crawl_blocks_executor, crawl_blocks, check_missing_blocks, @@ -23,7 +21,6 @@ from .ethereum import ( DateRange, trending, ) -from .eth_nft_explorer import get_nft_transfers from .publish import publish_json from .settings import MOONSTREAM_CRAWL_WORKERS, MOONSTREAM_IPC_PATH from .version import MOONCRAWL_VERSION @@ -34,22 +31,6 @@ class ProcessingOrder(Enum): ASCENDING = 1 -def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: - """ - Returns a web3 client either by parsing "--web3" argument on the given arguments or by looking up - the MOONSTREAM_IPC_PATH environment variable. - """ - web3_connection_string = MOONSTREAM_IPC_PATH - args_web3 = vars(args).get("web3") - if args_web3 is not None: - web3_connection_string = cast(str, args_web3) - if web3_connection_string is None: - raise ValueError( - "Could not find Web3 connection information in arguments or in MOONSTREAM_IPC_PATH environment variable" - ) - return connect(web3_connection_string) - - def yield_blocks_numbers_lists( blocks_range_str: str, order: ProcessingOrder = ProcessingOrder.DESCENDING, @@ -219,16 +200,6 @@ def ethcrawler_trending_handler(args: argparse.Namespace) -> None: json.dump(results, ofp) -def ethcrawler_nft_handler(args: argparse.Namespace) -> None: - web3_client = web3_client_from_cli_or_env(args) - transfers = get_nft_transfers(web3_client, args.start, args.end, args.address) - for transfer in transfers: - print(transfer) - - print("Total transfers:", len(transfers)) - print("Mints:", len([transfer for transfer in transfers if transfer.is_mint])) - - def main() -> None: parser = argparse.ArgumentParser(description="Moonstream crawlers CLI") parser.set_defaults(func=lambda _: parser.print_help()) @@ -418,38 +389,6 @@ def main() -> None: ) parser_ethcrawler_trending.set_defaults(func=ethcrawler_trending_handler) - parser_ethcrawler_nft = subcommands.add_parser( - "nft", description="Collect information about NFTs from Ethereum blockchains" - ) - parser_ethcrawler_nft.add_argument( - "-s", - "--start", - type=int, - default=None, - help="Starting block number (inclusive if block available)", - ) - parser_ethcrawler_nft.add_argument( - "-e", - "--end", - type=int, - default=None, - help="Ending block number (inclusive if block available)", - ) - parser_ethcrawler_nft.add_argument( - "-a", - "--address", - type=str, - default=None, - help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", - ) - parser_ethcrawler_nft.add_argument( - "--web3", - type=str, - default=None, - help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", - ) - parser_ethcrawler_nft.set_defaults(func=ethcrawler_nft_handler) - args = parser.parse_args() args.func(args) diff --git a/crawlers/mooncrawl/mooncrawl/nft/__init__.py b/crawlers/mooncrawl/mooncrawl/nft/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py new file mode 100644 index 00000000..96d74f89 --- /dev/null +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -0,0 +1,83 @@ +""" +A command line tool to crawl information about NFTs from various sources. +""" +import argparse +from typing import cast + +from web3 import Web3 + +from ..ethereum import connect +from .ethereum import get_nft_transfers +from ..settings import MOONSTREAM_IPC_PATH + + +def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: + """ + Returns a web3 client either by parsing "--web3" argument on the given arguments or by looking up + the MOONSTREAM_IPC_PATH environment variable. + """ + web3_connection_string = MOONSTREAM_IPC_PATH + args_web3 = vars(args).get("web3") + if args_web3 is not None: + web3_connection_string = cast(str, args_web3) + if web3_connection_string is None: + raise ValueError( + "Could not find Web3 connection information in arguments or in MOONSTREAM_IPC_PATH environment variable" + ) + return connect(web3_connection_string) + + +def ethereum_handler(args: argparse.Namespace) -> None: + web3_client = web3_client_from_cli_or_env(args) + transfers = get_nft_transfers(web3_client, args.start, args.end, args.address) + for transfer in transfers: + print(transfer) + + print("Total transfers:", len(transfers)) + print("Mints:", len([transfer for transfer in transfers if transfer.is_mint])) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Moonstream NFT crawlers") + parser.set_defaults(func=lambda _: parser.print_help()) + subcommands = parser.add_subparsers(description="Subcommands") + + parser_ethereum = subcommands.add_parser( + "ethereum", + description="Collect information about NFTs from Ethereum blockchains", + ) + parser_ethereum.add_argument( + "-s", + "--start", + type=int, + default=None, + help="Starting block number (inclusive if block available)", + ) + parser_ethereum.add_argument( + "-e", + "--end", + type=int, + default=None, + help="Ending block number (inclusive if block available)", + ) + parser_ethereum.add_argument( + "-a", + "--address", + type=str, + default=None, + help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", + ) + parser_ethereum.add_argument( + "--web3", + type=str, + default=None, + help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", + ) + parser_ethereum.set_defaults(func=ethereum_handler) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py similarity index 100% rename from crawlers/mooncrawl/mooncrawl/eth_nft_explorer.py rename to crawlers/mooncrawl/mooncrawl/nft/ethereum.py From 0694e7d45e1de422b2ab35938a3b0305e07ebdec Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 21:54:33 -0700 Subject: [PATCH 06/27] Added nft CLI and bumped version to 0.0.4 --- crawlers/mooncrawl/mooncrawl/version.py | 2 +- crawlers/mooncrawl/setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/crawlers/mooncrawl/mooncrawl/version.py b/crawlers/mooncrawl/mooncrawl/version.py index 8fad0f1d..5cc01123 100644 --- a/crawlers/mooncrawl/mooncrawl/version.py +++ b/crawlers/mooncrawl/mooncrawl/version.py @@ -2,4 +2,4 @@ Moonstream crawlers version. """ -MOONCRAWL_VERSION = "0.0.3" +MOONCRAWL_VERSION = "0.0.4" diff --git a/crawlers/mooncrawl/setup.py b/crawlers/mooncrawl/setup.py index 1e893bd4..6d176d2e 100644 --- a/crawlers/mooncrawl/setup.py +++ b/crawlers/mooncrawl/setup.py @@ -49,6 +49,7 @@ setup( "esd=mooncrawl.esd:main", "identity=mooncrawl.identity:main", "etherscan=mooncrawl.etherscan:main", + "nft=mooncrawl.nft.cli:main", ] }, ) From f06f2e3fad837a0ac1d5e0d9024aaff8a56becba Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 22:16:42 -0700 Subject: [PATCH 07/27] Added Humbug publication functionality for "nft ethereum" command --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 56 ++++++++++++++++++++++--- crawlers/mooncrawl/mooncrawl/publish.py | 3 +- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 96d74f89..28c52d12 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -2,13 +2,19 @@ A command line tool to crawl information about NFTs from various sources. """ import argparse -from typing import cast +from dataclasses import asdict +import json +import os +import sys +from typing import Any, cast, Dict, List from web3 import Web3 from ..ethereum import connect from .ethereum import get_nft_transfers +from ..publish import publish_json from ..settings import MOONSTREAM_IPC_PATH +from ..version import MOONCRAWL_VERSION def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: @@ -30,11 +36,34 @@ def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: def ethereum_handler(args: argparse.Namespace) -> None: web3_client = web3_client_from_cli_or_env(args) transfers = get_nft_transfers(web3_client, args.start, args.end, args.address) - for transfer in transfers: - print(transfer) - print("Total transfers:", len(transfers)) - print("Mints:", len([transfer for transfer in transfers if transfer.is_mint])) + # TODO(zomglings): Create a function which calculates statistics about ethereum NFTs in the + # ethereum module and call it here. Don't do this calculation here. + num_mints = len([transfer for transfer in transfers if transfer.is_mint]) + + # TODO(zomglings): Add dates as well as block numbers. + result = { + "num_transfers": len(transfers), + "num_mints": num_mints, + "initial_block": args.start, + "terminal_block": args.end, + } + + humbug_token = args.humbug + if humbug_token is None: + humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") + if humbug_token: + title = f"NFT activity on the Ethereum blockchain: Blocks {args.start} to {args.end}" + publish_json( + "nft_ethereum", + humbug_token, + title, + result, + tags=[f"crawler_version:{MOONCRAWL_VERSION}"], + wait=False, + ) + with args.outfile as ofp: + json.dump(result, ofp) def main() -> None: @@ -73,6 +102,23 @@ def main() -> None: default=None, help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", ) + parser_ethereum.add_argument( + "--humbug", + default=None, + help=( + "If you would like to write this data to a Moonstream journal, please provide a Humbug " + "token for that here. (This argument overrides any value set in the " + "MOONSTREAM_HUMBUG_TOKEN environment variable)" + ), + ) + parser_ethereum.add_argument( + "-o", + "--outfile", + type=argparse.FileType("w"), + default=sys.stdout, + help="Optional file to write output to. By default, prints to stdout.", + ) + parser_ethereum.set_defaults(func=ethereum_handler) args = parser.parse_args() diff --git a/crawlers/mooncrawl/mooncrawl/publish.py b/crawlers/mooncrawl/mooncrawl/publish.py index c1765610..bc8a73ae 100644 --- a/crawlers/mooncrawl/mooncrawl/publish.py +++ b/crawlers/mooncrawl/mooncrawl/publish.py @@ -11,6 +11,7 @@ def publish_json( title: str, content: Dict[str, Any], tags: Optional[List[str]] = None, + wait: bool = True, ) -> None: spire_api_url = os.environ.get( "MOONSTREAM_SPIRE_API_URL", "https://spire.bugout.dev" @@ -26,7 +27,7 @@ def publish_json( "Authorization": f"Bearer {humbug_token}", } request_body = {"title": title, "content": json.dumps(content), "tags": tags} - query_parameters = {"sync": True} + query_parameters = {"sync": wait} response = requests.post( report_url, headers=headers, json=request_body, params=query_parameters ) From a564737dbc95e878e25f711bb283bc4db10b3ae4 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 22:37:40 -0700 Subject: [PATCH 08/27] summary function for Ethereum NFTs --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 19 ++------- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 42 +++++++++++++++++++- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 28c52d12..5c90a2bb 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -2,16 +2,15 @@ A command line tool to crawl information about NFTs from various sources. """ import argparse -from dataclasses import asdict import json import os import sys -from typing import Any, cast, Dict, List +from typing import cast from web3 import Web3 from ..ethereum import connect -from .ethereum import get_nft_transfers +from .ethereum import summary as ethereum_summary from ..publish import publish_json from ..settings import MOONSTREAM_IPC_PATH from ..version import MOONCRAWL_VERSION @@ -35,19 +34,7 @@ def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: def ethereum_handler(args: argparse.Namespace) -> None: web3_client = web3_client_from_cli_or_env(args) - transfers = get_nft_transfers(web3_client, args.start, args.end, args.address) - - # TODO(zomglings): Create a function which calculates statistics about ethereum NFTs in the - # ethereum module and call it here. Don't do this calculation here. - num_mints = len([transfer for transfer in transfers if transfer.is_mint]) - - # TODO(zomglings): Add dates as well as block numbers. - result = { - "num_transfers": len(transfers), - "num_mints": num_mints, - "initial_block": args.start, - "terminal_block": args.end, - } + result = ethereum_summary(web3_client, args.start, args.end, args.address) humbug_token = args.humbug if humbug_token is None: diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 5b7c74dd..5595eb44 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, asdict -from typing import cast, List, Optional +from datetime import datetime +from typing import Any, cast, Dict, List, Optional from hexbytes.main import HexBytes from eth_typing.encoding import HexStr @@ -183,3 +184,42 @@ def get_nft_transfers( parsed_transfer = NFTTransfer(**kwargs) # type: ignore nft_transfers.append(parsed_transfer) return nft_transfers + + +def summary( + w3: Web3, + from_block: Optional[int] = None, + to_block: Optional[int] = None, + address: Optional[str] = None, +) -> Dict[str, Any]: + if to_block is None: + to_block = w3.eth.get_block_number() + + # By default, let us summarize 100 blocks worth of NFT transfers + if from_block is None: + from_block = to_block - 100 + + start_block = w3.eth.get_block(from_block) + start_time = datetime.utcfromtimestamp(start_block.timestamp).isoformat() + end_block = w3.eth.get_block(to_block) + end_time = datetime.utcfromtimestamp(end_block.timestamp).isoformat() + + transfers = get_nft_transfers(w3, from_block, to_block, address) + num_mints = sum(transfer.is_mint for transfer in transfers) + + result = { + "date_range": { + "start_time": start_time, + "include_start": True, + "end_time": end_time, + "include_end": True, + }, + "blocks": { + "start": from_block, + "end": to_block, + }, + "num_transfers": len(transfers), + "num_mints": num_mints, + } + + return result From 5a997ad77253c5cd457f501c3a4763a9a4f5da3d Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 22:42:32 -0700 Subject: [PATCH 09/27] Few improvements to "nft ethereum" --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 7 ++++++- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 5c90a2bb..35d256e7 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -36,11 +36,16 @@ def ethereum_handler(args: argparse.Namespace) -> None: web3_client = web3_client_from_cli_or_env(args) result = ethereum_summary(web3_client, args.start, args.end, args.address) + start_time = result.get("date_range", {}).get("start_time", "UNKNOWN") + start_block = result.get("blocks", {}).get("start", -1) + end_time = result.get("date_range", {}).get("end_time", "UNKNOWN") + end_block = result.get("blocks", {}).get("end", -1) + humbug_token = args.humbug if humbug_token is None: humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") if humbug_token: - title = f"NFT activity on the Ethereum blockchain: Blocks {args.start} to {args.end}" + title = f"NFT activity on the Ethereum blockchain: {start_time} (block {start_block}) to {end_time} (block {end_block})" publish_json( "nft_ethereum", humbug_token, diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 5595eb44..1b240087 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -197,7 +197,7 @@ def summary( # By default, let us summarize 100 blocks worth of NFT transfers if from_block is None: - from_block = to_block - 100 + from_block = to_block - 99 start_block = w3.eth.get_block(from_block) start_time = datetime.utcfromtimestamp(start_block.timestamp).isoformat() From ad94507553c7a20e4388d90681a5ded28f119ef2 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 22:57:15 -0700 Subject: [PATCH 10/27] Added support for labelling Ethereum transactions in database We did this using the existing `ethereum_labels` table. We just made the `address_id` column nullable and added an (indexed) `transaction_hash` column. --- ...bb9cffcf_support_labels_on_transactions.py | 53 +++++++++++++++++++ db/moonstreamdb/models.py | 3 +- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py diff --git a/db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py b/db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py new file mode 100644 index 00000000..6688a96e --- /dev/null +++ b/db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py @@ -0,0 +1,53 @@ +"""Support labels on transactions + +Revision ID: 39a5bb9cffcf +Revises: ecb7817db377 +Create Date: 2021-09-02 22:54:39.055168 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "39a5bb9cffcf" +down_revision = "ecb7817db377" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "ethereum_labels", + sa.Column("transaction_hash", sa.VARCHAR(length=256), nullable=True), + ) + op.alter_column( + "ethereum_labels", "address_id", existing_type=sa.INTEGER(), nullable=True + ) + op.create_index( + op.f("ix_ethereum_labels_transaction_hash"), + "ethereum_labels", + ["transaction_hash"], + unique=False, + ) + op.create_unique_constraint( + op.f("uq_opensea_crawler_state_id"), "opensea_crawler_state", ["id"] + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint( + op.f("uq_opensea_crawler_state_id"), "opensea_crawler_state", type_="unique" + ) + op.drop_index( + op.f("ix_ethereum_labels_transaction_hash"), table_name="ethereum_labels" + ) + op.execute("DELETE FROM ethereum_labels WHERE address_id IS NULL;") + op.alter_column( + "ethereum_labels", "address_id", existing_type=sa.INTEGER(), nullable=False + ) + op.drop_column("ethereum_labels", "transaction_hash") + # ### end Alembic commands ### diff --git a/db/moonstreamdb/models.py b/db/moonstreamdb/models.py index 7d7798a4..a4b96786 100644 --- a/db/moonstreamdb/models.py +++ b/db/moonstreamdb/models.py @@ -147,9 +147,10 @@ class EthereumLabel(Base): # type: ignore address_id = Column( Integer, ForeignKey("ethereum_addresses.id", ondelete="CASCADE"), - nullable=False, + nullable=True, index=True, ) + transaction_hash = Column(VARCHAR(256), nullable=True, index=True) label_data = Column(JSONB, nullable=True) created_at = Column( DateTime(timezone=True), server_default=utcnow(), nullable=False From b9a48b464e86f082031c4794bd344d8a2ff7c135 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 23:02:02 -0700 Subject: [PATCH 11/27] Adds foreign key constraint to ethereum_labels.transaction_hash This is something we forgot to do in the previous migration. --- ...d512b2e_support_labels_on_transactions.py} | 19 +++++++++++++------ db/moonstreamdb/models.py | 7 ++++++- 2 files changed, 19 insertions(+), 7 deletions(-) rename db/alembic/versions/{39a5bb9cffcf_support_labels_on_transactions.py => 72f1ad512b2e_support_labels_on_transactions.py} (72%) diff --git a/db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py b/db/alembic/versions/72f1ad512b2e_support_labels_on_transactions.py similarity index 72% rename from db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py rename to db/alembic/versions/72f1ad512b2e_support_labels_on_transactions.py index 6688a96e..3f3b2041 100644 --- a/db/alembic/versions/39a5bb9cffcf_support_labels_on_transactions.py +++ b/db/alembic/versions/72f1ad512b2e_support_labels_on_transactions.py @@ -1,8 +1,8 @@ """Support labels on transactions -Revision ID: 39a5bb9cffcf +Revision ID: 72f1ad512b2e Revises: ecb7817db377 -Create Date: 2021-09-02 22:54:39.055168 +Create Date: 2021-09-02 22:59:46.408595 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = "39a5bb9cffcf" +revision = "72f1ad512b2e" down_revision = "ecb7817db377" branch_labels = None depends_on = None @@ -31,8 +31,13 @@ def upgrade(): ["transaction_hash"], unique=False, ) - op.create_unique_constraint( - op.f("uq_opensea_crawler_state_id"), "opensea_crawler_state", ["id"] + op.create_foreign_key( + op.f("fk_ethereum_labels_transaction_hash_ethereum_transactions"), + "ethereum_labels", + "ethereum_transactions", + ["transaction_hash"], + ["hash"], + ondelete="CASCADE", ) # ### end Alembic commands ### @@ -40,7 +45,9 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.drop_constraint( - op.f("uq_opensea_crawler_state_id"), "opensea_crawler_state", type_="unique" + op.f("fk_ethereum_labels_transaction_hash_ethereum_transactions"), + "ethereum_labels", + type_="foreignkey", ) op.drop_index( op.f("ix_ethereum_labels_transaction_hash"), table_name="ethereum_labels" diff --git a/db/moonstreamdb/models.py b/db/moonstreamdb/models.py index a4b96786..40f349ca 100644 --- a/db/moonstreamdb/models.py +++ b/db/moonstreamdb/models.py @@ -150,7 +150,12 @@ class EthereumLabel(Base): # type: ignore nullable=True, index=True, ) - transaction_hash = Column(VARCHAR(256), nullable=True, index=True) + transaction_hash = Column( + VARCHAR(256), + ForeignKey("ethereum_transactions.hash", ondelete="CASCADE"), + nullable=True, + index=True, + ) label_data = Column(JSONB, nullable=True) created_at = Column( DateTime(timezone=True), server_default=utcnow(), nullable=False From a4fff6498f66789934d4af26fd42a8cfb6e5eed5 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 23:04:23 -0700 Subject: [PATCH 12/27] Bumped moonstreamdb version to 0.1.0 --- db/moonstreamdb/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/moonstreamdb/version.py b/db/moonstreamdb/version.py index d02ca139..bf9ce2fa 100644 --- a/db/moonstreamdb/version.py +++ b/db/moonstreamdb/version.py @@ -2,4 +2,4 @@ Moonstream database version. """ -MOONSTREAMDB_VERSION = "0.0.3" +MOONSTREAMDB_VERSION = "0.1.0" From 9d5421f8667814a09b12b0a7652702c462870b26 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 23:05:06 -0700 Subject: [PATCH 13/27] Bumped moonstreamdb dependency in mooncrawl --- crawlers/mooncrawl/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawlers/mooncrawl/setup.py b/crawlers/mooncrawl/setup.py index dbd15182..8e7eda49 100644 --- a/crawlers/mooncrawl/setup.py +++ b/crawlers/mooncrawl/setup.py @@ -32,7 +32,7 @@ setup( package_data={"mooncrawl": ["py.typed"]}, zip_safe=False, install_requires=[ - "moonstreamdb @ git+https://git@github.com/bugout-dev/moonstream.git@39d2b8e36a49958a9ae085ec2cc1be3fc732b9d0#egg=moonstreamdb&subdirectory=db", + "moonstreamdb @ git+https://git@github.com/bugout-dev/moonstream.git@a4fff6498f66789934d4af26fd42a8cfb6e5eed5#egg=moonstreamdb&subdirectory=db", "humbug", "python-dateutil", "requests", From dbb5a98b68015e64b3b996d9cccab7ac96952db4 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Thu, 2 Sep 2021 23:05:51 -0700 Subject: [PATCH 14/27] Updated moonstreamdb version in API requirements.txt --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 4cc7c386..2ed443b6 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -12,7 +12,7 @@ h11==0.12.0 idna==3.2 jmespath==0.10.0 humbug==0.2.7 --e git+https://git@github.com/bugout-dev/moonstream.git@94135b054cabb9dc11b0a2406431619279979469#egg=moonstreamdb&subdirectory=db +-e git+https://git@github.com/bugout-dev/moonstream.git@a4fff6498f66789934d4af26fd42a8cfb6e5eed5#egg=moonstreamdb&subdirectory=db mypy==0.910 mypy-extensions==0.4.3 pathspec==0.9.0 From 60aa3006edfa8d030ecc53794ea82367b2963446 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Fri, 3 Sep 2021 01:02:09 -0700 Subject: [PATCH 15/27] [WIP] Adding "erc721" label to unlabelled NFT contracts --- crawlers/mooncrawl/mooncrawl/ethereum.py | 1 - crawlers/mooncrawl/mooncrawl/nft/cli.py | 65 ++++++-- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 161 +++++++++++++++++-- 3 files changed, 203 insertions(+), 24 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/ethereum.py b/crawlers/mooncrawl/mooncrawl/ethereum.py index 723cc717..672bfe61 100644 --- a/crawlers/mooncrawl/mooncrawl/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/ethereum.py @@ -1,7 +1,6 @@ from concurrent.futures import Future, ProcessPoolExecutor, wait from dataclasses import dataclass from datetime import datetime -from os import close from typing import Any, Callable, Dict, List, Optional, Tuple, Union from sqlalchemy import desc, Column diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 35d256e7..e000a47e 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -7,10 +7,11 @@ import os import sys from typing import cast +from moonstreamdb.db import yield_db_session_ctx from web3 import Web3 from ..ethereum import connect -from .ethereum import summary as ethereum_summary +from .ethereum import summary as ethereum_summary, add_labels from ..publish import publish_json from ..settings import MOONSTREAM_IPC_PATH from ..version import MOONCRAWL_VERSION @@ -32,7 +33,13 @@ def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: return connect(web3_connection_string) -def ethereum_handler(args: argparse.Namespace) -> None: +def ethereum_label_handler(args: argparse.Namespace) -> None: + web3_client = web3_client_from_cli_or_env(args) + with yield_db_session_ctx() as db_session: + add_labels(web3_client, db_session, args.start, args.end, args.address) + + +def ethereum_summary_handler(args: argparse.Namespace) -> None: web3_client = web3_client_from_cli_or_env(args) result = ethereum_summary(web3_client, args.start, args.end, args.address) @@ -67,34 +74,73 @@ def main() -> None: "ethereum", description="Collect information about NFTs from Ethereum blockchains", ) - parser_ethereum.add_argument( + parser_ethereum.set_defaults(func=lambda _: parser_ethereum.print_help()) + subparsers_ethereum = parser_ethereum.add_subparsers() + + parser_ethereum_label = subparsers_ethereum.add_parser( + "label", + description="Label addresses and transactions in databse using crawled NFT transfer information", + ) + parser_ethereum_label.add_argument( "-s", "--start", type=int, default=None, help="Starting block number (inclusive if block available)", ) - parser_ethereum.add_argument( + parser_ethereum_label.add_argument( "-e", "--end", type=int, default=None, help="Ending block number (inclusive if block available)", ) - parser_ethereum.add_argument( + parser_ethereum_label.add_argument( "-a", "--address", type=str, default=None, help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", ) - parser_ethereum.add_argument( + parser_ethereum_label.add_argument( "--web3", type=str, default=None, help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", ) - parser_ethereum.add_argument( + parser_ethereum_label.set_defaults(func=ethereum_label_handler) + + parser_ethereum_summary = subparsers_ethereum.add_parser( + "summary", description="Generate Ethereum NFT summary" + ) + parser_ethereum_summary.add_argument( + "-s", + "--start", + type=int, + default=None, + help="Starting block number (inclusive if block available)", + ) + parser_ethereum_summary.add_argument( + "-e", + "--end", + type=int, + default=None, + help="Ending block number (inclusive if block available)", + ) + parser_ethereum_summary.add_argument( + "-a", + "--address", + type=str, + default=None, + help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", + ) + parser_ethereum_summary.add_argument( + "--web3", + type=str, + default=None, + help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", + ) + parser_ethereum_summary.add_argument( "--humbug", default=None, help=( @@ -103,15 +149,14 @@ def main() -> None: "MOONSTREAM_HUMBUG_TOKEN environment variable)" ), ) - parser_ethereum.add_argument( + parser_ethereum_summary.add_argument( "-o", "--outfile", type=argparse.FileType("w"), default=sys.stdout, help="Optional file to write output to. By default, prints to stdout.", ) - - parser_ethereum.set_defaults(func=ethereum_handler) + parser_ethereum_summary.set_defaults(func=ethereum_summary_handler) args = parser.parse_args() args.func(args) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 1b240087..7e829657 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,15 +1,26 @@ from dataclasses import dataclass, asdict from datetime import datetime -from typing import Any, cast, Dict, List, Optional +from mooncrawl.nft.cli import web3_client_from_cli_or_env from hexbytes.main import HexBytes +from typing import Any, cast, Dict, List, Optional, Set, Tuple from eth_typing.encoding import HexStr +from moonstreamdb.models import EthereumAddress, EthereumLabel, EthereumTransaction +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.orm import Session from tqdm import tqdm from web3 import Web3 from web3.types import FilterParams, LogReceipt from web3._utils.events import get_event_data +# Default length (in blocks) of an Ethereum NFT crawl. +DEFAULT_CRAWL_LENGTH = 100 + +NFT_LABEL = "erc721" +MINT_LABEL = "nft_mint" +TRANSFER_LABEL = "nft_transfer" + # First abi is for old NFT's like crypto kitties # The erc721 standart requieres that Transfer event is indexed for all arguments # That is how we get distinguished from erc20 transfer events @@ -186,25 +197,149 @@ def get_nft_transfers( return nft_transfers +def get_block_bounds( + w3: Web3, from_block: Optional[int] = None, to_block: Optional[int] = None +) -> Tuple[int, int]: + """ + Returns starting and ending blocks for an "nft ethereum" crawl subject to the following rules: + 1. Neither start nor end can be None. + 2. If both from_block and to_block are None, then start = end - DEFAULT_CRAWL_LENGTH + 1 + """ + end = to_block + if end is None: + end = w3.eth.get_block_number() + + start = from_block + if start is None: + start = end - DEFAULT_CRAWL_LENGTH + 1 + + return start, end + + +def ensure_addresses(db_session: Session, addresses: Set[str]) -> Dict[str, int]: + """ + Ensures that the given addresses are registered in the ethereum_addresses table of the given + moonstreamdb database connection. Returns a mapping from the addresses to the ids of their + corresponding row in the ethereum_addresses table. + """ + if len(addresses) == 0: + return {} + + # SQLAlchemy reference: + # https://docs.sqlalchemy.org/en/14/orm/persistence_techniques.html#using-postgresql-on-conflict-with-returning-to-return-upserted-orm-objects + stmt = ( + insert(EthereumAddress) + .values([{"address": address} for address in addresses]) + .on_conflict_do_nothing(index_elements=[EthereumAddress.address]) + ) + + try: + db_session.execute(stmt) + except Exception: + db_session.rollback() + raise + + rows = ( + db_session.query(EthereumAddress) + .filter(EthereumAddress.address.in_(addresses)) + .all() + ) + address_ids = {address.address: address.id for address in rows} + return address_ids + + +def add_labels( + w3: Web3, + db_session: Session, + from_block: Optional[int] = None, + to_block: Optional[int] = None, + address: Optional[str] = None, + batch_size: int = 100, +) -> None: + """ + Crawls blocks between from_block and to_block checking for NFT mints and transfers. + + For each mint/transfer, if the contract address involved in the operation has not already been + added to the ethereum_addresses table, this method adds it and labels the address with the NFT + collection metadata. + + It also adds mint/transfer labels to each (transaction, contract address) pair describing the + NFT operation they represent. + + ## NFT collection metadata labels + + Label has type "erc721". + + Label data: {"name": "", "symbol": "", "totalSupply": ""} + + ## Mint and transfer labels + Adds labels to the database for each transaction that involved an NFT transfer. Adds the contract + address in the address_id column of ethereum_labels. + + + Labels (transaction, contract address) pair as: + - "nft_mint" if the transaction minted a token on the NFT contract + - "nft_transfer" if the transaction transferred a token on the NFT contract + + Label data will always be of the form: {"token_id": ""} + + Arguments: + - w3: Web3 client + - db_session: Connection to Postgres database with moonstreamdb schema + - from_block and to_block: Blocks to crawl + - address: Optional contract address representing an NFT collection to restrict the crawl to + - batch_size: Number of mint/transfer transactions to label at a time (per database transaction) + """ + assert batch_size > 0, f"Batch size must be positive (received {batch_size})" + + start, end = get_block_bounds(w3, from_block, to_block) + transfers = get_nft_transfers(w3, start, end, address) + + batch_start = 0 + batch_end = batch_size + + address_ids: Dict[str, int] = {} + + with tqdm(total=len(transfers)) as pbar: + while batch_start < batch_end: + job = transfers[batch_start:batch_end] + contract_addresses = {transfer.contract_address for transfer in job} + updated_address_ids = ensure_addresses(db_session, contract_addresses) + for address, address_id in updated_address_ids.items(): + address_ids[address] = address_id + + labelled_address_ids = ( + db_session.query(EthereumLabel) + .filter(EthereumLabel.label == NFT_LABEL) + .filter(EthereumLabel.address_id.in_(address_ids.values())) + .all() + ) + unlabelled_address_ids = [ + address_id + for address_id in address_ids.values() + if address_id not in labelled_address_ids + ] + # TODO(yhtyyar): Continue + + # Update batch at end of iteration + pbar.update(batch_end - batch_start) + batch_start = batch_end + batch_end = min(batch_end + batch_size, len(transfers)) + + def summary( w3: Web3, from_block: Optional[int] = None, to_block: Optional[int] = None, address: Optional[str] = None, ) -> Dict[str, Any]: - if to_block is None: - to_block = w3.eth.get_block_number() - - # By default, let us summarize 100 blocks worth of NFT transfers - if from_block is None: - from_block = to_block - 99 - - start_block = w3.eth.get_block(from_block) + start, end = get_block_bounds(w3, from_block, to_block) + start_block = w3.eth.get_block(start) start_time = datetime.utcfromtimestamp(start_block.timestamp).isoformat() - end_block = w3.eth.get_block(to_block) + end_block = w3.eth.get_block(end) end_time = datetime.utcfromtimestamp(end_block.timestamp).isoformat() - transfers = get_nft_transfers(w3, from_block, to_block, address) + transfers = get_nft_transfers(w3, start, end, address) num_mints = sum(transfer.is_mint for transfer in transfers) result = { @@ -215,8 +350,8 @@ def summary( "include_end": True, }, "blocks": { - "start": from_block, - "end": to_block, + "start": start, + "end": end, }, "num_transfers": len(transfers), "num_mints": num_mints, From 7031991661cf996f21d47f8e2dd3b804ede9be02 Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Fri, 3 Sep 2021 20:10:35 +0300 Subject: [PATCH 16/27] added erc721 token --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 52 ++++++++++++++++---- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 7e829657..0c255cab 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,6 +1,5 @@ from dataclasses import dataclass, asdict from datetime import datetime -from mooncrawl.nft.cli import web3_client_from_cli_or_env from hexbytes.main import HexBytes from typing import Any, cast, Dict, List, Optional, Set, Tuple @@ -248,6 +247,34 @@ def ensure_addresses(db_session: Session, addresses: Set[str]) -> Dict[str, int] return address_ids +def label_erc721_addresses( + w3: Web3, db_session: Session, address_ids: List[Tuple[str, int]] +): + labels: List[EthereumLabel] = [] + for address, id in address_ids: + try: + contract_info = get_erc721_contract_info(w3, address) + labels.append( + EthereumLabel( + address_id=id, + label=NFT_LABEL, + label_data={ + "name": contract_info.name, + "symbol": contract_info.symbol, + "totalSupply": contract_info.total_supply, + }, + ) + ) + except: + print(f"Failed to get metadata of contract {address}") + try: + db_session.bulk_save_objects(labels) + db_session.commit() + except Exception as e: + db_session.rollback() + print(f"Failed to save labels to db:\n{e}") + + def add_labels( w3: Web3, db_session: Session, @@ -308,18 +335,23 @@ def add_labels( for address, address_id in updated_address_ids.items(): address_ids[address] = address_id - labelled_address_ids = ( - db_session.query(EthereumLabel) - .filter(EthereumLabel.label == NFT_LABEL) - .filter(EthereumLabel.address_id.in_(address_ids.values())) - .all() - ) + labelled_address_ids = [ + label.address_id + for label in ( + db_session.query(EthereumLabel) + .filter(EthereumLabel.label == NFT_LABEL) + .filter(EthereumLabel.address_id.in_(address_ids.values())) + .all() + ) + ] unlabelled_address_ids = [ - address_id - for address_id in address_ids.values() + (address, address_id) + for address, address_id in address_ids.items() if address_id not in labelled_address_ids ] - # TODO(yhtyyar): Continue + + # Adding 'erc721' labels + label_erc721_addresses(w3, db_session, unlabelled_address_ids) # Update batch at end of iteration pbar.update(batch_end - batch_start) From d2ff8d46eebe0f683625d8864eecab152840d8ac Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Fri, 3 Sep 2021 10:50:13 -0700 Subject: [PATCH 17/27] [WIP] nft_mint and nft_transfer labels Not yet tested. --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 93 ++++++++++++++++++-- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 0c255cab..e163e0d4 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, asdict from datetime import datetime +import logging from hexbytes.main import HexBytes from typing import Any, cast, Dict, List, Optional, Set, Tuple @@ -12,7 +13,6 @@ from web3 import Web3 from web3.types import FilterParams, LogReceipt from web3._utils.events import get_event_data - # Default length (in blocks) of an Ethereum NFT crawl. DEFAULT_CRAWL_LENGTH = 100 @@ -20,6 +20,10 @@ NFT_LABEL = "erc721" MINT_LABEL = "nft_mint" TRANSFER_LABEL = "nft_transfer" + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + # First abi is for old NFT's like crypto kitties # The erc721 standart requieres that Transfer event is indexed for all arguments # That is how we get distinguished from erc20 transfer events @@ -220,6 +224,8 @@ def ensure_addresses(db_session: Session, addresses: Set[str]) -> Dict[str, int] Ensures that the given addresses are registered in the ethereum_addresses table of the given moonstreamdb database connection. Returns a mapping from the addresses to the ids of their corresponding row in the ethereum_addresses table. + + Returns address_ids for *every* address, not just the new ones. """ if len(addresses) == 0: return {} @@ -249,7 +255,7 @@ def ensure_addresses(db_session: Session, addresses: Set[str]) -> Dict[str, int] def label_erc721_addresses( w3: Web3, db_session: Session, address_ids: List[Tuple[str, int]] -): +) -> None: labels: List[EthereumLabel] = [] for address, id in address_ids: try: @@ -266,13 +272,69 @@ def label_erc721_addresses( ) ) except: - print(f"Failed to get metadata of contract {address}") + logger.error(f"Failed to get metadata of contract {address}") try: db_session.bulk_save_objects(labels) db_session.commit() except Exception as e: db_session.rollback() - print(f"Failed to save labels to db:\n{e}") + logger.error(f"Failed to save labels to db:\n{e}") + + +def label_key(label: EthereumLabel) -> Tuple[str, int, str, str, str]: + return ( + label.transaction_hash, + label.address_id, + label.label_data["tokenId"].astext(), + label.label_data["from"].astext(), + label.label_data["to"].astext(), + ) + + +def label_transfers( + db_session: Session, transfers: List[NFTTransfer], address_ids: Dict[str, int] +) -> None: + """ + Adds "nft_mint" or "nft_transfer" to the (transaction, address) pair represented by each of the + given NFTTransfer objects. + """ + transaction_hashes: List[str] = [] + labels: List[EthereumLabel] = [] + for transfer in transfers: + transaction_hash = transfer.transfer_tx + transaction_hashes.append(transaction_hash) + address_id = address_ids.get(transfer.contract_address) + label = MINT_LABEL if transfer.is_mint else TRANSFER_LABEL + row = EthereumLabel( + address_id=address_id, + transaction_hash=transaction_hash, + label=label, + label_data={ + "tokenId": transfer.tokenId, + "from": transfer.transfer_from, + "to": transfer.transfer_to, + }, + ) + labels.append(row) + + existing_labels = ( + db_session.query(EthereumLabel) + .filter(EthereumLabel.address_id.in_(address_ids.values())) + .filter(EthereumLabel.transaction_hash.in_(transaction_hashes)) + ).all() + existing_label_keys = {label_key(label) for label in existing_labels} + + new_labels = [ + label for label in labels if label_key(label) not in existing_label_keys + ] + + try: + db_session.bulk_save_objects(new_labels) + db_session.commit() + except Exception as e: + db_session.rollback() + logger.error("Could not write transfer/mint labels to database") + logger.error(e) def add_labels( @@ -297,7 +359,12 @@ def add_labels( Label has type "erc721". - Label data: {"name": "", "symbol": "", "totalSupply": ""} + Label data: + { + "name": "", + "symbol": "", + "totalSupply": "" + } ## Mint and transfer labels Adds labels to the database for each transaction that involved an NFT transfer. Adds the contract @@ -308,7 +375,12 @@ def add_labels( - "nft_mint" if the transaction minted a token on the NFT contract - "nft_transfer" if the transaction transferred a token on the NFT contract - Label data will always be of the form: {"token_id": ""} + Label data will always be of the form: + { + "tokenId": "", + "from": "", + "to": "" + } Arguments: - w3: Web3 client @@ -350,9 +422,12 @@ def add_labels( if address_id not in labelled_address_ids ] - # Adding 'erc721' labels + # Add 'erc721' labels label_erc721_addresses(w3, db_session, unlabelled_address_ids) + # Add mint/transfer labels to (transaction, contract_address) pairs + label_transfers(db_session, job, updated_address_ids) + # Update batch at end of iteration pbar.update(batch_end - batch_start) batch_start = batch_end @@ -367,9 +442,9 @@ def summary( ) -> Dict[str, Any]: start, end = get_block_bounds(w3, from_block, to_block) start_block = w3.eth.get_block(start) - start_time = datetime.utcfromtimestamp(start_block.timestamp).isoformat() + start_time = datetime.utcfromtimestamp(start_block["timestamp"]).isoformat() end_block = w3.eth.get_block(end) - end_time = datetime.utcfromtimestamp(end_block.timestamp).isoformat() + end_time = datetime.utcfromtimestamp(end_block["timestamp"]).isoformat() transfers = get_nft_transfers(w3, start, end, address) num_mints = sum(transfer.is_mint for transfer in transfers) From 506bad759db90a5cfc1250ecbfbff4c7b6b34502 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Fri, 3 Sep 2021 13:30:13 -0700 Subject: [PATCH 18/27] Fixed some bugs in ethcrawler and nft ethereum crawler Tested that the "nft ethereum label" is working correctly. --- crawlers/mooncrawl/mooncrawl/ethcrawler.py | 2 +- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 47 +++++++++++++------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/ethcrawler.py b/crawlers/mooncrawl/mooncrawl/ethcrawler.py index 9e6d564b..6728e912 100644 --- a/crawlers/mooncrawl/mooncrawl/ethcrawler.py +++ b/crawlers/mooncrawl/mooncrawl/ethcrawler.py @@ -92,7 +92,7 @@ def ethcrawler_blocks_sync_handler(args: argparse.Namespace) -> None: while True: bottom_block_number, top_block_number = get_latest_blocks(args.confirmations) if bottom_block_number is None: - raise ValueError("Variable bottom_block_number can't be None") + bottom_block_number = 0 bottom_block_number = max(bottom_block_number + 1, starting_block) if bottom_block_number >= top_block_number: print( diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index e163e0d4..fd676f83 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -97,22 +97,37 @@ erc721_functions_abi = [ @dataclass -class NFT_contract: +class NFTContract: address: str - name: str - symbol: str - total_supply: str + name: Optional[str] = None + symbol: Optional[str] = None + total_supply: Optional[str] = None -def get_erc721_contract_info(w3: Web3, address: str) -> NFT_contract: +def get_erc721_contract_info(w3: Web3, address: str) -> NFTContract: contract = w3.eth.contract( address=w3.toChecksumAddress(address), abi=erc721_functions_abi ) - return NFT_contract( - address=address, - name=contract.functions.name().call(), - symbol=contract.functions.symbol().call(), - total_supply=contract.functions.totalSupply().call(), + name: Optional[str] = None + try: + name = contract.functions.name().call() + except: + logger.error(f"Could not get name for potential NFT contract: {address}") + + symbol: Optional[str] = None + try: + symbol = contract.functions.symbol().call() + except: + logger.error(f"Could not get symbol for potential NFT contract: {address}") + + totalSupply: Optional[str] = None + try: + totalSupply = contract.functions.totalSupply().call() + except: + logger.error(f"Could not get totalSupply for potential NFT contract: {address}") + + return NFTContract( + address=address, name=name, symbol=symbol, total_supply=totalSupply ) @@ -240,6 +255,7 @@ def ensure_addresses(db_session: Session, addresses: Set[str]) -> Dict[str, int] try: db_session.execute(stmt) + db_session.commit() except Exception: db_session.rollback() raise @@ -271,8 +287,9 @@ def label_erc721_addresses( }, ) ) - except: + except Exception as e: logger.error(f"Failed to get metadata of contract {address}") + logger.error(e) try: db_session.bulk_save_objects(labels) db_session.commit() @@ -281,13 +298,13 @@ def label_erc721_addresses( logger.error(f"Failed to save labels to db:\n{e}") -def label_key(label: EthereumLabel) -> Tuple[str, int, str, str, str]: +def label_key(label: EthereumLabel) -> Tuple[str, int, int, str, str]: return ( label.transaction_hash, label.address_id, - label.label_data["tokenId"].astext(), - label.label_data["from"].astext(), - label.label_data["to"].astext(), + label.label_data["tokenId"], + label.label_data["from"], + label.label_data["to"], ) From d0cbff9277b31c2ebb743f38071faba7f8d951d6 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Fri, 3 Sep 2021 13:33:23 -0700 Subject: [PATCH 19/27] Superficial progress bar updates --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 62 ++++++++++---------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index fd676f83..78fe235f 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -200,7 +200,7 @@ def get_nft_transfers( logs = w3.eth.get_logs(filter_params) nft_transfers: List[NFTTransfer] = [] - for log in tqdm(logs): + for log in tqdm(logs, desc="Crawling NFT transfers from Ethereum node"): nft_transfer = decode_nft_transfer_data(w3, log) if nft_transfer is not None: kwargs = { @@ -416,39 +416,41 @@ def add_labels( address_ids: Dict[str, int] = {} - with tqdm(total=len(transfers)) as pbar: - while batch_start < batch_end: - job = transfers[batch_start:batch_end] - contract_addresses = {transfer.contract_address for transfer in job} - updated_address_ids = ensure_addresses(db_session, contract_addresses) - for address, address_id in updated_address_ids.items(): - address_ids[address] = address_id + pbar = tqdm(total=len(transfers)) + pbar.set_description("Processing NFT transfer") + while batch_start < batch_end: + job = transfers[batch_start:batch_end] + contract_addresses = {transfer.contract_address for transfer in job} + updated_address_ids = ensure_addresses(db_session, contract_addresses) + for address, address_id in updated_address_ids.items(): + address_ids[address] = address_id - labelled_address_ids = [ - label.address_id - for label in ( - db_session.query(EthereumLabel) - .filter(EthereumLabel.label == NFT_LABEL) - .filter(EthereumLabel.address_id.in_(address_ids.values())) - .all() - ) - ] - unlabelled_address_ids = [ - (address, address_id) - for address, address_id in address_ids.items() - if address_id not in labelled_address_ids - ] + labelled_address_ids = [ + label.address_id + for label in ( + db_session.query(EthereumLabel) + .filter(EthereumLabel.label == NFT_LABEL) + .filter(EthereumLabel.address_id.in_(address_ids.values())) + .all() + ) + ] + unlabelled_address_ids = [ + (address, address_id) + for address, address_id in address_ids.items() + if address_id not in labelled_address_ids + ] - # Add 'erc721' labels - label_erc721_addresses(w3, db_session, unlabelled_address_ids) + # Add 'erc721' labels + label_erc721_addresses(w3, db_session, unlabelled_address_ids) - # Add mint/transfer labels to (transaction, contract_address) pairs - label_transfers(db_session, job, updated_address_ids) + # Add mint/transfer labels to (transaction, contract_address) pairs + label_transfers(db_session, job, updated_address_ids) - # Update batch at end of iteration - pbar.update(batch_end - batch_start) - batch_start = batch_end - batch_end = min(batch_end + batch_size, len(transfers)) + # Update batch at end of iteration + pbar.update(batch_end - batch_start) + batch_start = batch_end + batch_end = min(batch_end + batch_size, len(transfers)) + pbar.close() def summary( From 19c0dfeeb39e1c3691aa9a9e708970f3c5fe7132 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Fri, 3 Sep 2021 14:10:13 -0700 Subject: [PATCH 20/27] [WIP] Working prototypes for "nft ethereum label" and "nft ethereum summary" TODO: - `nft ethereum label` should have reasonable defaults for `--start` and `--end`. It should use the currenty block for `--end` and the last labelled block for `--start`? - Keep on plugging away at `nft ethereum summary` as per: https://github.com/bugout-dev/moonstream/issues/223 --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 73 +++++++--------- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 87 ++++++++++++++------ 2 files changed, 91 insertions(+), 69 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index e000a47e..6fe0224d 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -2,11 +2,11 @@ A command line tool to crawl information about NFTs from various sources. """ import argparse -import json -import os +from datetime import datetime, timedelta, timezone import sys from typing import cast +import dateutil.parser from moonstreamdb.db import yield_db_session_ctx from web3 import Web3 @@ -40,32 +40,34 @@ def ethereum_label_handler(args: argparse.Namespace) -> None: def ethereum_summary_handler(args: argparse.Namespace) -> None: - web3_client = web3_client_from_cli_or_env(args) - result = ethereum_summary(web3_client, args.start, args.end, args.address) + with yield_db_session_ctx() as db_session: + result = ethereum_summary(db_session, args.start, args.end) - start_time = result.get("date_range", {}).get("start_time", "UNKNOWN") - start_block = result.get("blocks", {}).get("start", -1) - end_time = result.get("date_range", {}).get("end_time", "UNKNOWN") - end_block = result.get("blocks", {}).get("end", -1) + # start_time = result.get("date_range", {}).get("start_time", "UNKNOWN") + # start_block = result.get("blocks", {}).get("start", -1) + # end_time = result.get("date_range", {}).get("end_time", "UNKNOWN") + # end_block = result.get("blocks", {}).get("end", -1) - humbug_token = args.humbug - if humbug_token is None: - humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") - if humbug_token: - title = f"NFT activity on the Ethereum blockchain: {start_time} (block {start_block}) to {end_time} (block {end_block})" - publish_json( - "nft_ethereum", - humbug_token, - title, - result, - tags=[f"crawler_version:{MOONCRAWL_VERSION}"], - wait=False, - ) - with args.outfile as ofp: - json.dump(result, ofp) + # humbug_token = args.humbug + # if humbug_token is None: + # humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") + # if humbug_token: + # title = f"NFT activity on the Ethereum blockchain: {start_time} (block {start_block}) to {end_time} (block {end_block})" + # publish_json( + # "nft_ethereum", + # humbug_token, + # title, + # result, + # tags=[f"crawler_version:{MOONCRAWL_VERSION}"], + # wait=False, + # ) + # with args.outfile as ofp: + # json.dump(result, ofp) def main() -> None: + time_now = datetime.now(timezone.utc) + parser = argparse.ArgumentParser(description="Moonstream NFT crawlers") parser.set_defaults(func=lambda _: parser.print_help()) subcommands = parser.add_subparsers(description="Subcommands") @@ -116,29 +118,16 @@ def main() -> None: parser_ethereum_summary.add_argument( "-s", "--start", - type=int, - default=None, - help="Starting block number (inclusive if block available)", + type=dateutil.parser.parse, + default=(time_now - timedelta(hours=1, minutes=0)).isoformat(), + help=f"Start time for window to calculate NFT statistics (default: {(time_now - timedelta(hours=1,minutes=0)).isoformat()})", ) parser_ethereum_summary.add_argument( "-e", "--end", - type=int, - default=None, - help="Ending block number (inclusive if block available)", - ) - parser_ethereum_summary.add_argument( - "-a", - "--address", - type=str, - default=None, - help="(Optional) NFT contract address that you want to limit the crawl to, e.g. 0x06012c8cf97BEaD5deAe237070F9587f8E7A266d for CryptoKitties.", - ) - parser_ethereum_summary.add_argument( - "--web3", - type=str, - default=None, - help="(Optional) Web3 connection string. If not provided, uses the value specified by MOONSTREAM_IPC_PATH environment variable.", + type=dateutil.parser.parse, + default=time_now.isoformat(), + help=f"End time for window to calculate NFT statistics (default: {time_now.isoformat()})", ) parser_ethereum_summary.add_argument( "--humbug", diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 78fe235f..dfe0bac1 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -5,7 +5,13 @@ from hexbytes.main import HexBytes from typing import Any, cast, Dict, List, Optional, Set, Tuple from eth_typing.encoding import HexStr -from moonstreamdb.models import EthereumAddress, EthereumLabel, EthereumTransaction +from moonstreamdb.models import ( + EthereumAddress, + EthereumBlock, + EthereumLabel, + EthereumTransaction, +) +from sqlalchemy import and_ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session from tqdm import tqdm @@ -13,6 +19,8 @@ from web3 import Web3 from web3.types import FilterParams, LogReceipt from web3._utils.events import get_event_data +from ..ethereum import DateRange + # Default length (in blocks) of an Ethereum NFT crawl. DEFAULT_CRAWL_LENGTH = 100 @@ -454,33 +462,58 @@ def add_labels( def summary( - w3: Web3, - from_block: Optional[int] = None, - to_block: Optional[int] = None, - address: Optional[str] = None, + db_session: Session, + start_time: datetime, + end_time: datetime, ) -> Dict[str, Any]: - start, end = get_block_bounds(w3, from_block, to_block) - start_block = w3.eth.get_block(start) - start_time = datetime.utcfromtimestamp(start_block["timestamp"]).isoformat() - end_block = w3.eth.get_block(end) - end_time = datetime.utcfromtimestamp(end_block["timestamp"]).isoformat() + start_timestamp = int(start_time.timestamp()) + end_timestamp = int(end_time.timestamp()) - transfers = get_nft_transfers(w3, start, end, address) - num_mints = sum(transfer.is_mint for transfer in transfers) + base_query = ( + db_session.query( + EthereumLabel.label, + EthereumLabel.label_data, + EthereumLabel.address_id, + EthereumTransaction.hash, + EthereumTransaction.value, + EthereumBlock.block_number, + EthereumBlock.timestamp, + ) + .join( + EthereumTransaction, + EthereumLabel.transaction_hash == EthereumTransaction.hash, + ) + .join( + EthereumBlock, + EthereumTransaction.block_number == EthereumBlock.block_number, + ) + .filter( + and_( + EthereumBlock.timestamp >= start_timestamp, + EthereumBlock.timestamp <= end_timestamp, + ) + ) + .filter(EthereumLabel.label.in_([MINT_LABEL, TRANSFER_LABEL])) + ) - result = { - "date_range": { - "start_time": start_time, - "include_start": True, - "end_time": end_time, - "include_end": True, - }, - "blocks": { - "start": start, - "end": end, - }, - "num_transfers": len(transfers), - "num_mints": num_mints, - } + print(base_query.distinct(EthereumTransaction.hash).count()) - return result + return {} + + +# result = { +# "date_range": { +# "start_time": start_time, +# "include_start": True, +# "end_time": end_time, +# "include_end": True, +# }, +# "blocks": { +# "start": start, +# "end": end, +# }, +# "num_transfers": len(transfers), +# "num_mints": num_mints, +# } + +# return result From 5cc7def647d894829f1b5642f2565051cd94997a Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Sun, 5 Sep 2021 21:14:01 -0700 Subject: [PATCH 21/27] Working summaries involving nft mints, transfers, value This is currently only done for the past hour. Next step is to modify the CLI to do it for: 1. Last hour 2. Last 24 hours 3. Last week --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 10 +- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 124 +++++++++++++------ 2 files changed, 86 insertions(+), 48 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 6fe0224d..514035b6 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -3,6 +3,7 @@ A command line tool to crawl information about NFTs from various sources. """ import argparse from datetime import datetime, timedelta, timezone +import json import sys from typing import cast @@ -43,11 +44,6 @@ def ethereum_summary_handler(args: argparse.Namespace) -> None: with yield_db_session_ctx() as db_session: result = ethereum_summary(db_session, args.start, args.end) - # start_time = result.get("date_range", {}).get("start_time", "UNKNOWN") - # start_block = result.get("blocks", {}).get("start", -1) - # end_time = result.get("date_range", {}).get("end_time", "UNKNOWN") - # end_block = result.get("blocks", {}).get("end", -1) - # humbug_token = args.humbug # if humbug_token is None: # humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") @@ -61,8 +57,8 @@ def ethereum_summary_handler(args: argparse.Namespace) -> None: # tags=[f"crawler_version:{MOONCRAWL_VERSION}"], # wait=False, # ) - # with args.outfile as ofp: - # json.dump(result, ofp) + with args.outfile as ofp: + json.dump(result, ofp) def main() -> None: diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index dfe0bac1..1a77bf59 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -11,9 +11,9 @@ from moonstreamdb.models import ( EthereumLabel, EthereumTransaction, ) -from sqlalchemy import and_ +from sqlalchemy import and_, func from sqlalchemy.dialects.postgresql import insert -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session, Query from tqdm import tqdm from web3 import Web3 from web3.types import FilterParams, LogReceipt @@ -28,7 +28,6 @@ NFT_LABEL = "erc721" MINT_LABEL = "nft_mint" TRANSFER_LABEL = "nft_transfer" - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -469,51 +468,94 @@ def summary( start_timestamp = int(start_time.timestamp()) end_timestamp = int(end_time.timestamp()) - base_query = ( - db_session.query( - EthereumLabel.label, - EthereumLabel.label_data, - EthereumLabel.address_id, - EthereumTransaction.hash, - EthereumTransaction.value, - EthereumBlock.block_number, - EthereumBlock.timestamp, - ) - .join( - EthereumTransaction, - EthereumLabel.transaction_hash == EthereumTransaction.hash, - ) + time_filter = and_( + EthereumBlock.timestamp >= start_timestamp, + EthereumBlock.timestamp <= end_timestamp, + ) + + transactions_query = ( + db_session.query(EthereumTransaction) .join( EthereumBlock, EthereumTransaction.block_number == EthereumBlock.block_number, ) - .filter( - and_( - EthereumBlock.timestamp >= start_timestamp, - EthereumBlock.timestamp <= end_timestamp, - ) - ) - .filter(EthereumLabel.label.in_([MINT_LABEL, TRANSFER_LABEL])) + .filter(time_filter) ) - print(base_query.distinct(EthereumTransaction.hash).count()) + def nft_query(label: str) -> Query: + query = ( + db_session.query( + EthereumLabel.label, + EthereumLabel.label_data, + EthereumLabel.address_id, + EthereumTransaction.hash, + EthereumTransaction.value, + EthereumBlock.block_number, + EthereumBlock.timestamp, + ) + .join( + EthereumTransaction, + EthereumLabel.transaction_hash == EthereumTransaction.hash, + ) + .join( + EthereumBlock, + EthereumTransaction.block_number == EthereumBlock.block_number, + ) + .filter(time_filter) + .filter(EthereumLabel.label == label) + ) + return query - return {} + transfer_query = nft_query(TRANSFER_LABEL) + mint_query = nft_query(MINT_LABEL) + blocks_result: Dict[str, int] = {} + min_block = ( + db_session.query(func.min(EthereumBlock.block_number)) + .filter(time_filter) + .scalar() + ) + max_block = ( + db_session.query(func.max(EthereumBlock.block_number)) + .filter(time_filter) + .scalar() + ) + if min_block is not None: + blocks_result["start"] = min_block + if max_block is not None: + blocks_result["end"] = max_block -# result = { -# "date_range": { -# "start_time": start_time, -# "include_start": True, -# "end_time": end_time, -# "include_end": True, -# }, -# "blocks": { -# "start": start, -# "end": end, -# }, -# "num_transfers": len(transfers), -# "num_mints": num_mints, -# } + num_transactions = transactions_query.distinct(EthereumTransaction.hash).count() + num_transfers = transfer_query.distinct(EthereumTransaction.hash).count() -# return result + total_value = db_session.query( + func.sum(transactions_query.subquery().c.value) + ).scalar() + transfer_value = db_session.query( + func.sum(transfer_query.subquery().c.value) + ).scalar() + + num_minted = mint_query.distinct(EthereumTransaction.hash).count() + + result = { + "date_range": { + "start_time": start_time.isoformat(), + "include_start": True, + "end_time": end_time.isoformat(), + "include_end": True, + }, + "blocks": blocks_result, + "transactions": { + "total": f"{num_transactions}", + "amount": f"{num_transfers}", + "percentage": f"{num_transfers/num_transactions * 100}", + }, + "value": { + "total": f"{total_value}", + "amount": f"{transfer_value}", + "percentage": f"{transfer_value/total_value * 100}", + }, + "mints": num_minted, + } + + return result From 1013d5cb255bc74cac34f366e837facf9cebd9dc Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Sun, 5 Sep 2021 22:59:10 -0700 Subject: [PATCH 22/27] First version of "nft ethereum summary" crawler --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 9 +---- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 35 ++++++++++++++++++-- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index 514035b6..da18850b 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -42,7 +42,7 @@ def ethereum_label_handler(args: argparse.Namespace) -> None: def ethereum_summary_handler(args: argparse.Namespace) -> None: with yield_db_session_ctx() as db_session: - result = ethereum_summary(db_session, args.start, args.end) + result = ethereum_summary(db_session, args.end) # humbug_token = args.humbug # if humbug_token is None: @@ -111,13 +111,6 @@ def main() -> None: parser_ethereum_summary = subparsers_ethereum.add_parser( "summary", description="Generate Ethereum NFT summary" ) - parser_ethereum_summary.add_argument( - "-s", - "--start", - type=dateutil.parser.parse, - default=(time_now - timedelta(hours=1, minutes=0)).isoformat(), - help=f"Start time for window to calculate NFT statistics (default: {(time_now - timedelta(hours=1,minutes=0)).isoformat()})", - ) parser_ethereum_summary.add_argument( "-e", "--end", diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 1a77bf59..ff4b50fa 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, asdict -from datetime import datetime +from datetime import datetime, time, timedelta import logging from hexbytes.main import HexBytes from typing import Any, cast, Dict, List, Optional, Set, Tuple @@ -460,11 +460,14 @@ def add_labels( pbar.close() -def summary( +def time_bounded_summary( db_session: Session, start_time: datetime, end_time: datetime, ) -> Dict[str, Any]: + """ + Produces a summary of Ethereum NFT activity between the given start_time and end_time (inclusive). + """ start_timestamp = int(start_time.timestamp()) end_timestamp = int(end_time.timestamp()) @@ -559,3 +562,31 @@ def summary( } return result + + +def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: + """ + Produces a summary of all Ethereum NFT activity: + 1. From 1 hour before end_time to end_time + 2. From 1 day before end_time to end_time + 3. From 1 week before end_time to end_time + """ + start_times = { + "hour": end_time - timedelta(hours=1), + "day": end_time - timedelta(days=1), + "week": end_time - timedelta(weeks=1), + } + summaries = { + period: time_bounded_summary(db_session, start_time, end_time) + for period, start_time in start_times.items() + } + + def aggregate_summary(key: str) -> Dict[str, Any]: + return {period: summary.get(key) for period, summary in summaries.items()} + + return { + "crawled_at": end_time.isoformat(), + "blocks": aggregate_summary("blocks"), + "transactions": aggregate_summary("transactions"), + "value": aggregate_summary("value"), + } From 1f771a650e699d8cce08ddce790c2b7672d135fb Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Mon, 6 Sep 2021 00:01:18 -0700 Subject: [PATCH 23/27] "mints" now points to an object with an "amounts" key Makes it consistent with the other statistics. --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index ff4b50fa..9c1d1957 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -558,7 +558,9 @@ def time_bounded_summary( "amount": f"{transfer_value}", "percentage": f"{transfer_value/total_value * 100}", }, - "mints": num_minted, + "mints": { + "amount": num_minted, + }, } return result @@ -589,4 +591,5 @@ def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: "blocks": aggregate_summary("blocks"), "transactions": aggregate_summary("transactions"), "value": aggregate_summary("value"), + "mints": aggregate_summary("mints"), } From 36233447c5cc34170204e6eb3203887901d1295b Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Tue, 7 Sep 2021 04:08:22 -0700 Subject: [PATCH 24/27] [WIP] Trying to get number of owners calculation to work DISTINCT ON query seems to be returning incorrect results. --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 96 +++++++++++++++----- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 9c1d1957..0ddb2212 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, asdict -from datetime import datetime, time, timedelta +from datetime import datetime, timedelta +import json import logging from hexbytes.main import HexBytes from typing import Any, cast, Dict, List, Optional, Set, Tuple @@ -11,7 +12,7 @@ from moonstreamdb.models import ( EthereumLabel, EthereumTransaction, ) -from sqlalchemy import and_, func +from sqlalchemy import and_, func, text from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import Session, Query from tqdm import tqdm @@ -19,8 +20,6 @@ from web3 import Web3 from web3.types import FilterParams, LogReceipt from web3._utils.events import get_event_data -from ..ethereum import DateRange - # Default length (in blocks) of an Ethereum NFT crawl. DEFAULT_CRAWL_LENGTH = 100 @@ -31,6 +30,26 @@ TRANSFER_LABEL = "nft_transfer" logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +# Summary keys +SUMMARY_KEY_BLOCKS = "blocks" +SUMMARY_KEY_NUM_TRANSACTIONS = "num_transactions" +SUMMARY_KEY_TOTAL_VALUE = "total_value" +SUMMARY_KEY_NFT_TRANSFERS = "nft_transfers" +SUMMARY_KEY_NFT_TRANSFER_VALUE = "nft_transfer_value" +SUMMARY_KEY_NFT_MINTS = "nft_mints" +SUMMARY_KEY_NFT_OWNERS = "nft_owners" + +SUMMARY_KEYS = [ + SUMMARY_KEY_BLOCKS, + SUMMARY_KEY_NUM_TRANSACTIONS, + SUMMARY_KEY_TOTAL_VALUE, + SUMMARY_KEY_NFT_TRANSFERS, + SUMMARY_KEY_NFT_TRANSFER_VALUE, + SUMMARY_KEY_NFT_MINTS, + SUMMARY_KEY_NFT_OWNERS, +] + + # First abi is for old NFT's like crypto kitties # The erc721 standart requieres that Transfer event is indexed for all arguments # That is how we get distinguished from erc20 transfer events @@ -512,6 +531,36 @@ def time_bounded_summary( transfer_query = nft_query(TRANSFER_LABEL) mint_query = nft_query(MINT_LABEL) + current_owner_query = ( + db_session.query( + EthereumLabel.address_id.label("address_id"), + EthereumLabel.label_data["to"].astext.label("owner_address"), + EthereumLabel.label_data["tokenId"].astext.label("token_id"), + EthereumTransaction.block_number.label("block_number"), + EthereumTransaction.transaction_index.label("transaction_index"), + EthereumTransaction.value.label("transfer_value"), + ) + .join( + EthereumTransaction, + EthereumLabel.transaction_hash == EthereumTransaction.hash, + ) + .join( + EthereumBlock, + EthereumTransaction.block_number == EthereumBlock.block_number, + ) + .filter(time_filter) + .filter(EthereumLabel.label == TRANSFER_LABEL) + .order_by( + # Without "owner_address" and "transfer_value" as sort keys, the final distinct query + # does not seem to be deterministic. + # Maybe relevant Stackoverflow post: https://stackoverflow.com/a/59410440 + text( + "address_id, token_id, block_number desc, transaction_index desc, owner_address, transfer_value" + ) + ) + .distinct("address_id", "token_id") + ) + blocks_result: Dict[str, int] = {} min_block = ( db_session.query(func.min(EthereumBlock.block_number)) @@ -540,6 +589,12 @@ def time_bounded_summary( num_minted = mint_query.distinct(EthereumTransaction.hash).count() + num_owners = ( + db_session.query(current_owner_query.subquery()) + .distinct(text("owner_address")) + .count() + ) + result = { "date_range": { "start_time": start_time.isoformat(), @@ -547,20 +602,13 @@ def time_bounded_summary( "end_time": end_time.isoformat(), "include_end": True, }, - "blocks": blocks_result, - "transactions": { - "total": f"{num_transactions}", - "amount": f"{num_transfers}", - "percentage": f"{num_transfers/num_transactions * 100}", - }, - "value": { - "total": f"{total_value}", - "amount": f"{transfer_value}", - "percentage": f"{transfer_value/total_value * 100}", - }, - "mints": { - "amount": num_minted, - }, + SUMMARY_KEY_BLOCKS: blocks_result, + SUMMARY_KEY_NUM_TRANSACTIONS: f"{num_transactions}", + SUMMARY_KEY_TOTAL_VALUE: f"{total_value}", + SUMMARY_KEY_NFT_TRANSFERS: f"{num_transfers}", + SUMMARY_KEY_NFT_TRANSFER_VALUE: f"{transfer_value}", + SUMMARY_KEY_NFT_MINTS: f"{num_minted}", + SUMMARY_KEY_NFT_OWNERS: f"{num_owners}", } return result @@ -574,7 +622,7 @@ def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: 3. From 1 week before end_time to end_time """ start_times = { - "hour": end_time - timedelta(hours=1), + "hour": end_time, "day": end_time - timedelta(days=1), "week": end_time - timedelta(weeks=1), } @@ -586,10 +634,8 @@ def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: def aggregate_summary(key: str) -> Dict[str, Any]: return {period: summary.get(key) for period, summary in summaries.items()} - return { - "crawled_at": end_time.isoformat(), - "blocks": aggregate_summary("blocks"), - "transactions": aggregate_summary("transactions"), - "value": aggregate_summary("value"), - "mints": aggregate_summary("mints"), + result = { + summary_key: aggregate_summary(summary_key) for summary_key in SUMMARY_KEYS } + result["crawled_at"] = end_time.isoformat() + return result From 8f541f84e3ad98c5cd97f70968abd48265b36936 Mon Sep 17 00:00:00 2001 From: Neeraj Kashyap Date: Tue, 7 Sep 2021 04:48:06 -0700 Subject: [PATCH 25/27] NFT purchaser and minter statistics --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 80 ++++++++++++-------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 0ddb2212..e52ae60f 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -37,7 +37,8 @@ SUMMARY_KEY_TOTAL_VALUE = "total_value" SUMMARY_KEY_NFT_TRANSFERS = "nft_transfers" SUMMARY_KEY_NFT_TRANSFER_VALUE = "nft_transfer_value" SUMMARY_KEY_NFT_MINTS = "nft_mints" -SUMMARY_KEY_NFT_OWNERS = "nft_owners" +SUMMARY_KEY_NFT_PURCHASERS = "nft_owners" +SUMMARY_KEY_NFT_MINTERS = "nft_minters" SUMMARY_KEYS = [ SUMMARY_KEY_BLOCKS, @@ -46,7 +47,8 @@ SUMMARY_KEYS = [ SUMMARY_KEY_NFT_TRANSFERS, SUMMARY_KEY_NFT_TRANSFER_VALUE, SUMMARY_KEY_NFT_MINTS, - SUMMARY_KEY_NFT_OWNERS, + SUMMARY_KEY_NFT_PURCHASERS, + SUMMARY_KEY_NFT_MINTERS, ] @@ -531,35 +533,40 @@ def time_bounded_summary( transfer_query = nft_query(TRANSFER_LABEL) mint_query = nft_query(MINT_LABEL) - current_owner_query = ( - db_session.query( - EthereumLabel.address_id.label("address_id"), - EthereumLabel.label_data["to"].astext.label("owner_address"), - EthereumLabel.label_data["tokenId"].astext.label("token_id"), - EthereumTransaction.block_number.label("block_number"), - EthereumTransaction.transaction_index.label("transaction_index"), - EthereumTransaction.value.label("transfer_value"), - ) - .join( - EthereumTransaction, - EthereumLabel.transaction_hash == EthereumTransaction.hash, - ) - .join( - EthereumBlock, - EthereumTransaction.block_number == EthereumBlock.block_number, - ) - .filter(time_filter) - .filter(EthereumLabel.label == TRANSFER_LABEL) - .order_by( - # Without "owner_address" and "transfer_value" as sort keys, the final distinct query - # does not seem to be deterministic. - # Maybe relevant Stackoverflow post: https://stackoverflow.com/a/59410440 - text( - "address_id, token_id, block_number desc, transaction_index desc, owner_address, transfer_value" + def holder_query(label: str) -> Query: + query = ( + db_session.query( + EthereumLabel.address_id.label("address_id"), + EthereumLabel.label_data["to"].astext.label("owner_address"), + EthereumLabel.label_data["tokenId"].astext.label("token_id"), + EthereumTransaction.block_number.label("block_number"), + EthereumTransaction.transaction_index.label("transaction_index"), + EthereumTransaction.value.label("transfer_value"), ) + .join( + EthereumTransaction, + EthereumLabel.transaction_hash == EthereumTransaction.hash, + ) + .join( + EthereumBlock, + EthereumTransaction.block_number == EthereumBlock.block_number, + ) + .filter(EthereumLabel.label == label) + .filter(time_filter) + .order_by( + # Without "transfer_value" and "owner_address" as sort keys, the final distinct query + # does not seem to be deterministic. + # Maybe relevant Stackoverflow post: https://stackoverflow.com/a/59410440 + text( + "address_id, token_id, block_number desc, transaction_index desc, transfer_value, owner_address" + ) + ) + .distinct("address_id", "token_id") ) - .distinct("address_id", "token_id") - ) + return query + + purchaser_query = holder_query(TRANSFER_LABEL) + minter_query = holder_query(MINT_LABEL) blocks_result: Dict[str, int] = {} min_block = ( @@ -589,8 +596,14 @@ def time_bounded_summary( num_minted = mint_query.distinct(EthereumTransaction.hash).count() - num_owners = ( - db_session.query(current_owner_query.subquery()) + num_purchasers = ( + db_session.query(purchaser_query.subquery()) + .distinct(text("owner_address")) + .count() + ) + + num_minters = ( + db_session.query(minter_query.subquery()) .distinct(text("owner_address")) .count() ) @@ -608,7 +621,8 @@ def time_bounded_summary( SUMMARY_KEY_NFT_TRANSFERS: f"{num_transfers}", SUMMARY_KEY_NFT_TRANSFER_VALUE: f"{transfer_value}", SUMMARY_KEY_NFT_MINTS: f"{num_minted}", - SUMMARY_KEY_NFT_OWNERS: f"{num_owners}", + SUMMARY_KEY_NFT_PURCHASERS: f"{num_purchasers}", + SUMMARY_KEY_NFT_MINTERS: f"{num_minters}", } return result @@ -622,7 +636,7 @@ def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: 3. From 1 week before end_time to end_time """ start_times = { - "hour": end_time, + "hour": end_time - timedelta(hours=1), "day": end_time - timedelta(days=1), "week": end_time - timedelta(weeks=1), } From e294cfa926bd8aa8f56ce3a10967be8efe19dafb Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Tue, 7 Sep 2021 17:40:59 +0300 Subject: [PATCH 26/27] now, logs querries to node is done with batches --- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 32 +++++++++++--------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 0ddb2212..3ac5ba1b 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -220,13 +220,13 @@ def get_nft_transfers( if to_block is not None: filter_params["toBlock"] = to_block - if contract_address is not None: filter_params["address"] = w3.toChecksumAddress(contract_address) logs = w3.eth.get_logs(filter_params) + nft_transfers: List[NFTTransfer] = [] - for log in tqdm(logs, desc="Crawling NFT transfers from Ethereum node"): + for log in tqdm(logs, desc=f"Processing logs for blocks {from_block}-{to_block}"): nft_transfer = decode_nft_transfer_data(w3, log) if nft_transfer is not None: kwargs = { @@ -385,8 +385,8 @@ def add_labels( db_session: Session, from_block: Optional[int] = None, to_block: Optional[int] = None, - address: Optional[str] = None, - batch_size: int = 100, + contract_address: Optional[str] = None, + batch_size: int = 50, ) -> None: """ Crawls blocks between from_block and to_block checking for NFT mints and transfers. @@ -435,17 +435,21 @@ def add_labels( assert batch_size > 0, f"Batch size must be positive (received {batch_size})" start, end = get_block_bounds(w3, from_block, to_block) - transfers = get_nft_transfers(w3, start, end, address) - batch_start = 0 - batch_end = batch_size + batch_start = start + batch_end = start + batch_size - 1 address_ids: Dict[str, int] = {} - pbar = tqdm(total=len(transfers)) - pbar.set_description("Processing NFT transfer") - while batch_start < batch_end: - job = transfers[batch_start:batch_end] + pbar = tqdm(total=(end - start + 1)) + pbar.set_description("Processing blocks") + while batch_start <= batch_end: + job = get_nft_transfers( + w3, + from_block=batch_start, + to_block=batch_end, + contract_address=contract_address, + ) contract_addresses = {transfer.contract_address for transfer in job} updated_address_ids = ensure_addresses(db_session, contract_addresses) for address, address_id in updated_address_ids.items(): @@ -473,9 +477,9 @@ def add_labels( label_transfers(db_session, job, updated_address_ids) # Update batch at end of iteration - pbar.update(batch_end - batch_start) - batch_start = batch_end - batch_end = min(batch_end + batch_size, len(transfers)) + pbar.update(batch_end - batch_start + 1) + batch_start = batch_end + 1 + batch_end = min(batch_end + batch_size, end) pbar.close() From e51e008164b2dc36fdb0dd6b6f6edad57b8d065b Mon Sep 17 00:00:00 2001 From: yhtiyar Date: Wed, 8 Sep 2021 17:00:02 +0300 Subject: [PATCH 27/27] added sync command, that will continiously work --- crawlers/mooncrawl/mooncrawl/nft/cli.py | 84 +++++++++++++++++++- crawlers/mooncrawl/mooncrawl/nft/ethereum.py | 10 +-- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/nft/cli.py b/crawlers/mooncrawl/mooncrawl/nft/cli.py index da18850b..7063861a 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/cli.py +++ b/crawlers/mooncrawl/mooncrawl/nft/cli.py @@ -4,11 +4,15 @@ A command line tool to crawl information about NFTs from various sources. import argparse from datetime import datetime, timedelta, timezone import json +import os import sys -from typing import cast +import time +from typing import Any, Dict, cast import dateutil.parser from moonstreamdb.db import yield_db_session_ctx +from moonstreamdb.models import EthereumBlock +from sqlalchemy.orm.session import Session from web3 import Web3 from ..ethereum import connect @@ -34,12 +38,77 @@ def web3_client_from_cli_or_env(args: argparse.Namespace) -> Web3: return connect(web3_connection_string) +def get_latest_block_from_db(db_session: Session): + return ( + db_session.query(EthereumBlock) + .order_by(EthereumBlock.timestamp.desc()) + .limit(1) + .one() + ) + + +def ethereum_sync_handler(args: argparse.Namespace) -> None: + web3_client = web3_client_from_cli_or_env(args) + humbug_token = os.environ.get("MOONSTREAM_HUMBUG_TOKEN") + if humbug_token is None: + raise ValueError("MOONSTREAM_HUMBUG_TOKEN env variable is not set") + + with yield_db_session_ctx() as db_session: + start = args.start + if start is None: + time_now = datetime.now(timezone.utc) + week_ago = time_now - timedelta(weeks=1) + start = ( + db_session.query(EthereumBlock) + .filter(EthereumBlock.timestamp >= week_ago.timestamp()) + .order_by(EthereumBlock.timestamp.asc()) + .limit(1) + .one() + ).block_number + + latest_block = get_latest_block_from_db(db_session) + end = latest_block.block_number + assert ( + start <= end + ), f"Start block {start} is greater than latest_block {end} in db" + + while True: + print(f"Labeling blocks {start}-{end}") + add_labels(web3_client, db_session, start, end) + + end_time = datetime.fromtimestamp(latest_block.timestamp, timezone.utc) + print(f"Creating summary with endtime={end_time}") + result = ethereum_summary(db_session, end_time) + push_summary(result, end, humbug_token) + + sleep_time = 60 * 60 + print(f"Going to sleep for:{sleep_time}s") + time.sleep(sleep_time) + + start = end + 1 + latest_block = get_latest_block_from_db(db_session) + end = latest_block.block_number + + def ethereum_label_handler(args: argparse.Namespace) -> None: web3_client = web3_client_from_cli_or_env(args) with yield_db_session_ctx() as db_session: add_labels(web3_client, db_session, args.start, args.end, args.address) +def push_summary(result: Dict[str, Any], end_block_no: int, humbug_token: str): + + title = f"NFT activity on the Ethereum blockchain: end time: {result['crawled_at'] } (block {end_block_no})" + publish_json( + "nft_ethereum", + humbug_token, + title, + result, + tags=[f"crawler_version:{MOONCRAWL_VERSION}", f"end_block:{end_block_no}"], + wait=False, + ) + + def ethereum_summary_handler(args: argparse.Namespace) -> None: with yield_db_session_ctx() as db_session: result = ethereum_summary(db_session, args.end) @@ -136,6 +205,19 @@ def main() -> None: ) parser_ethereum_summary.set_defaults(func=ethereum_summary_handler) + parser_ethereum_sync = subparsers_ethereum.add_parser( + "sync", + description="Label addresses and transactions in databse using crawled NFT transfer information, sync mode", + ) + parser_ethereum_sync.add_argument( + "-s", + "--start", + type=int, + required=False, + help="Starting block number (inclusive if block available)", + ) + parser_ethereum_sync.set_defaults(func=ethereum_sync_handler) + args = parser.parse_args() args.func(args) diff --git a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py index 8647d2e9..9f84a6ee 100644 --- a/crawlers/mooncrawl/mooncrawl/nft/ethereum.py +++ b/crawlers/mooncrawl/mooncrawl/nft/ethereum.py @@ -388,7 +388,7 @@ def add_labels( from_block: Optional[int] = None, to_block: Optional[int] = None, contract_address: Optional[str] = None, - batch_size: int = 50, + batch_size: int = 100, ) -> None: """ Crawls blocks between from_block and to_block checking for NFT mints and transfers. @@ -439,12 +439,12 @@ def add_labels( start, end = get_block_bounds(w3, from_block, to_block) batch_start = start - batch_end = start + batch_size - 1 + batch_end = min(start + batch_size - 1, end) address_ids: Dict[str, int] = {} pbar = tqdm(total=(end - start + 1)) - pbar.set_description("Processing blocks") + pbar.set_description(f"Labeling blocks {start}-{end}") while batch_start <= batch_end: job = get_nft_transfers( w3, @@ -598,7 +598,7 @@ def time_bounded_summary( func.sum(transfer_query.subquery().c.value) ).scalar() - num_minted = mint_query.distinct(EthereumTransaction.hash).count() + num_minted = mint_query.count() num_purchasers = ( db_session.query(purchaser_query.subquery()) @@ -652,7 +652,7 @@ def summary(db_session: Session, end_time: datetime) -> Dict[str, Any]: def aggregate_summary(key: str) -> Dict[str, Any]: return {period: summary.get(key) for period, summary in summaries.items()} - result = { + result: Dict[str, Any] = { summary_key: aggregate_summary(summary_key) for summary_key in SUMMARY_KEYS } result["crawled_at"] = end_time.isoformat()