kopia lustrzana https://github.com/bugout-dev/moonstream
119 wiersze
5.9 KiB
Python
119 wiersze
5.9 KiB
Python
"""
|
|
Functions to access various data in the NFTs dataset.
|
|
"""
|
|
import sqlite3
|
|
from typing import Union
|
|
|
|
import pandas as pd
|
|
|
|
from .datastore import event_tables, EventType
|
|
|
|
# TODO(zomglings): Make it so that table names are parametrized by importable variables. The way
|
|
# things are now, we have to be very careful if we ever rename a table in our dataset. We should
|
|
# also propagate the name change here.
|
|
NFTS = "nfts"
|
|
MINTS = event_tables[EventType.MINT]
|
|
TRANSFERS = event_tables[EventType.TRANSFER]
|
|
CURRENT_OWNERS = "current_owners"
|
|
CURRENT_MARKET_VALUES = "current_market_values"
|
|
TRANSFER_STATISTICS_BY_ADDRESS = "transfer_statistics_by_address"
|
|
MINT_HOLDING_TIMES = "mint_holding_times"
|
|
TRANSFER_HOLDING_TIMES = "transfer_holding_times"
|
|
|
|
AVAILABLE_DATAFRAMES = {
|
|
NFTS: """Describes the NFT contracts represented in this dataset, with a name and symbol if they were available at time of crawl.
|
|
|
|
Columns:
|
|
1. address: The Ethereum address of the NFT contract.
|
|
2. name: The name of the collection of NFTs that the contract represents.
|
|
3. symbol: The symbol of the collection of NFTs that the contract represents.
|
|
""",
|
|
MINTS: """All token mint events crawled in this dataset.
|
|
|
|
Columns:
|
|
1. event_id: A unique event ID associated with the event.
|
|
2. transaction_hash: The hash of the transaction which triggered the event.
|
|
3. block_number: The transaction block in which the transaction was mined.
|
|
4. nft_address: The address of the NFT collection containing the minted token.
|
|
5. token_id: The ID of the token that was minted.
|
|
6. from_address: The "from" address for the transfer event. For a mint, this should be the 0 address: 0x0000000000000000000000000000000000000000.
|
|
7. to_address: The "to" address for the transfer event. This represents the owner of the freshly minted token.
|
|
8. transaction_value: The amount of WEI that were sent with the transaction in which the token was minted.
|
|
9. timestamp: The time at which the mint operation was mined into the blockchain (this is the timestamp for the mined block).
|
|
""",
|
|
TRANSFERS: """All token transfer events crawled in this dataset.
|
|
|
|
Columns:
|
|
1. event_id: A unique event ID associated with the event.
|
|
2. transaction_hash: The hash of the transaction which triggered the event.
|
|
3. block_number: The transaction block in which the transaction was mined.
|
|
4. nft_address: The address of the NFT collection containing the transferred token.
|
|
5. token_id: The ID of the token that was transferred.
|
|
6. from_address: The "from" address for the transfer event. This is the address that owned the token at the *start* of the transfer.
|
|
7. to_address: The "to" address for the transfer event. This is the address that owned the token at the *end* of the transfer.
|
|
8. transaction_value: The amount of WEI that were sent with the transaction in which the token was transferred.
|
|
9. timestamp: The time at which the transfer operation was mined into the blockchain (this is the timestamp for the mined block).
|
|
""",
|
|
CURRENT_OWNERS: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. It represents the current owner of each token in the dataset.
|
|
|
|
Columns:
|
|
1. nft_address: The address of the NFT collection containing the token whose ownership we are denoting.
|
|
2. token_id: The ID of the token (inside the collection) whose ownership we are denoting.
|
|
3. owner: The address that owned the token at the time of construction of this dataset.
|
|
""",
|
|
CURRENT_MARKET_VALUES: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. It represents the current market value (in WEI) of each token in the dataset.
|
|
|
|
Columns:
|
|
1. nft_address: The address of the NFT collection containing the token whose market value we are denoting.
|
|
2. token_id: The ID of the token (inside the collection) whose market value we are denoting.
|
|
3. market_value: The estimated market value of the token at the time of construction of this dataset.
|
|
|
|
For this dataset, we estimate the market value as the last non-zero transaction value for a transfer involving this token.
|
|
This estimate may be inaccurate for some transfers (e.g. multiple token transfers made by an escrow contract in a single transaction)
|
|
but ought to be reasonably accurate for a large majority of tokens.
|
|
""",
|
|
TRANSFER_STATISTICS_BY_ADDRESS: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. For each address that participated in
|
|
at least one NFT transfer between April 1, 2021 and September 25, 2021, this table shows exactly how many NFTs that address transferred to
|
|
other addresses and how many NFT transfers that address was the recipient of.
|
|
|
|
Columns:
|
|
1. address: An Ethereum address that participated in at least one NFT transfer between April 1, 2021 and September 25, 2021.
|
|
2. transfers_out: The number of NFTs that the given address transferred to any other address between April 1, 2021 and September 25, 2021.
|
|
3. transfers_in: The number of NFTs that any other address transferred to given address between April 1, 2021 and September 25, 2021.
|
|
""",
|
|
}
|
|
|
|
|
|
def explain() -> None:
|
|
"""
|
|
Explains the structure of the dataset.
|
|
"""
|
|
preamble = """
|
|
The Moonstream NFTs dataset
|
|
===========================
|
|
|
|
This dataset consists of the following dataframes:"""
|
|
|
|
print(preamble)
|
|
for name, explanation in AVAILABLE_DATAFRAMES.items():
|
|
print(f"\nDataframe: {name}")
|
|
print(
|
|
f"Load using:\n\t{name}_df = nfts.dataset.load_dataframe(<sqlite connection or path to sqlite db>, {name})"
|
|
)
|
|
print("")
|
|
print(explanation)
|
|
print("- - -")
|
|
|
|
|
|
def load_dataframe(db: Union[str, sqlite3.Connection], name: str) -> pd.DataFrame:
|
|
"""
|
|
Loads one of the available dataframes. To learn more about the available dataframes, run:
|
|
>>> nfts.dataset.explain()
|
|
"""
|
|
if name not in AVAILABLE_DATAFRAMES:
|
|
raise ValueError(
|
|
f"Invalid dataframe: {name}. Please choose from one of the available dataframes: {','.join(AVAILABLE_DATAFRAMES)}."
|
|
)
|
|
df = pd.read_sql_table(name, db)
|
|
return df
|