kopia lustrzana https://github.com/bugout-dev/moonstream
229 wiersze
11 KiB
Python
229 wiersze
11 KiB
Python
"""
|
|
Functions to access various data in the NFTs dataset.
|
|
"""
|
|
import sqlite3
|
|
from typing import List, Optional, Tuple
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import scipy.sparse
|
|
from tqdm import tqdm
|
|
|
|
from .datastore import event_tables, EventType
|
|
|
|
# TODO(zomglings): Make it so that table names are parametrized by importable variables. The way
|
|
# things are now, we have to be very careful if we ever rename a table in our dataset. We should
|
|
# also propagate the name change here.
|
|
NFTS = "nfts"
|
|
MINTS = event_tables[EventType.MINT]
|
|
TRANSFERS = event_tables[EventType.TRANSFER]
|
|
CURRENT_OWNERS = "current_owners"
|
|
CURRENT_MARKET_VALUES = "current_market_values"
|
|
TRANSFER_STATISTICS_BY_ADDRESS = "transfer_statistics_by_address"
|
|
MINT_HOLDING_TIMES = "mint_holding_times"
|
|
TRANSFER_HOLDING_TIMES = "transfer_holding_times"
|
|
OWNERSHIP_TRANSITIONS = "ownership_transitions"
|
|
|
|
AVAILABLE_DATAFRAMES = {
|
|
NFTS: """Describes the NFT contracts represented in this dataset, with a name and symbol if they were available at time of crawl.
|
|
|
|
Columns:
|
|
1. address: The Ethereum address of the NFT contract.
|
|
2. name: The name of the collection of NFTs that the contract represents.
|
|
3. symbol: The symbol of the collection of NFTs that the contract represents.
|
|
""",
|
|
MINTS: """All token mint events crawled in this dataset.
|
|
|
|
Columns:
|
|
1. event_id: A unique event ID associated with the event.
|
|
2. transaction_hash: The hash of the transaction which triggered the event.
|
|
3. block_number: The transaction block in which the transaction was mined.
|
|
4. nft_address: The address of the NFT collection containing the minted token.
|
|
5. token_id: The ID of the token that was minted.
|
|
6. from_address: The "from" address for the transfer event. For a mint, this should be the 0 address: 0x0000000000000000000000000000000000000000.
|
|
7. to_address: The "to" address for the transfer event. This represents the owner of the freshly minted token.
|
|
8. transaction_value: The amount of WEI that were sent with the transaction in which the token was minted.
|
|
9. timestamp: The time at which the mint operation was mined into the blockchain (this is the timestamp for the mined block).
|
|
""",
|
|
TRANSFERS: """All token transfer events crawled in this dataset.
|
|
|
|
Columns:
|
|
1. event_id: A unique event ID associated with the event.
|
|
2. transaction_hash: The hash of the transaction which triggered the event.
|
|
3. block_number: The transaction block in which the transaction was mined.
|
|
4. nft_address: The address of the NFT collection containing the transferred token.
|
|
5. token_id: The ID of the token that was transferred.
|
|
6. from_address: The "from" address for the transfer event. This is the address that owned the token at the *start* of the transfer.
|
|
7. to_address: The "to" address for the transfer event. This is the address that owned the token at the *end* of the transfer.
|
|
8. transaction_value: The amount of WEI that were sent with the transaction in which the token was transferred.
|
|
9. timestamp: The time at which the transfer operation was mined into the blockchain (this is the timestamp for the mined block).
|
|
""",
|
|
CURRENT_OWNERS: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. It represents the current owner of each token in the dataset.
|
|
|
|
Columns:
|
|
1. nft_address: The address of the NFT collection containing the token whose ownership we are denoting.
|
|
2. token_id: The ID of the token (inside the collection) whose ownership we are denoting.
|
|
3. owner: The address that owned the token at the time of construction of this dataset.
|
|
""",
|
|
CURRENT_MARKET_VALUES: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. It represents the current market value (in WEI) of each token in the dataset.
|
|
|
|
Columns:
|
|
1. nft_address: The address of the NFT collection containing the token whose market value we are denoting.
|
|
2. token_id: The ID of the token (inside the collection) whose market value we are denoting.
|
|
3. market_value: The estimated market value of the token at the time of construction of this dataset.
|
|
|
|
For this dataset, we estimate the market value as the last non-zero transaction value for a transfer involving this token.
|
|
This estimate may be inaccurate for some transfers (e.g. multiple token transfers made by an escrow contract in a single transaction)
|
|
but ought to be reasonably accurate for a large majority of tokens.
|
|
""",
|
|
TRANSFER_STATISTICS_BY_ADDRESS: f"""This table is derived from the {NFTS}, {MINTS}, and {TRANSFERS} tables. For each address that participated in
|
|
at least one NFT transfer between April 1, 2021 and September 25, 2021, this table shows exactly how many NFTs that address transferred to
|
|
other addresses and how many NFT transfers that address was the recipient of.
|
|
|
|
Columns:
|
|
1. address: An Ethereum address that participated in at least one NFT transfer between April 1, 2021 and September 25, 2021.
|
|
2. transfers_out: The number of NFTs that the given address transferred to any other address between April 1, 2021 and September 25, 2021.
|
|
3. transfers_in: The number of NFTs that any other address transferred to given address between April 1, 2021 and September 25, 2021.
|
|
""",
|
|
}
|
|
|
|
|
|
AVAILABLE_MATRICES = {
|
|
OWNERSHIP_TRANSITIONS: f"""{OWNERSHIP_TRANSITIONS} is an adjacency matrix which counts the number of times that a token was transferred from a source address (indexed by the rows of the matrix) to a target address (indexed by the columns of the matrix).
|
|
|
|
These counts only include data about mints and transfers made between April 1, 2021 and September 25, 2021. We also denote the current owners of an NFT as having transitioned
|
|
the NFT from themselves back to themselves. This gives some estimate of an owner retaining the NFT in the given time period.
|
|
|
|
Load this matrix as follows:
|
|
>>> indexed_addresses, transitions = ds.load_ownership_transitions()
|
|
|
|
- "indexed_addresses" is a list denoting the address that each index (row/column) in the matrix represents.
|
|
- "transitions" is a numpy ndarray containing the matrix, with source addresses on the row axis and target addresses on the column axis.
|
|
"""
|
|
}
|
|
|
|
|
|
def explain() -> None:
|
|
"""
|
|
Explains the structure of the dataset.
|
|
"""
|
|
preamble = """
|
|
The Moonstream NFTs dataset
|
|
===========================
|
|
|
|
To load the NFTs dataset from a SQLite file, run:
|
|
>>> ds = nfts.dataset.FromSQLite(<path to sqlite database>)
|
|
|
|
This dataset consists of the following dataframes:"""
|
|
|
|
print(preamble)
|
|
for name, explanation in AVAILABLE_DATAFRAMES.items():
|
|
print(f"\nDataframe: {name}")
|
|
print(
|
|
f'Load using:\n>>> {name}_df = ds.load_dataframe(<sqlite connection or path to sqlite db>, "{name}")'
|
|
)
|
|
print("")
|
|
print(explanation)
|
|
print("- - -")
|
|
|
|
for name, explanation in AVAILABLE_MATRICES.items():
|
|
print(f"\nMatrix: {name}")
|
|
print("")
|
|
print(explanation)
|
|
print("- - -")
|
|
|
|
|
|
class FromSQLite:
|
|
def __init__(self, datafile: str) -> None:
|
|
"""
|
|
Initialize an NFTs dataset instance by connecting it to a SQLite database containing the data.
|
|
"""
|
|
self.conn = sqlite3.connect(datafile)
|
|
self.ownership_transitions: Optional[
|
|
Tuple[List[str], scipy.sparse.spmatrix]
|
|
] = None
|
|
self.ownership_transition_probabilities: Optional[
|
|
Tuple[List[str], scipy.sparse.spmatrix]
|
|
] = None
|
|
|
|
def load_dataframe(self, name: str) -> pd.DataFrame:
|
|
"""
|
|
Loads one of the available dataframes. To learn more about the available dataframes, run:
|
|
>>> nfts.dataset.explain()
|
|
"""
|
|
if name not in AVAILABLE_DATAFRAMES:
|
|
raise ValueError(
|
|
f"Invalid dataframe: {name}. Please choose from one of the available dataframes: {','.join(AVAILABLE_DATAFRAMES)}."
|
|
)
|
|
df = pd.read_sql_query(f"SELECT * FROM {name};", self.conn)
|
|
return df
|
|
|
|
def load_ownership_transitions(
|
|
self, force: bool = False
|
|
) -> Tuple[List[str], scipy.sparse.spmatrix]:
|
|
"""
|
|
Loads ownership transitions adjacency matrix from SQLite database.
|
|
|
|
To learn more about this matrix, run:
|
|
>>> nfts.dataset.explain()
|
|
"""
|
|
if self.ownership_transitions is not None and not force:
|
|
return self.ownership_transitions
|
|
cur = self.conn.cursor()
|
|
address_indexes_query = """
|
|
WITH all_addresses AS (
|
|
SELECT from_address AS address FROM ownership_transitions
|
|
UNION
|
|
SELECT to_address AS address FROM ownership_transitions
|
|
)
|
|
SELECT DISTINCT(all_addresses.address) AS address FROM all_addresses ORDER BY address ASC;
|
|
"""
|
|
addresses = [row[0] for row in cur.execute(address_indexes_query)]
|
|
num_addresses = len(addresses)
|
|
address_indexes = {address: i for i, address in enumerate(addresses)}
|
|
|
|
adjacency_matrix = scipy.sparse.dok_matrix((num_addresses, num_addresses))
|
|
adjacency_query = "SELECT from_address, to_address, num_transitions FROM ownership_transitions;"
|
|
|
|
rows = cur.execute(adjacency_query)
|
|
for from_address, to_address, num_transitions in tqdm(
|
|
rows, desc="Ownership transitions (adjacency matrix)"
|
|
):
|
|
from_index = address_indexes[from_address]
|
|
to_index = address_indexes[to_address]
|
|
adjacency_matrix[from_index, to_index] = num_transitions
|
|
|
|
self.ownership_transitions = (addresses, adjacency_matrix)
|
|
return self.ownership_transitions
|
|
|
|
def load_ownership_transition_probabilities(
|
|
self,
|
|
force: bool = False,
|
|
) -> Tuple[List[str], scipy.sparse.spmatrix]:
|
|
"""
|
|
Returns transition probabilities of ownership transitions, with each entry A_{i,j} denoting the
|
|
probability that the address represented by row i transferred and NFT to the address represented by row[j].
|
|
"""
|
|
if self.ownership_transition_probabilities is not None and not force:
|
|
return self.ownership_transition_probabilities
|
|
|
|
addresses, adjacency_matrix = self.load_ownership_transitions(force)
|
|
|
|
# Sum of the entries in each row:
|
|
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.spmatrix.sum.html#scipy.sparse.spmatrix.sum
|
|
row_sums = adjacency_matrix.sum(axis=1)
|
|
|
|
# Convert adjacency matrix to matrix of transition probabilities.
|
|
# We cannot do this by simply dividing transition_probabilites /= row_sums because that tries
|
|
# to coerce the matrix into a dense numpy ndarray and requires terabytes of memory.
|
|
transition_probabilities = adjacency_matrix.copy()
|
|
for i, j in zip(*transition_probabilities.nonzero()):
|
|
transition_probabilities[i, j] = (
|
|
transition_probabilities[i, j] / row_sums[i]
|
|
)
|
|
|
|
# Now we identify and remove burn addresses from this data.
|
|
|
|
self.ownership_transition_probabilities = (addresses, transition_probabilities)
|
|
return self.ownership_transition_probabilities
|