diff --git a/datasets/nfts/nfts/dataset.py b/datasets/nfts/nfts/dataset.py index 16c70e4e..9062c9ca 100644 --- a/datasets/nfts/nfts/dataset.py +++ b/datasets/nfts/nfts/dataset.py @@ -2,10 +2,12 @@ Functions to access various data in the NFTs dataset. """ import sqlite3 -from typing import Any, List, Tuple +from typing import List, Optional, Tuple +import numpy as np import pandas as pd import scipy.sparse +from tqdm import tqdm from .datastore import event_tables, EventType @@ -124,7 +126,7 @@ This dataset consists of the following dataframes:""" print(explanation) print("- - -") - for name, explanation in AVAILABLE_MATRICES: + for name, explanation in AVAILABLE_MATRICES.items(): print(f"\nMatrix: {name}") print("") print(explanation) @@ -137,6 +139,12 @@ class FromSQLite: Initialize an NFTs dataset instance by connecting it to a SQLite database containing the data. """ self.conn = sqlite3.connect(datafile) + self.ownership_transitions: Optional[ + Tuple[List[str], scipy.sparse.spmatrix] + ] = None + self.ownership_transition_probabilities: Optional[ + Tuple[List[str], scipy.sparse.spmatrix] + ] = None def load_dataframe(self, name: str) -> pd.DataFrame: """ @@ -150,13 +158,17 @@ class FromSQLite: df = pd.read_sql_query(f"SELECT * FROM {name};", self.conn) return df - def load_ownership_transitions(self) -> Tuple[List[str], Any]: + def load_ownership_transitions( + self, force: bool = False + ) -> Tuple[List[str], scipy.sparse.spmatrix]: """ Loads ownership transitions adjacency matrix from SQLite database. 
To learn more about this matrix, run: >>> nfts.dataset.explain() """ + if self.ownership_transitions is not None and not force: + return self.ownership_transitions cur = self.conn.cursor() address_indexes_query = """ WITH all_addresses AS ( @@ -173,9 +185,44 @@ SELECT DISTINCT(all_addresses.address) AS address FROM all_addresses ORDER BY ad adjacency_matrix = scipy.sparse.dok_matrix((num_addresses, num_addresses)) adjacency_query = "SELECT from_address, to_address, num_transitions FROM ownership_transitions;" - for from_address, to_address, num_transitions in cur.execute(adjacency_query): + rows = cur.execute(adjacency_query) + for from_address, to_address, num_transitions in tqdm( + rows, desc="Ownership transitions (adjacency matrix)" + ): from_index = address_indexes[from_address] to_index = address_indexes[to_address] adjacency_matrix[from_index, to_index] = num_transitions - return addresses, adjacency_matrix + self.ownership_transitions = (addresses, adjacency_matrix) + return self.ownership_transitions + + def load_ownership_transition_probabilities( + self, + force: bool = False, + ) -> Tuple[List[str], scipy.sparse.spmatrix]: + """ + Returns transition probabilities of ownership transitions, with each entry A_{i,j} denoting the + probability that the address represented by row i transferred and NFT to the address represented by row[j]. + """ + if self.ownership_transition_probabilities is not None and not force: + return self.ownership_transition_probabilities + + addresses, adjacency_matrix = self.load_ownership_transitions(force) + + # Sum of the entries in each row: + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.spmatrix.sum.html#scipy.sparse.spmatrix.sum + row_sums = adjacency_matrix.sum(axis=1) + + # Convert adjacency matrix to matrix of transition probabilities. 
+ # We cannot do this by simply dividing transition_probabilites /= row_sums because that tries + # to coerce the matrix into a dense numpy ndarray and requires terabytes of memory. + transition_probabilities = adjacency_matrix.copy() + for i, j in zip(*transition_probabilities.nonzero()): + transition_probabilities[i, j] = ( + transition_probabilities[i, j] / row_sums[i] + ) + + # Now we identify and remove burn addresses from this data. + + self.ownership_transition_probabilities = (addresses, transition_probabilities) + return self.ownership_transition_probabilities diff --git a/datasets/nfts/notebooks/.gitignore b/datasets/nfts/notebooks/.gitignore new file mode 100644 index 00000000..7897fa70 --- /dev/null +++ b/datasets/nfts/notebooks/.gitignore @@ -0,0 +1 @@ +img/ diff --git a/datasets/nfts/notebooks/nft_ownership.ipynb b/datasets/nfts/notebooks/nft_ownership.ipynb new file mode 100644 index 00000000..1f26f181 --- /dev/null +++ b/datasets/nfts/notebooks/nft_ownership.ipynb @@ -0,0 +1,1232 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e2c7afd6-752c-477a-adcc-417eefd575f1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sqlite3\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import nfts.dataset\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9f0e7f34-591b-4694-99d7-e20535afe33a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "The Moonstream NFTs dataset\n", + "===========================\n", + "\n", + "To load the NFTs dataset from a SQLite file, run:\n", + ">>> ds = nfts.dataset.FromSQLite()\n", + "\n", + "This dataset consists of the following dataframes:\n", + "\n", + "Dataframe: nfts\n", + "Load using:\n", + ">>> nfts_df = ds.load_dataframe(, \"nfts\")\n", + "\n", + "Describes the NFT contracts represented in this dataset, with a name and symbol if they 
were available at time of crawl.\n", + "\n", + "Columns:\n", + "1. address: The Ethereum address of the NFT contract.\n", + "2. name: The name of the collection of NFTs that the contract represents.\n", + "3. symbol: The symbol of the collection of NFTs that the contract represents.\n", + "\n", + "- - -\n", + "\n", + "Dataframe: mints\n", + "Load using:\n", + ">>> mints_df = ds.load_dataframe(, \"mints\")\n", + "\n", + "All token mint events crawled in this dataset.\n", + "\n", + "Columns:\n", + "1. event_id: A unique event ID associated with the event.\n", + "2. transaction_hash: The hash of the transaction which triggered the event.\n", + "3. block_number: The transaction block in which the transaction was mined.\n", + "4. nft_address: The address of the NFT collection containing the minted token.\n", + "5. token_id: The ID of the token that was minted.\n", + "6. from_address: The \"from\" address for the transfer event. For a mint, this should be the 0 address: 0x0000000000000000000000000000000000000000.\n", + "7. to_address: The \"to\" address for the transfer event. This represents the owner of the freshly minted token.\n", + "8. transaction_value: The amount of WEI that were sent with the transaction in which the token was minted.\n", + "9. timestamp: The time at which the mint operation was mined into the blockchain (this is the timestamp for the mined block).\n", + "\n", + "- - -\n", + "\n", + "Dataframe: transfers\n", + "Load using:\n", + ">>> transfers_df = ds.load_dataframe(, \"transfers\")\n", + "\n", + "All token transfer events crawled in this dataset.\n", + "\n", + "Columns:\n", + "1. event_id: A unique event ID associated with the event.\n", + "2. transaction_hash: The hash of the transaction which triggered the event.\n", + "3. block_number: The transaction block in which the transaction was mined.\n", + "4. nft_address: The address of the NFT collection containing the transferred token.\n", + "5. 
token_id: The ID of the token that was transferred.\n", + "6. from_address: The \"from\" address for the transfer event. This is the address that owned the token at the *start* of the transfer.\n", + "7. to_address: The \"to\" address for the transfer event. This is the address that owned the token at the *end* of the transfer.\n", + "8. transaction_value: The amount of WEI that were sent with the transaction in which the token was transferred.\n", + "9. timestamp: The time at which the transfer operation was mined into the blockchain (this is the timestamp for the mined block).\n", + "\n", + "- - -\n", + "\n", + "Dataframe: current_owners\n", + "Load using:\n", + ">>> current_owners_df = ds.load_dataframe(, \"current_owners\")\n", + "\n", + "This table is derived from the nfts, mints, and transfers tables. It represents the current owner of each token in the dataset.\n", + "\n", + "Columns:\n", + "1. nft_address: The address of the NFT collection containing the token whose ownership we are denoting.\n", + "2. token_id: The ID of the token (inside the collection) whose ownership we are denoting.\n", + "3. owner: The address that owned the token at the time of construction of this dataset.\n", + "\n", + "- - -\n", + "\n", + "Dataframe: current_market_values\n", + "Load using:\n", + ">>> current_market_values_df = ds.load_dataframe(, \"current_market_values\")\n", + "\n", + "This table is derived from the nfts, mints, and transfers tables. It represents the current market value (in WEI) of each token in the dataset.\n", + "\n", + "Columns:\n", + "1. nft_address: The address of the NFT collection containing the token whose market value we are denoting.\n", + "2. token_id: The ID of the token (inside the collection) whose market value we are denoting.\n", + "3. 
market_value: The estimated market value of the token at the time of construction of this dataset.\n", + "\n", + "For this dataset, we estimate the market value as the last non-zero transaction value for a transfer involving this token.\n", + "This estimate may be inaccurate for some transfers (e.g. multiple token transfers made by an escrow contract in a single transaction)\n", + "but ought to be reasonably accurate for a large majority of tokens.\n", + "\n", + "- - -\n", + "\n", + "Dataframe: transfer_statistics_by_address\n", + "Load using:\n", + ">>> transfer_statistics_by_address_df = ds.load_dataframe(, \"transfer_statistics_by_address\")\n", + "\n", + "This table is derived from the nfts, mints, and transfers tables. For each address that participated in\n", + "at least one NFT transfer between April 1, 2021 and September 25, 2021, this table shows exactly how many NFTs that address transferred to\n", + "other addresses and how many NFT transfers that address was the recipient of.\n", + "\n", + "Columns:\n", + "1. address: An Ethereum address that participated in at least one NFT transfer between April 1, 2021 and September 25, 2021.\n", + "2. transfers_out: The number of NFTs that the given address transferred to any other address between April 1, 2021 and September 25, 2021.\n", + "3. transfers_in: The number of NFTs that any other address transferred to given address between April 1, 2021 and September 25, 2021.\n", + "\n", + "- - -\n", + "\n", + "Matrix: ownership_transitions\n", + "\n", + "ownership_transitions is an adjacency matrix which counts the number of times that a token was transferred from a source address (indexed by the rows of the matrix) to a target address (indexed by the columns of the matrix).\n", + "\n", + "These counts only include data about mints and transfers made between April 1, 2021 and September 25, 2021. We also denote the current owners of an NFT as having transitioned\n", + "the NFT from themselves back to themselves. 
This gives some estimate of an owner retaining the NFT in the given time period.\n", + "\n", + "Load this matrix as follows:\n", + ">>> indexed_addresses, transitions = ds.load_ownership_transitions()\n", + "\n", + "- \"indexed_addresses\" is a list denoting the address that each index (row/column) in the matrix represents.\n", + "- \"transitions\" is a numpy ndarray containing the matrix, with source addresses on the row axis and target addresses on the column axis.\n", + "\n", + "- - -\n" + ] + } + ], + "source": [ + "nfts.dataset.explain()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b25c369a-3751-4e18-a539-f8e950982537", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Path to SQLite database containing the NFTs dataset: /home/neeraj/data/nfts/nfts.sqlite\n" + ] + } + ], + "source": [ + "DATABASE = os.path.expanduser(\"~/data/nfts/nfts.sqlite\")\n", + "print(f\"Path to SQLite database containing the NFTs dataset: {DATABASE}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b7a608aa-d304-4c53-9073-bb3d2379482c", + "metadata": {}, + "outputs": [], + "source": [ + "ds = nfts.dataset.FromSQLite(DATABASE)" + ] + }, + { + "cell_type": "markdown", + "id": "8977af98-ff38-48c9-bc3a-7a11d2b7e8fc", + "metadata": { + "tags": [] + }, + "source": [ + "### Who owns NFTs?" + ] + }, + { + "cell_type": "markdown", + "id": "17564e85-99bc-4456-8353-ef892b042921", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4793b4e8-3138-4a85-8266-09c42b29eb3e", + "metadata": {}, + "outputs": [], + "source": [ + "current_owners_df = ds.load_dataframe(\"current_owners\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a52f616b-0441-46a1-b8b3-c117464b35d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addresstoken_idowner
00x00000000000b7F8E8E8Ad148f9d53303Bfe2079600xb776cAb26B9e6Be821842DC0cc0e8217489a4581
10x00000000000b7F8E8E8Ad148f9d53303Bfe2079610x8A73024B39A4477a5Dc43fD6360e446851AD1D28
20x00000000000b7F8E8E8Ad148f9d53303Bfe20796100x5e5C817E9264B46cBBB980198684Ad9d14f3e0B4
30x00000000000b7F8E8E8Ad148f9d53303Bfe20796110x8376f63c13b99D3eedfA51ddd77Ff375279B3Ba0
40x00000000000b7F8E8E8Ad148f9d53303Bfe20796120xb5e34552F32BA9226C987769BF6555a538510BA8
\n", + "
" + ], + "text/plain": [ + " nft_address token_id \\\n", + "0 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 0 \n", + "1 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 1 \n", + "2 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 10 \n", + "3 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 11 \n", + "4 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 12 \n", + "\n", + " owner \n", + "0 0xb776cAb26B9e6Be821842DC0cc0e8217489a4581 \n", + "1 0x8A73024B39A4477a5Dc43fD6360e446851AD1D28 \n", + "2 0x5e5C817E9264B46cBBB980198684Ad9d14f3e0B4 \n", + "3 0x8376f63c13b99D3eedfA51ddd77Ff375279B3Ba0 \n", + "4 0xb5e34552F32BA9226C987769BF6555a538510BA8 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "current_owners_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5ab1dbb2-0d93-4bbd-a179-8f6735089f2b", + "metadata": {}, + "outputs": [], + "source": [ + "top_owners_df = current_owners_df.groupby([\"owner\"], as_index=False).size().rename(columns={\"size\": \"num_tokens\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "223b7d4b-0362-4ce8-8ef7-4bb7f0b88ab8", + "metadata": {}, + "outputs": [], + "source": [ + "top_owners_df.sort_values(\"num_tokens\", inplace=True, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "89964e2f-458d-4e09-8f4b-cc8984a5f55f", + "metadata": {}, + "source": [ + "#### Top 20 NFT owners" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a84b69f5-1295-4ebc-b15e-34425e6ebff9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ownernum_tokens
72720x02E4103b8A3c55AcDDF298311a9928f9Fe27822C100045
00x000000000000000000000000000000000000000083548
4702270xE052113bd7D7700d623414a0a4585BCaE754E9d551028
5620450xcDA72070E455bb31C7690a170224Ce43623d0B6f50131
3692280x96bEE49d3386d674bF4E956D9B3ce61b9540409D36751
4390860xC69b4c6fFDBaF843A0d0588c99E3C67f27069BEa32905
4272700xBa0d01220a7CeA942596123102535F800f55876332691
70x000000000000000000000000000000000000dEaD19758
2770x0008d343091EF8BD3EFA730F6aAE5A26a285C7a212137
4547050xD387A6E4e84a6C86bd90C158C6028A58CC8Ac45911497
\n", + "
" + ], + "text/plain": [ + " owner num_tokens\n", + "7272 0x02E4103b8A3c55AcDDF298311a9928f9Fe27822C 100045\n", + "0 0x0000000000000000000000000000000000000000 83548\n", + "470227 0xE052113bd7D7700d623414a0a4585BCaE754E9d5 51028\n", + "562045 0xcDA72070E455bb31C7690a170224Ce43623d0B6f 50131\n", + "369228 0x96bEE49d3386d674bF4E956D9B3ce61b9540409D 36751\n", + "439086 0xC69b4c6fFDBaF843A0d0588c99E3C67f27069BEa 32905\n", + "427270 0xBa0d01220a7CeA942596123102535F800f558763 32691\n", + "7 0x000000000000000000000000000000000000dEaD 19758\n", + "277 0x0008d343091EF8BD3EFA730F6aAE5A26a285C7a2 12137\n", + "454705 0xD387A6E4e84a6C86bd90C158C6028A58CC8Ac459 11497" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_owners_df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "3590f26f-d486-4477-bf1d-b849ecf0f19b", + "metadata": { + "tags": [] + }, + "source": [ + "#### NFT ownership histogram\n", + "\n", + "The following is the distribution of the number of addresses owning $n$ NFTs for each $n \\geq 1$." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "89e9fa88-8997-4e89-a1a6-5f4dc0912be0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.xlabel(\"Number of tokens owned - n\")\n", + "plt.ylabel(\"Number of addresses owning n tokens (log scale)\")\n", + "_ = plt.hist(top_owners_df[\"num_tokens\"], bins=100, log=True)\n", + "plt.savefig(\"img/tokens_owned_histogram_log.png\", transparent=True)" + ] + }, + { + "cell_type": "markdown", + "id": "e98d9d53-4068-4178-9cc8-603dc6ed824c", + "metadata": {}, + "source": [ + "The *overwhelming* number of NFT owners each only own a small number of tokens. There are very few addresses that own hundreds or even thousands of tokens.\n", + "\n", + "**Note:** This histogram has been charted on a logarithmic scale. We have done this because the true distribution of the number of NFTs owned by each address follows an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). It would be difficult to visually tell apart the differences in ownership patterns over all owners if we charted this distribution using a linear scale." + ] + }, + { + "cell_type": "markdown", + "id": "11989c70-fe49-41c1-b28a-77d9b26fd465", + "metadata": {}, + "source": [ + "Any address which owns thousands of tokens is either purchasing those tokens automatically (if they exist on multiple contracts) or is financing the collections in which they own tokens. First, let us analyze the ownership trends among addresses which do not own large numbers of tokens. This will help us estimate trends in NFT ownership among non-algorithmic and non-smart contract owners.\n", + "\n", + "For this, we set a `scale_cutoff` and only consider addresses which own a number of tokens not exceeding that cutoff.\n", + "\n", + "This allows us to estimate on a linear scale, rather than a logarithmic one, how NFT ownership is distributed among human owners."
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6fd8e3ba-a943-4afc-8d9f-82d0f74e2d3f", + "metadata": {}, + "outputs": [], + "source": [ + "scale_cutoff = 1500" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "93681f2b-f6a8-440f-a831-ad43f5beb946", + "metadata": {}, + "outputs": [], + "source": [ + "low_scale_owners = [num_tokens for num_tokens in top_owners_df[\"num_tokens\"] if num_tokens <= scale_cutoff]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ad904ada-cffa-4dbe-8bd8-23a3cb33af50", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.xlabel(f\"Number of tokens owned - n <= {scale_cutoff}\")\n", + "plt.ylabel(\"Number of addresses owning n tokens\")\n", + "_ = plt.hist(low_scale_owners, bins=int(scale_cutoff/5))\n", + "plt.savefig(\"img/tokens_owned_histogram_low_scale.png\", transparent=True)" + ] + }, + { + "cell_type": "markdown", + "id": "3ea46e96-4771-4ef3-ac4a-a6cb310b362c", + "metadata": {}, + "source": [ + "Even at this scale, it is more instructive to view the distribution on a logarithmic scale:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3a9415c3-a5f0-4c87-a6e7-69112c2e6c0f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.xlabel(f\"Number of tokens owned - n <= {scale_cutoff}\")\n", + "plt.ylabel(\"Number of addresses owning n tokens (log scale)\")\n", + "_ = plt.hist(low_scale_owners, bins=int(scale_cutoff/50), log=True)\n", + "plt.savefig(\"img/tokens_owned_histogram_log_low_scale.png\", transparent=True)" + ] + }, + { + "cell_type": "markdown", + "id": "1febf072-b4be-4460-8ef5-88715cb31230", + "metadata": {}, + "source": [ + "This analysis shows that the *decentralized* NFT market is indeed decentralized, with proportionally few NFTs being held by addresses which are minting and purchasing NFTs at industrial scale.\n", + "\n", + "**There are vanishingly few large scale NFT owners on the Ethereum blockchain.**\n", + "\n", + "Note that this is an analysis of addresses, not real-world entities. It is possible for a single person or organization to use a distinct Ethereum address to control each NFT they own. This would currently be difficult enough operationally that only a handful of players in the NFT market are probably doing it. Even this would yield to a further network analysis of where the *funds* for each NFT purchase were coming from." + ] + }, + { + "cell_type": "markdown", + "id": "0a597bc5-24a5-49a1-b6b5-082770d36ee4", + "metadata": {}, + "source": [ + "### The shapes of NFT collections\n", + "\n", + "NFTs are released in collections, with a single contract accounting for multiple tokens.\n", + "\n", + "Are there differences between ownership distributions of NFTs like the [Ethereum Name Service (ENS)](https://ens.domains/), which have utility beyond their artistic value, and those that do not currently have such use cases?\n", + "\n", + "One way we can answer this question is to see how much information each NFT collection gives us about individual owners of tokens in that collection. 
We will do this by treating each collection as a probability distribution over owners of tokens from that collection. If the collection $C$ consists of $n$ tokens and an address $A$ owns $m$ of those tokens, we will assign that address a probability of $p_A = m/n$ in the collection's associated probability distribution. Then we will calculate the entropy:\n", + "\n", + "$$H(C) = - \\sum_{A} p_A \\log(p_A).$$\n", + "\n", + "Here, the sum is over all addresses $A$ that own at least one token from $C$.\n", + "\n", + "$H(C)$ simultaneously contains information about:\n", + "1. How many tokens were issued as part of the collection $C$.\n", + "2. How evenly the tokens in $C$ are distributed over the addresses $A$ which own those tokens." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8a08e304-b5ff-4e45-a12c-6fd562fe5255", + "metadata": {}, + "outputs": [], + "source": [ + "contract_owners_df = current_owners_df.groupby([\"nft_address\", \"owner\"], as_index=False).size().rename(columns={\"size\": \"num_tokens\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9fd82a56-fe0c-4f02-b996-c5bf0feea5e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addressownernum_tokens
00x00000000000b7F8E8E8Ad148f9d53303Bfe207960x429a635eD4DaF9529C07d5406D466B349EC343613
10x00000000000b7F8E8E8Ad148f9d53303Bfe207960x5e5C817E9264B46cBBB980198684Ad9d14f3e0B45
20x00000000000b7F8E8E8Ad148f9d53303Bfe207960x8376f63c13b99D3eedfA51ddd77Ff375279B3Ba01
30x00000000000b7F8E8E8Ad148f9d53303Bfe207960x83D7Da9E572C5ad14caAe36771022C43AF084dbF5
40x00000000000b7F8E8E8Ad148f9d53303Bfe207960x8A73024B39A4477a5Dc43fD6360e446851AD1D285
\n", + "
" + ], + "text/plain": [ + " nft_address \\\n", + "0 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 \n", + "1 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 \n", + "2 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 \n", + "3 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 \n", + "4 0x00000000000b7F8E8E8Ad148f9d53303Bfe20796 \n", + "\n", + " owner num_tokens \n", + "0 0x429a635eD4DaF9529C07d5406D466B349EC34361 3 \n", + "1 0x5e5C817E9264B46cBBB980198684Ad9d14f3e0B4 5 \n", + "2 0x8376f63c13b99D3eedfA51ddd77Ff375279B3Ba0 1 \n", + "3 0x83D7Da9E572C5ad14caAe36771022C43AF084dbF 5 \n", + "4 0x8A73024B39A4477a5Dc43fD6360e446851AD1D28 5 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "contract_owners_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7192cc40-c04f-4e91-b731-5ba6ee749fde", + "metadata": {}, + "outputs": [], + "source": [ + "contract_owners_groups = contract_owners_df.groupby([\"nft_address\"])\n", + "\n", + "entropies = {}\n", + "\n", + "for contract_address, owners_group in contract_owners_groups:\n", + " total_supply = owners_group[\"num_tokens\"].sum()\n", + " owners_group[\"p\"] = owners_group[\"num_tokens\"]/total_supply\n", + " owners_group[\"log(p)\"] = np.log2(owners_group[\"p\"])\n", + " owners_group[\"-plog(p)\"] = (-1) * owners_group[\"p\"] * owners_group[\"log(p)\"]\n", + " entropy = owners_group[\"-plog(p)\"].sum()\n", + " entropies[contract_address] = entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2dc080a0-8dd1-49ab-be2d-cc1eed5d0ff4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.xlabel(f\"Ownership entropy of NFT collection\")\n", + "plt.ylabel(\"Number of NFT collections\")\n", + "_ = plt.hist(entropies.values(), bins=80)\n", + "plt.savefig(\"img/ownership_entropy.png\", transparent=True)" + ] + }, + { + "cell_type": "markdown", + "id": "d197f6b6-65d8-4f84-9f12-9d31840adf34", + "metadata": {}, + "source": [ + "#### Collections at the extremes\n", + "\n", + "It is interesting to get a sense of what the collections look like at either extreme of this entropy spectrum." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "9e1bca5a-be9d-46c6-b21a-ae1a648b7fa7", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_entropies = [it for it in entropies.items()]\n", + "sorted_entropies.sort(key=lambda it: it[1], reverse=True)\n", + "entropies_df = pd.DataFrame.from_records(sorted_entropies, columns=[\"nft_address\", \"entropy\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c060fa53-245b-4774-8fba-0bb0aa2aed8d", + "metadata": {}, + "source": [ + "##### Highest entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c80abbc5-2268-47a7-b2df-f93450a4a7d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addressentropy
00x57f1887a8BF19b14fC0dF6Fd9B2acc9Af147eA8513.864019
10x60F80121C31A0d46B5279700f9DF786054aa5eE513.831032
20xC36442b4a4522E871399CD717aBDD847Ab11FE8813.742724
30xabc207502EA88D9BCa29B95Cd2EeE5F0d793641813.714889
40x5537d90A4A2DC9d9b37BAb49B490cF67D4C54E9113.285761
\n", + "
" + ], + "text/plain": [ + " nft_address entropy\n", + "0 0x57f1887a8BF19b14fC0dF6Fd9B2acc9Af147eA85 13.864019\n", + "1 0x60F80121C31A0d46B5279700f9DF786054aa5eE5 13.831032\n", + "2 0xC36442b4a4522E871399CD717aBDD847Ab11FE88 13.742724\n", + "3 0xabc207502EA88D9BCa29B95Cd2EeE5F0d7936418 13.714889\n", + "4 0x5537d90A4A2DC9d9b37BAb49B490cF67D4C54E91 13.285761" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entropies_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e0726c04-1349-4b8d-919e-7547cfffd6e7", + "metadata": {}, + "source": [ + "[`0x57f1887a8BF19b14fC0dF6Fd9B2acc9Af147eA85`](https://etherscan.io/address/0x57f1887a8BF19b14fC0dF6Fd9B2acc9Af147eA85) is the [Ethereum Name Service](https://ens.domains/).\n", + "\n", + "[`0x60F80121C31A0d46B5279700f9DF786054aa5eE5`](https://etherscan.io/address/0x60F80121C31A0d46B5279700f9DF786054aa5eE5) is [Rarible's](https://rarible.com/) governance token ([details](https://www.notion.so/rarible/Rarible-com-FAQ-a47b276aa1994f7c8e3bc96d700717c5)). 
Their airdrops are the cause of this high entropy.\n", + "\n", + "[`0xC36442b4a4522E871399CD717aBDD847Ab11FE88`](https://etherscan.io/address/0xC36442b4a4522E871399CD717aBDD847Ab11FE88) is [Uniswap's](https://uniswap.org/) position NFT, representing [non-fungible liquidity positions](https://uniswap.org/blog/uniswap-v3/) on Uniswap v3.\n", + "\n", + "[`0xabc207502EA88D9BCa29B95Cd2EeE5F0d7936418`](https://etherscan.io/address/0xabc207502EA88D9BCa29B95Cd2EeE5F0d7936418) are badges for [Yield Guild Games](https://yieldguild.io/), which seem to have been airdropped to many existing NFT holders.\n", + "\n", + "[`0x5537d90A4A2DC9d9b37BAb49B490cF67D4C54E91`](https://etherscan.io/address/0x5537d90A4A2DC9d9b37BAb49B490cF67D4C54E91) is the [OneDayPunk](https://punkscape.xyz/) collection, which has gained popularity as a down-market Crypto Punks alternative.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "786574c8-4226-43a4-b3e3-81fb8667583f", + "metadata": {}, + "source": [ + "##### Zero entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "d4c9d052-f9dd-4e4f-9716-31ddd8692294", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addressentropy
92870xfe00276E0A87E5e54ADD7C5FC6cdD80B363DEe040.0
92880xfe6b0dAccBAE832b0283CfBFEBe9543B6b7B10a80.0
92890xff881E3008f081707bdDA1644e6c92DB9599C1C00.0
92900xffC6c59F34Cd9f8861012FDDd0c7F1323082Ab860.0
92910xffCb352Fb3FdbEAab3F662378db28B8D151f210c0.0
\n", + "
" + ], + "text/plain": [ + " nft_address entropy\n", + "9287 0xfe00276E0A87E5e54ADD7C5FC6cdD80B363DEe04 0.0\n", + "9288 0xfe6b0dAccBAE832b0283CfBFEBe9543B6b7B10a8 0.0\n", + "9289 0xff881E3008f081707bdDA1644e6c92DB9599C1C0 0.0\n", + "9290 0xffC6c59F34Cd9f8861012FDDd0c7F1323082Ab86 0.0\n", + "9291 0xffCb352Fb3FdbEAab3F662378db28B8D151f210c 0.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entropies_df.tail()" + ] + }, + { + "cell_type": "markdown", + "id": "49008c09-4fec-482a-b080-8836922de57b", + "metadata": {}, + "source": [ + "[`0xfe00276E0A87E5e54ADD7C5FC6cdD80B363DEe04`](https://etherscan.io/address/0xfe00276E0A87E5e54ADD7C5FC6cdD80B363DEe04).\n", + "\n", + "[`0xfe6b0dAccBAE832b0283CfBFEBe9543B6b7B10a8`](https://etherscan.io/address/0xfe6b0dAccBAE832b0283CfBFEBe9543B6b7B10a8).\n", + "\n", + "[`0xff881E3008f081707bdDA1644e6c92DB9599C1C0`](https://etherscan.io/address/0xff881E3008f081707bdDA1644e6c92DB9599C1C0).\n", + "\n", + "[`0xffC6c59F34Cd9f8861012FDDd0c7F1323082Ab86`](https://etherscan.io/address/0xffC6c59F34Cd9f8861012FDDd0c7F1323082Ab86).\n", + "\n", + "[`0xffCb352Fb3FdbEAab3F662378db28B8D151f210c`](https://etherscan.io/address/0xffCb352Fb3FdbEAab3F662378db28B8D151f210c).\n", + "\n", + "All these projects are NFTs that did see release in the time period for which we collected data, but saw no further activity. That means that these are either failed projects or projects that have not yet done an official launch." + ] + }, + { + "cell_type": "markdown", + "id": "598be2d9-5ade-45cd-8777-70a0d61cae34", + "metadata": {}, + "source": [ + "##### Low entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ca65053e-9cb7-4698-94b6-7da01e509bb7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addressentropy
40250x08CdCF9ba0a4b5667F5A59B78B60FbEFb145e64c2.004886
40260xA4fF6019f9DBbb4bCC61Fa8Bd5C39F36ee4eB1642.003856
40270xB66c7Ca15Af1f357C57294BAf730ABc77FF949402.003756
40280x5f98B87fb68f7Bb6F3a60BD6f0917723365444C12.002227
40290x374DBF0dF7aBc89C2bA776F003E725177Cb357502.001823
\n", + "
" + ], + "text/plain": [ + " nft_address entropy\n", + "4025 0x08CdCF9ba0a4b5667F5A59B78B60FbEFb145e64c 2.004886\n", + "4026 0xA4fF6019f9DBbb4bCC61Fa8Bd5C39F36ee4eB164 2.003856\n", + "4027 0xB66c7Ca15Af1f357C57294BAf730ABc77FF94940 2.003756\n", + "4028 0x5f98B87fb68f7Bb6F3a60BD6f0917723365444C1 2.002227\n", + "4029 0x374DBF0dF7aBc89C2bA776F003E725177Cb35750 2.001823" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entropies_df.loc[entropies_df[\"entropy\"] > 2].tail()" + ] + }, + { + "cell_type": "markdown", + "id": "0100251f-e2f3-4665-98cf-81594ecc0145", + "metadata": {}, + "source": [ + "[`0x08CdCF9ba0a4b5667F5A59B78B60FbEFb145e64c`](https://etherscan.io/address/0x08CdCF9ba0a4b5667F5A59B78B60FbEFb145e64c) is called [WorldCupToken](https://coinclarity.com/dapp/worldcuptoken/) and was last active 4 years ago. Their recent increase in activity could be in anticipation of the next soccer world cup in 2022.\n", + "\n", + "[`0xA4fF6019f9DBbb4bCC61Fa8Bd5C39F36ee4eB164`](https://etherscan.io/address/0xA4fF6019f9DBbb4bCC61Fa8Bd5C39F36ee4eB164) is associated with a project called [instigators](https://instigators.network/).\n", + "\n", + "[`0xB66c7Ca15Af1f357C57294BAf730ABc77FF94940`](https://etherscan.io/address/0xB66c7Ca15Af1f357C57294BAf730ABc77FF94940) is a token associated with something called the [Gems of Awareness Benefit](https://nftcalendar.io/event/gems-of-awareness-benefit-for-entheon-art-by-alex-grey-x-allyson-grey/).\n", + "\n", + "[`0x5f98B87fb68f7Bb6F3a60BD6f0917723365444C1`](https://etherscan.io/address/0x5f98B87fb68f7Bb6F3a60BD6f0917723365444C1) is [SHADYCON, an NFT associated with Eminem which seems to have been marketed on Nifty Gateway](https://www.eminem.com/news/shadycon-x-nifty-gateway).\n", + "\n", + "[`0x374DBF0dF7aBc89C2bA776F003E725177Cb35750`](https://etherscan.io/address/0x374DBF0dF7aBc89C2bA776F003E725177Cb35750) is [WyldFrogz](https://twitter.com/WyldFrogz), a cryptopunks 
derivative that seems to have some kind of planet-saving theme." + ] + }, + { + "cell_type": "markdown", + "id": "23ab64f9-85fb-4a12-87ef-a5f6295b43ce", + "metadata": {}, + "source": [ + "##### Medium entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "09997f43-5eeb-43a1-9e01-4076b7a4bc8e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nft_addressentropy
15640x0ae3c3A1504E41a6877De1B854C000EC64894bEa6.021144
15650x1ECA43C93D8e06FB91489818B4967014D748Da536.017002
15660xc57605Bef27ef91DbECc839e71E49574b98857Fc6.011324
15670xd3f69F10532457D35188895fEaA4C20B730EDe886.010405
15680xba61aEF92ebF174DbB39C97Dd29D0F2bd3D83d336.009679
\n", + "
" + ], + "text/plain": [ + " nft_address entropy\n", + "1564 0x0ae3c3A1504E41a6877De1B854C000EC64894bEa 6.021144\n", + "1565 0x1ECA43C93D8e06FB91489818B4967014D748Da53 6.017002\n", + "1566 0xc57605Bef27ef91DbECc839e71E49574b98857Fc 6.011324\n", + "1567 0xd3f69F10532457D35188895fEaA4C20B730EDe88 6.010405\n", + "1568 0xba61aEF92ebF174DbB39C97Dd29D0F2bd3D83d33 6.009679" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entropies_df.loc[entropies_df[\"entropy\"] > 6].tail()" + ] + }, + { + "cell_type": "markdown", + "id": "f3f2dc22-d111-472b-a28f-076f12f98047", + "metadata": {}, + "source": [ + "[`0x0ae3c3A1504E41a6877De1B854C000EC64894bEa`](https://etherscan.io/address/0x0ae3c3A1504E41a6877De1B854C000EC64894bEa) is the [Circleorzo NFT](https://opensea.io/collection/circleorzo), a collection of images of procedurally generated circles.\n", + "\n", + "[`0x1ECA43C93D8e06FB91489818B4967014D748Da53`](https://etherscan.io/address/0x1ECA43C93D8e06FB91489818B4967014D748Da53) is [Cowboy Punks](https://twitter.com/cowboypunks?lang=en), which appeals to blockheads that prefer westerns to cyberpunk.\n", + "\n", + "[`0xc57605Bef27ef91DbECc839e71E49574b98857Fc`](https://etherscan.io/address/0xc57605Bef27ef91DbECc839e71E49574b98857Fc) seems to be associated with the [Enigma Project](https://www.producthunt.com/posts/enigma-project) and control access to puzzle games.\n", + "\n", + "[`0xd3f69F10532457D35188895fEaA4C20B730EDe88`](https://etherscan.io/address/0xd3f69F10532457D35188895fEaA4C20B730EDe88) is something called the [RTFKT Capsule Space Drip](https://rtfkt.com/spacedrip) which I do not understand and feel too old to have a hope of ever understanding. The important thing is that it seems these NFTs can be redeemed for a physical object called a space drip. 
[Here's a blog post about it](https://www.one37pm.com/nft/gaming/space-drip-rtfkt-loopify).\n", + "\n", + "[`0xba61aEF92ebF174DbB39C97Dd29D0F2bd3D83d33`](https://etherscan.io/address/0xba61aEF92ebF174DbB39C97Dd29D0F2bd3D83d33) is an NFT project called [Dommies](https://twitter.com/DommiesNFT)." + ] + }, + { + "cell_type": "markdown", + "id": "158b1714-083d-48fa-820a-c238b510de29", + "metadata": {}, + "source": [ + "##### Entropy as a measure of quality\n", + "\n", + "Based on this analysis, the ownership entropy of an NFT collection shows promise as a measure of its quality. There are certainly examples of high entropy NFT collections (like Rarible's governance token) which have that kind of entropy simply because they have been airdropped at scale. It remains to be seen what the value of these mass airdropped tokens will be in the long term.\n", + "\n", + "At the very least, the entropy measurement indicates that there is a lot of money behind those releases. This is in contrast to lower entropy releases promising thousands of tokens and only minting tens of them." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}