diff --git a/.github/workflows/lint.crawlers.yml b/.github/workflows/lint.crawlers.yml index 44c48bd3..f9f770f2 100644 --- a/.github/workflows/lint.crawlers.yml +++ b/.github/workflows/lint.crawlers.yml @@ -19,7 +19,7 @@ jobs: run: pip install -e .[dev] # - name: Mypy type check # working-directory: ./crawlers - # run: mypy moonstreamcrawlers/ + # run: mypy mooncrawl/ - name: Black syntax check working-directory: ./crawlers - run: black --check moonstreamcrawlers/ + run: black --check mooncrawl/ diff --git a/backend/moonstream/routes/txinfo.py b/backend/moonstream/routes/txinfo.py index eda472ab..3bbc48ae 100644 --- a/backend/moonstream/routes/txinfo.py +++ b/backend/moonstream/routes/txinfo.py @@ -14,7 +14,7 @@ from fastapi import ( ) from fastapi.middleware.cors import CORSMiddleware from moonstreamdb.db import yield_db_session -from moonstreamdb.models import EthereumSmartContract +from moonstreamdb.models import EthereumAddress from sqlalchemy.orm import Session from ..abi_decoder import decode_abi @@ -79,8 +79,8 @@ async def txinfo_ethereum_blockchain_handler( response.errors.append("Could not decode ABI from the given input") smart_contract = ( - db_session.query(EthereumSmartContract) - .filter(EthereumSmartContract.transaction_hash == txinfo_request.tx.hash) + db_session.query(EthereumAddress) + .filter(EthereumAddress.transaction_hash == txinfo_request.tx.hash) .one_or_none() ) diff --git a/backend/requirements.txt b/backend/requirements.txt index 3fe1cccd..67e12e9e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,7 +11,7 @@ fastapi==0.66.0 h11==0.12.0 idna==3.2 jmespath==0.10.0 --e git+https://git@github.com/bugout-dev/moonstream.git@ec3278e192119d1e8a273cfaab6cb53890d2e8e9#egg=moonstreamdb&subdirectory=db +-e git+https://git@github.com/bugout-dev/moonstream.git@39d2b8e36a49958a9ae085ec2cc1be3fc732b9d0#egg=moonstreamdb&subdirectory=db mypy==0.910 mypy-extensions==0.4.3 pathspec==0.9.0 diff --git a/crawlers/mooncrawl/cli.py b/crawlers/mooncrawl/ethcrawler.py similarity index 95% rename from crawlers/mooncrawl/cli.py rename to crawlers/mooncrawl/ethcrawler.py index c37e9ff8..065bff9f 100644 --- a/crawlers/mooncrawl/cli.py +++ b/crawlers/mooncrawl/ethcrawler.py @@ -169,16 +169,8 @@ def main() -> None: parser.set_defaults(func=lambda _: parser.print_help()) subcommands = parser.add_subparsers(description="Crawlers commands") - parser_ethcrawler = subcommands.add_parser( - "ethcrawler", description="Ethereum crawler" - ) - parser_ethcrawler.set_defaults(func=lambda _: parser_ethcrawler.print_help()) - subcommands_ethcrawler = parser_ethcrawler.add_subparsers( - description="Ethereum crawler commands" - ) - # Ethereum blocks parser - parser_ethcrawler_blocks = subcommands_ethcrawler.add_parser( + parser_ethcrawler_blocks = subcommands.add_parser( "blocks", description="Ethereum blocks commands" ) parser_ethcrawler_blocks.set_defaults( @@ -289,7 +281,7 @@ def main() -> None: func=ethcrawler_blocks_missing_handler ) - parser_ethcrawler_contracts = subcommands_ethcrawler.add_parser( + parser_ethcrawler_contracts = subcommands.add_parser( "contracts", description="Ethereum smart contract related crawlers" ) parser_ethcrawler_contracts.set_defaults( diff --git a/crawlers/mooncrawl/ethereum.py b/crawlers/mooncrawl/ethereum.py index a4f94c4a..68ff6d6f 100644 --- a/crawlers/mooncrawl/ethereum.py +++ b/crawlers/mooncrawl/ethereum.py @@ -9,7 +9,7 @@ from .settings import MOONSTREAM_IPC_PATH, MOONSTREAM_CRAWL_WORKERS from moonstreamdb.db import yield_db_session_ctx from moonstreamdb.models import ( EthereumBlock, - EthereumSmartContract, + EthereumAddress, EthereumTransaction, ) @@ -227,7 +227,7 @@ def process_contract_deployments() -> List[Tuple[str, str]]: limit = 10 transactions_remaining = True existing_contract_transaction_hashes = db_session.query( - EthereumSmartContract.transaction_hash + EthereumAddress.transaction_hash ) while transactions_remaining: @@ -251,7 +251,7 @@ def process_contract_deployments() -> List[Tuple[str, str]]: if contract_address is not None: results.append((deployment.hash, contract_address)) db_session.add( - EthereumSmartContract( + EthereumAddress( transaction_hash=deployment.hash, address=contract_address, ) diff --git a/crawlers/mooncrawl/identity.py b/crawlers/mooncrawl/identity.py new file mode 100644 index 00000000..07e533cd --- /dev/null +++ b/crawlers/mooncrawl/identity.py @@ -0,0 +1,103 @@ +import argparse +import json +import os +import time + +import requests + +from moonstreamdb.db import yield_db_session_ctx +from moonstreamdb.models import EthereumAddress + +COINMARKETCAP_API_KEY = os.environ.get("COINMARKETCAP_API_KEY") +if COINMARKETCAP_API_KEY is None: + raise ValueError("COINMARKETCAP_API_KEY environment variable must be set") + +CRAWL_ORIGINS = { + "pro": "https://pro-api.coinmarketcap.com", + "sandbox": "https://sandbox-api.coinmarketcap.com", +} + + +def identities_cmc_handler(args: argparse.Namespace) -> None: + """ + Parse metadata for Ethereum tokens. + """ + headers = { + "X-CMC_PRO_API_KEY": COINMARKETCAP_API_KEY, + "Accept": "application/json", + "Accept-Encoding": "deflate, gzip", + } + if args.sandbox: + CRAWL_ORIGIN = CRAWL_ORIGINS["sandbox"] + else: + CRAWL_ORIGIN = CRAWL_ORIGINS["pro"] + url = f"{CRAWL_ORIGIN}/v1/cryptocurrency/map" + + start_n = 1 + limit_n = 5000 + + while True: + params = {"start": start_n, "limit": limit_n} + try: + r = requests.get(url=url, headers=headers, params=params) + r.raise_for_status() + response = r.json() + except Exception as err: + raise Exception(err) + + if len(response["data"]) == 0: + print("No more data, crawling finished") + break + + with yield_db_session_ctx() as db_session: + for crypto in response["data"]: + if crypto["platform"] is not None: + if ( + crypto["platform"]["id"] == 1027 + and crypto["platform"]["token_address"] is not None + ): + + eth_token = EthereumAddress( + address=crypto["platform"]["token_address"], + name=crypto["name"], + symbol=crypto["symbol"], + ) + db_session.add(eth_token) + print(f"Added {crypto['name']} token") + + db_session.commit() + start_n += limit_n + + print(f"Loop ended, starting new from {start_n} to {start_n + limit_n - 1}") + time.sleep(1) + + +def main(): + parser = argparse.ArgumentParser(description="Crawls address identities CLI") + parser.set_defaults(func=lambda _: parser.print_help()) + subcommands = parser.add_subparsers(description="Crawlers commands") + + parser_cmc = subcommands.add_parser("cmc", description="Coinmarketcap commands") + parser_cmc.set_defaults(func=lambda _: parser_cmc.print_help()) + subcommands_parser_cmc = parser_cmc.add_subparsers( + description="Ethereum blocks commands" + ) + parser_cmc.add_argument( + "-s", + "--sandbox", + action="store_true", + help="Target to sandbox API", + ) + parser_cmc.set_defaults(func=identities_cmc_handler) + + parser_label_cloud = subcommands.add_parser( + "label_cloud", description="Etherscan label cloud commands" + ) + parser_label_cloud.set_defaults(func=identities_get_handler) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/crawlers/setup.py b/crawlers/setup.py index 737e8d65..b5a536bf 100644 --- a/crawlers/setup.py +++ b/crawlers/setup.py @@ -33,11 +33,17 @@ setup( package_data={"mooncrawl": ["py.typed"]}, zip_safe=False, install_requires=[ - "moonstreamdb @ git+https://git@github.com/bugout-dev/moonstream.git@ec3278e192119d1e8a273cfaab6cb53890d2e8e9#egg=moonstreamdb&subdirectory=db", + "moonstreamdb @ git+https://git@github.com/bugout-dev/moonstream.git@39d2b8e36a49958a9ae085ec2cc1be3fc732b9d0#egg=moonstreamdb&subdirectory=db", "requests", "tqdm", "web3", ], extras_require={"dev": ["black", "mypy", "types-requests"]}, - entry_points={"console_scripts": ["mooncrawl=mooncrawl.cli:main"]}, + entry_points={ + "console_scripts": [ + "ethcrawler=mooncrawl.ethcrawler:main", + "esd=mooncrawl.esd:main", + "identity=mooncrawl.identity:main" + ] + }, ) diff --git a/db/alembic/env.py b/db/alembic/env.py index 13d8f631..0bc13c98 100644 --- a/db/alembic/env.py +++ b/db/alembic/env.py @@ -17,22 +17,31 @@ fileConfig(config.config_file_name) # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -from moonstreamdb.models import Base as ExplorationBase +from moonstreamdb.models import Base as MoonstreamBase -target_metadata = ExplorationBase.metadata +target_metadata = MoonstreamBase.metadata # other values from the config, defined by the needs of env.py, # can be acquired: # my_important_option = config.get_main_option("my_important_option") # ... etc. -from moonstreamdb.models import EthereumBlock, EthereumTransaction, EthereumPendingTransaction, EthereumSmartContract, ESDEventSignature, ESDFunctionSignature +from moonstreamdb.models import ( + EthereumBlock, + EthereumTransaction, + EthereumPendingTransaction, + EthereumAddress, + EthereumLabel, + ESDEventSignature, + ESDFunctionSignature, +) def include_symbol(tablename, schema): return tablename in { EthereumBlock.__tablename__, EthereumTransaction.__tablename__, - EthereumSmartContract.__tablename__, + EthereumAddress.__tablename__, + EthereumLabel.__tablename__, EthereumPendingTransaction.__tablename__, ESDEventSignature.__tablename__, ESDFunctionSignature.__tablename__, diff --git a/db/alembic/versions/40871a7807f6_labels_for_addresses.py b/db/alembic/versions/40871a7807f6_labels_for_addresses.py new file mode 100644 index 00000000..30af1ac9 --- /dev/null +++ b/db/alembic/versions/40871a7807f6_labels_for_addresses.py @@ -0,0 +1,62 @@ +"""Labels for addresses + +Revision ID: 40871a7807f6 +Revises: 571f33ad7587 +Create Date: 2021-08-09 14:50:46.163063 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '40871a7807f6' +down_revision = '571f33ad7587' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('ix_ethereum_smart_contracts_address', table_name='ethereum_smart_contracts') + op.drop_index('ix_ethereum_smart_contracts_transaction_hash', table_name='ethereum_smart_contracts') + + op.execute("ALTER TABLE ethereum_smart_contracts RENAME TO ethereum_addresses;") + op.alter_column("ethereum_addresses", "transaction_hash", nullable=True) + op.add_column('ethereum_addresses', sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text("TIMEZONE('utc', statement_timestamp())"), nullable=False)) + + op.create_index(op.f('ix_ethereum_addresses_address'), 'ethereum_addresses', ['address'], unique=False) + op.create_index(op.f('ix_ethereum_addresses_transaction_hash'), 'ethereum_addresses', ['transaction_hash'], unique=False) + + op.create_table('ethereum_labels', + sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('label', sa.VARCHAR(length=256), nullable=False), + sa.Column('address_id', sa.Integer(), nullable=False), + sa.Column('label_data', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text("TIMEZONE('utc', statement_timestamp())"), nullable=False), + sa.ForeignKeyConstraint(['address_id'], ['ethereum_addresses.id'], name=op.f('fk_ethereum_labels_address_id_ethereum_addresses'), ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id', name=op.f('pk_ethereum_labels')), + sa.UniqueConstraint('id', name=op.f('uq_ethereum_labels_id')), + sa.UniqueConstraint('label', 'address_id', name=op.f('uq_ethereum_labels_label')) + ) + op.create_index(op.f('ix_ethereum_labels_address_id'), 'ethereum_labels', ['address_id'], unique=False) + op.create_index(op.f('ix_ethereum_labels_label'), 'ethereum_labels', ['label'], unique=False) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_ethereum_addresses_transaction_hash'), table_name='ethereum_addresses') + op.drop_index(op.f('ix_ethereum_addresses_address'), table_name='ethereum_addresses') + + op.execute("ALTER TABLE ethereum_addresses RENAME TO ethereum_smart_contracts;") + op.alter_column("ethereum_smart_contracts", "transaction_hash", nullable=False) + op.drop_column('ethereum_smart_contracts', 'created_at') + + op.create_index('ix_ethereum_smart_contracts_transaction_hash', 'ethereum_smart_contracts', ['transaction_hash'], unique=False) + op.create_index('ix_ethereum_smart_contracts_address', 'ethereum_smart_contracts', ['address'], unique=False) + + op.drop_index(op.f('ix_ethereum_labels_label'), table_name='ethereum_labels') + op.drop_index(op.f('ix_ethereum_labels_address_id'), table_name='ethereum_labels') + op.drop_table('ethereum_labels') + # ### end Alembic commands ### diff --git a/db/moonstreamdb/cli.py b/db/moonstreamdb/cli.py new file mode 100644 index 00000000..6e2e1d7f --- /dev/null +++ b/db/moonstreamdb/cli.py @@ -0,0 +1,121 @@ +import argparse +import json + +from .db import yield_db_session_ctx +from .models import EthereumAddress, EthereumLabel + + +def labels_add_handler(args: argparse.Namespace) -> None: + """ + Add new label for ethereum address. + """ + try: + label_data = json.loads(args.data) + except ValueError as err: + print(str(err)) + raise ValueError("Unable to parse data as dictionary") + + with yield_db_session_ctx() as db_session: + address = ( + db_session.query(EthereumAddress) + .filter(EthereumAddress.address == str(args.address)) + .one_or_none() + ) + if address is None: + print(f"There is no {args.address} address") + return + + label = EthereumLabel( + label=args.label, address_id=address.id, label_data=label_data + ) + db_session.add(label) + db_session.commit() + + print( + json.dumps( + { + "id": str(label.id), + "label": str(label.label), + "address_id": str(label.address_id), + "label_data": str(label.label_data), + "created_at": str(label.created_at), + } + ) + ) + + +def labels_list_handler(args: argparse.Namespace) -> None: + """ + Return list of all labels. + """ + with yield_db_session_ctx() as db_session: + query = db_session.query(EthereumLabel).all() + if str(args.address) is not None: + query = query.filter(EthereumAddress.address == str(args.address)) + labels = query.all() + + print( + json.dumps( + [ + { + "id": str(label.id), + "label": str(label.label), + "address_id": str(label.address_id), + "label_data": str(label.label_data), + "created_at": str(label.created_at), + } + for label in labels + ] + ) + ) + + +def main(): + parser = argparse.ArgumentParser(description="Crawls address identities CLI") + parser.set_defaults(func=lambda _: parser.print_help()) + subcommands = parser.add_subparsers(description="Crawlers commands") + + parser_labels = subcommands.add_parser("labels", description="Meta labels commands") + parser_labels.set_defaults(func=lambda _: parser_labels.print_help()) + subcommands_labels = parser_labels.add_subparsers( + description="Database meta labels commands" + ) + + parser_labels_add = subcommands_labels.add_parser( + "add", description="Add new label command" + ) + parser_labels_add.add_argument( + "-a", + "--address", + required=True, + help="Address attach to", + ) + parser_labels_add.add_argument( + "-l", + "--label", + required=True, + help="New label name", + ) + parser_labels_add.add_argument( + "-d", + "--data", + help="New label data", + ) + parser_labels_add.set_defaults(func=labels_add_handler) + + parser_labels_list = subcommands_labels.add_parser( + "list", description="List all meta labels command" + ) + parser_labels_list.add_argument( + "-a", + "--address", + help="Filter address", + ) + parser_labels_list.set_defaults(func=labels_list_handler) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/db/moonstreamdb/models.py b/db/moonstreamdb/models.py index 8c675757..4ed85bf7 100644 --- a/db/moonstreamdb/models.py +++ b/db/moonstreamdb/models.py @@ -1,4 +1,5 @@ -import sqlalchemy +import uuid + from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import ( BigInteger, @@ -10,7 +11,9 @@ from sqlalchemy import ( Numeric, Text, VARCHAR, + UniqueConstraint, ) +from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.sql import expression from sqlalchemy.ext.compiler import compiles @@ -100,17 +103,59 @@ class EthereumTransaction(Base): # type: ignore ) -class EthereumSmartContract(Base): # type: ignore - __tablename__ = "ethereum_smart_contracts" +class EthereumAddress(Base): # type: ignore + __tablename__ = "ethereum_addresses" id = Column(Integer, primary_key=True, autoincrement=True) transaction_hash = Column( VARCHAR(256), ForeignKey("ethereum_transactions.hash", ondelete="CASCADE"), - nullable=False, + nullable=True, index=True, ) address = Column(VARCHAR(256), nullable=False, index=True) + created_at = Column( + DateTime(timezone=True), server_default=utcnow(), nullable=False + ) + + +class EthereumLabel(Base): # type: ignore + """ + Example of label_data: + { + "label": "ERC20", + "label_data": { + "name": "Uniswap", + "symbol": "UNI" + } + }, + { + "label": "Exchange" + "label_data": {...} + } + """ + + __tablename__ = "ethereum_labels" + __table_args__ = (UniqueConstraint("label", "address_id"),) + + id = Column( + UUID(as_uuid=True), + primary_key=True, + default=uuid.uuid4, + unique=True, + nullable=False, + ) + label = Column(VARCHAR(256), nullable=False, index=True) + address_id = Column( + Integer, + ForeignKey("ethereum_addresses.id", ondelete="CASCADE"), + nullable=False, + index=True, + ) + label_data = Column(JSONB, nullable=True) + created_at = Column( + DateTime(timezone=True), server_default=utcnow(), nullable=False + ) class EthereumPendingTransaction(Base): # type: ignore diff --git a/db/moonstreamdb/version.py b/db/moonstreamdb/version.py index 5fcc4759..4b6fb749 100644 --- a/db/moonstreamdb/version.py +++ b/db/moonstreamdb/version.py @@ -2,4 +2,4 @@ Moonstream database version. """ -MOONSTREAMDB_VERSION = "0.0.1" +MOONSTREAMDB_VERSION = "0.0.2" diff --git a/db/setup.py b/db/setup.py index abab0340..0913022e 100644 --- a/db/setup.py +++ b/db/setup.py @@ -34,4 +34,9 @@ setup( zip_safe=False, install_requires=["alembic", "psycopg2-binary", "sqlalchemy"], extras_require={"dev": ["black", "mypy"]}, + entry_points={ + "console_scripts": [ + "moonstreamdb=moonstreamdb.cli:main", + ] + }, )