moonworm-erc721-crawler
Andrey Dolgolev 2022-01-31 22:45:33 +02:00
parent 37d435874
commit 828447f157
1 changed file with 113 additions and 11 deletions

View file

@@ -1,10 +1,11 @@
import argparse
import hashlib
import logging
import json
import time
import traceback
from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Tuple, Any
+from typing import Dict, List, Optional, Any
from moonworm.crawler.moonstream_ethereum_state_provider import ( # type: ignore
    MoonstreamEthereumStateProvider,
@@ -13,23 +14,17 @@ from moonworm.crawler.networks import Network # type: ignore
from sqlalchemy.orm.session import Session
from web3 import Web3
from ..blockchain import connect
from ..data import AvailableBlockchainType
from .crawler import (
    EventCrawlJob,
    FunctionCallCrawlJob,
    blockchain_type_to_subscription_type,
    get_crawl_job_entries,
    heartbeat,
    get_crawler_point,
    make_event_crawl_jobs,
    make_function_call_crawl_jobs,
    merge_event_crawl_jobs,
    merge_function_call_crawl_jobs,
    update_crawl_point,
)
-from .db import add_events_to_session, add_function_calls_to_session, commit_session
+from .db import add_events_to_session, commit_session
from .event_crawler import _crawl_events
from .function_call_crawler import _crawl_functions
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -81,7 +76,7 @@ def continuous_crawler(
    new_jobs_refetch_interval: float = 120,
    use_tracker: bool = True,
):
crawler_type = "NFT_crawler"
crawler_type = "ERC721_crawler"
    assert (
        min_blocks_batch < max_blocks_batch
    ), "min_blocks_batch must be less than max_blocks_batch"
@@ -94,6 +89,17 @@ def continuous_crawler(
        new_jobs_refetch_interval > 0
    ), "new_jobs_refetch_interval must be greater than 0"
    # Create tables if they do not already exist; this works well for SQLite
    from db.moonstreamdb.db import engine
    from db.moonstreamdb.models import PolygonLabel

    # Run create_all against the metadata that PolygonLabel is registered on;
    # a freshly constructed declarative_base() has no tables registered and
    # would create nothing.
    PolygonLabel.metadata.create_all(engine)
    db_session.commit()
    crawl_start_time = datetime.utcnow()
    jobs_refetched_time = crawl_start_time
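Aside: a minimal, self-contained sketch of the create-tables-if-missing pattern used above, run against an in-memory SQLite engine. ExampleLabel is an illustrative stand-in, not the real PolygonLabel schema.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class ExampleLabel(Base):  # illustrative stand-in for PolygonLabel
    __tablename__ = "example_labels"
    id = Column(Integer, primary_key=True)
    label = Column(String, nullable=False)

engine = create_engine("sqlite:///:memory:")
# create_all checks for each table first (checkfirst=True by default), so it
# only creates tables that are missing; existing tables and data are untouched.
Base.metadata.create_all(engine)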
@@ -280,3 +286,99 @@ def continuous_crawler(
        logger.exception(e)
        raise e
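For reference, the log-and-reraise tail above can be exercised in isolation; a minimal sketch, with run_crawl_step as a hypothetical stand-in for one iteration of the crawl loop:

import logging

logger = logging.getLogger(__name__)

def run_crawl_step() -> None:
    # Hypothetical stand-in: the real loop body crawls a block range.
    raise RuntimeError("crawl step failed")

try:
    run_crawl_step()
except Exception as e:
    logger.exception(e)  # records the message together with the full traceback
    raise  # a bare raise re-raises the active exception with its traceback intact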
def main() -> None:
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()
    crawl_parser = subparsers.add_parser("crawl")
    crawl_parser.add_argument(
        "--start",
        "-s",
        type=int,
        default=None,
    )
    crawl_parser.add_argument(
        "--blockchain-type",
        "-b",
        type=str,
        choices=[
            AvailableBlockchainType.ETHEREUM.value,
            AvailableBlockchainType.POLYGON.value,
        ],
        required=True,
    )
    crawl_parser.add_argument(
        "--web3",
        type=str,
        default=None,
        help="Web3 provider URL",
    )
    crawl_parser.add_argument(
        "--poa",
        action="store_true",
        default=False,
        help="Use PoA middleware",
    )
    crawl_parser.add_argument(
        "--max-blocks-batch",
        "-m",
        type=int,
        default=100,
        help="Maximum number of blocks to crawl in a single batch",
    )
    crawl_parser.add_argument(
        "--min-blocks-batch",
        "-n",
        type=int,
        default=10,
        help="Minimum number of blocks to crawl in a single batch",
    )
    crawl_parser.add_argument(
        "--confirmations",
        "-c",
        type=int,
        default=175,
        help="Number of confirmations to wait for",
    )
    crawl_parser.add_argument(
        "--min-sleep-time",
        "-t",
        type=float,
        default=0.01,
        help="Minimum time to sleep between crawl steps",
    )
    crawl_parser.add_argument(
        "--heartbeat-interval",
        "-i",
        type=float,
        default=60,
        help="Heartbeat interval in seconds",
    )
    crawl_parser.add_argument(
        "--new-jobs-refetch-interval",
        "-r",
        type=float,
        default=120,
        help="Time to wait before refetching new jobs",
    )
    crawl_parser.add_argument(
        "--force",
        action="store_true",
        default=False,
        help="Force start from the given start block",
    )
    crawl_parser.set_defaults(func=handle_crawl)
    args = parser.parse_args()
    args.func(args)
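One sharp edge in main() as written: add_subparsers() does not require a subcommand by default, so invoking the CLI with no arguments yields a Namespace without func and args.func(args) fails with AttributeError. A minimal sketch of the guard, assuming Python 3.7+; the placeholder handler stands in for handle_crawl, which is defined elsewhere in this file:

import argparse

def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    # required=True makes argparse exit with a usage error when the
    # subcommand is missing, before args.func is ever touched.
    subparsers = parser.add_subparsers(dest="command", required=True)
    crawl_parser = subparsers.add_parser("crawl")
    crawl_parser.set_defaults(func=lambda args: None)  # placeholder handler
    return parser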