kopia lustrzana https://github.com/bugout-dev/moonstream
Moonworm crawler optimized to work with access id
rodzic
a25e149e1d
commit
7a3b7c15d7
|
@ -1,13 +1,14 @@
|
|||
import argparse
|
||||
import logging
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
from moonstreamdb.db import yield_db_session_ctx
|
||||
from web3 import Web3
|
||||
from web3.middleware import geth_poa_middleware
|
||||
|
||||
from ..blockchain import AvailableBlockchainType
|
||||
from ..settings import MOONSTREAM_MOONWORM_TASKS_JOURNAL, bugout_client
|
||||
from ..settings import MOONSTREAM_MOONWORM_TASKS_JOURNAL, NB_CONTROLLER_ACCESS_ID
|
||||
from .continuous_crawler import _retry_connect_web3, continuous_crawler
|
||||
from .crawler import (
|
||||
SubscriptionTypes,
|
||||
|
@ -54,7 +55,7 @@ def handle_crawl(args: argparse.Namespace) -> None:
|
|||
logger.info(
|
||||
"No web3 provider URL provided, using default (blockchan.py: connect())"
|
||||
)
|
||||
web3 = _retry_connect_web3(blockchain_type)
|
||||
web3 = _retry_connect_web3(blockchain_type, access_id=args.access_id)
|
||||
else:
|
||||
logger.info(f"Using web3 provider URL: {args.web3}")
|
||||
web3 = Web3(
|
||||
|
@ -109,11 +110,21 @@ def handle_crawl(args: argparse.Namespace) -> None:
|
|||
args.min_sleep_time,
|
||||
args.heartbeat_interval,
|
||||
args.new_jobs_refetch_interval,
|
||||
access_id=args.access_id,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.set_defaults(func=lambda _: parser.print_help())
|
||||
|
||||
parser.add_argument(
|
||||
"--access-id",
|
||||
default=NB_CONTROLLER_ACCESS_ID,
|
||||
type=UUID,
|
||||
help="User access ID",
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers()
|
||||
|
||||
crawl_parser = subparsers.add_parser("crawl")
|
||||
|
|
|
@ -3,6 +3,7 @@ import time
|
|||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from uuid import UUID
|
||||
|
||||
from moonworm.crawler.moonstream_ethereum_state_provider import ( # type: ignore
|
||||
MoonstreamEthereumStateProvider,
|
||||
|
@ -85,6 +86,7 @@ def _retry_connect_web3(
|
|||
blockchain_type: AvailableBlockchainType,
|
||||
retry_count: int = 10,
|
||||
sleep_time: float = 5,
|
||||
access_id: Optional[UUID] = None,
|
||||
) -> Web3:
|
||||
"""
|
||||
Retry connecting to the blockchain.
|
||||
|
@ -92,7 +94,7 @@ def _retry_connect_web3(
|
|||
while retry_count > 0:
|
||||
retry_count -= 1
|
||||
try:
|
||||
web3 = connect(blockchain_type)
|
||||
web3 = connect(blockchain_type, access_id=access_id)
|
||||
web3.eth.block_number
|
||||
logger.info(f"Connected to {blockchain_type}")
|
||||
return web3
|
||||
|
@ -121,6 +123,7 @@ def continuous_crawler(
|
|||
min_sleep_time: float = 0.1,
|
||||
heartbeat_interval: float = 60,
|
||||
new_jobs_refetch_interval: float = 120,
|
||||
access_id: Optional[UUID] = None,
|
||||
):
|
||||
crawler_type = "continuous"
|
||||
assert (
|
||||
|
@ -139,7 +142,7 @@ def continuous_crawler(
|
|||
|
||||
jobs_refetchet_time = crawl_start_time
|
||||
if web3 is None:
|
||||
web3 = _retry_connect_web3(blockchain_type)
|
||||
web3 = _retry_connect_web3(blockchain_type, access_id=access_id)
|
||||
|
||||
network = (
|
||||
Network.ethereum
|
||||
|
@ -281,7 +284,7 @@ def continuous_crawler(
|
|||
logger.error("Too many failures, exiting")
|
||||
raise e
|
||||
try:
|
||||
web3 = _retry_connect_web3(blockchain_type)
|
||||
web3 = _retry_connect_web3(blockchain_type, access_id=access_id)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to reconnect: {err}")
|
||||
logger.exception(err)
|
||||
|
|
Ładowanie…
Reference in New Issue