Mirror of https://github.com/bugout-dev/moonstream

fixes after review

parent 5ed9772852
commit 5187d30257
@@ -14,10 +14,8 @@ from ..ethereum import connect
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-SLEEP_TIME = 5 * 60
-
 
-def runc_crawler_asc(
+def run_crawler_asc(
     w3: Web3,
     session: Session,
     from_block: Optional[int],
@@ -25,6 +23,7 @@ def runc_crawler_asc(
     synchronize: bool,
     batch_size: int,
     respect_state: bool,
+    sleep_time: int,
 ):
     """
     Runs crawler in ascending order
@@ -44,7 +43,9 @@ def runc_crawler_asc(
         to_block = moonstream_data_store.get_last_block_number()
         logger.info(f"Ending block set to : {to_block}")
 
-    assert from_block <= to_block, "from_block must be less than or equal to to_block"
+    assert (
+        from_block <= to_block
+    ), "from_block must be less than or equal to to_block in asc order, used --order desc"
 
     logger.info(f"Starting crawling from block {from_block} to block {to_block}")
     contract_deployment_crawler.crawl(
@@ -57,13 +58,13 @@ def runc_crawler_asc(
         while True:
             contract_deployment_crawler.crawl(
                 from_block=last_crawled_block + 1,
-                to_block=None,
+                to_block=None,  # to_block will be set to last_crawled_block
                 batch_size=batch_size,
             )
-            time.sleep(SLEEP_TIME)
+            time.sleep(sleep_time)
 
 
-def runc_crawler_desc(
+def run_crawler_desc(
     w3: Web3,
     session: Session,
     from_block: Optional[int],
@@ -71,6 +72,7 @@ def runc_crawler_desc(
     synchronize: bool,
     batch_size: int,
     respect_state: bool,
+    sleep_time: int,
 ):
     """
     Runs crawler in descending order
@@ -92,7 +94,7 @@ def runc_crawler_desc(
 
     assert (
         from_block >= to_block
-    ), "from_block must be greater than or equal to to_block"
+    ), "from_block must be greater than or equal to to_block in desc order, used --order asc"
 
     logger.info(f"Starting crawling from block {from_block} to block {to_block}")
     contract_deployment_crawler.crawl(
@@ -101,16 +103,22 @@ def runc_crawler_desc(
         batch_size=batch_size,
     )
     if synchronize:
+        # It will lead to holes if crawler shutted down not clearly and --respect-state will be problem,
+        # since crawler's crawl step is implemented in asc order. Maybe later we can implement desc order
         raise ValueError("Synchronize not implemented for descending order")
         last_crawled_block = to_block
         while True:
             to_block = moonstream_data_store.get_first_block_number()
             contract_deployment_crawler.crawl(
                 from_block=last_crawled_block - 1,
                 to_block=to_block,
                 batch_size=batch_size,
             )
             time.sleep(sleep_time)
 
 
 def handle_parser(args: argparse.Namespace):
     with yield_db_session_ctx() as session:
         w3 = connect()
         if args.order == "asc":
-            runc_crawler_asc(
+            run_crawler_asc(
                 w3=w3,
                 session=session,
                 from_block=args.start,
@@ -118,9 +126,10 @@ def handle_parser(args: argparse.Namespace):
                 synchronize=args.synchronize,
                 batch_size=args.batch,
                 respect_state=args.respect_state,
+                sleep_time=args.sleep,
             )
         elif args.order == "desc":
-            runc_crawler_desc(
+            run_crawler_desc(
                 w3=w3,
                 session=session,
                 from_block=args.start,
@@ -128,9 +137,8 @@ def handle_parser(args: argparse.Namespace):
                 synchronize=args.synchronize,
                 batch_size=args.batch,
                 respect_state=args.respect_state,
+                sleep_time=args.sleep,
             )
         else:
             raise ValueError(f"Invalid order {args.order}")
 
 
 def generate_parser():
@@ -150,9 +158,16 @@ def generate_parser():
     parser.add_argument(
         "--to", "-t", type=int, default=None, help="block to stop crawling at"
     )
 
     parser.add_argument(
-        "--order", type=str, default="asc", help="order to crawl : (desc, asc)"
+        "--order",
+        "-o",
+        type=str,
+        default="asc",
+        choices=["asc", "desc"],
+        help="order to crawl : (desc, asc)",
     )
 
     parser.add_argument(
         "--synchronize", action="store_true", default=False, help="Continious crawling"
     )
@@ -163,6 +178,12 @@ def generate_parser():
         default=False,
         help="If set to True:\n If order is asc: start=last_labeled_block+1\n If order is desc: start=first_labeled_block-1",
     )
+    parser.add_argument(
+        "--sleep",
+        type=int,
+        default=1,
+        help="time to sleep synzhronize mode waiting for new block crawled to db",
+    )
     parser.set_defaults(func=handle_parser)
     return parser
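For reference, the sketch below is a standalone argparse example, not the module's actual generate_parser (which, judging from handle_parser, also defines options for start, to, batch, synchronize and respect-state). It reproduces just the two options this commit touches, to show how the choices list and the new --sleep flag behave.

# Standalone sketch only; flag names and help strings mirror the diff above,
# everything else here is illustrative.
import argparse

parser = argparse.ArgumentParser(description="contract deployment crawler (sketch)")
parser.add_argument(
    "--order",
    "-o",
    type=str,
    default="asc",
    choices=["asc", "desc"],
    help="order to crawl : (desc, asc)",
)
parser.add_argument(
    "--sleep",
    type=int,
    default=1,
    help="seconds to sleep between polls in synchronize mode",
)

args = parser.parse_args(["--order", "desc", "--sleep", "30"])
print(args.order, args.sleep)  # -> desc 30

# parser.parse_args(["--order", "sideways"])  # rejected by argparse: invalid choice

With choices=["asc", "desc"], an invalid --order value is now rejected by argparse at parse time, before handle_parser's own ValueError branch is ever reached.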
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 import logging
-from typing import List, Optional, Tuple, cast
+from typing import Iterator, List, Optional, Tuple, cast
 from hexbytes import HexBytes
 
 from moonstreamdb.models import EthereumBlock, EthereumTransaction, EthereumLabel
@@ -87,7 +87,7 @@ class MoonstreamDataStore:
         """
         first_block = (
             self.db_session.query(EthereumBlock)
-            .order_by(EthereumBlock.block_number)
+            .order_by(EthereumBlock.block_number.asc())
             .first()
         )
         if first_block is None:
@@ -214,6 +214,23 @@ def get_contract_deployment_transactions(
     return contract_deployment_transactions
 
 
+# Function Fully Generated by copilot, looks correct, lol
+def get_batch_block_range(
+    from_block: int, to_block: int, batch_size: int
+) -> Iterator[Tuple[int, int]]:
+    """
+    Returns a list of block ranges with the given batch size, from_block and to_block inclusive.
+    """
+    if from_block <= to_block:
+        while from_block <= to_block:
+            yield (from_block, min(from_block + batch_size - 1, to_block))
+            from_block += batch_size
+    else:
+        while to_block <= from_block:
+            yield (from_block, max(from_block - batch_size + 1, to_block))
+            from_block -= batch_size
+
+
 class ContractDeploymentCrawler:
     """
     Crawls contract deployments from MoonstreamDB transactions with the usage of web3
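To illustrate what the new helper yields, here is a small usage sketch. The absolute import path mooncrawl.contract.deployment_crawler is an assumption, inferred from the relative import in the new test module and the mooncrawl.contract.cli entry in setup.py.

# Illustration only; import path assumed as described above.
from mooncrawl.contract.deployment_crawler import get_batch_block_range

# Ascending: from_block <= to_block, both ends inclusive, last batch may be short.
print(list(get_batch_block_range(0, 25, 10)))
# [(0, 9), (10, 19), (20, 25)]

# Descending: from_block > to_block, ranges are emitted high-to-low.
print(list(get_batch_block_range(25, 0, 10)))
# [(25, 16), (15, 6), (5, 0)]

Note that, despite the docstring's wording, the function is a generator rather than a list, so it has to be wrapped in list() or iterated to inspect the ranges; both endpoints of every pair are inclusive.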
@@ -237,8 +254,7 @@ class ContractDeploymentCrawler:
         if to_block is None:
             to_block = self.datastore.get_last_block_number()
 
-        # Copilot is fucking awesome
-        for batch_from_block, batch_to_block in self.get_batch_block_range(
+        for batch_from_block, batch_to_block in get_batch_block_range(
             from_block, to_block, batch_size
         ):
             contract_deployment_transactions = get_contract_deployment_transactions(
@@ -247,24 +263,3 @@
             self.datastore.save_contract_deployment_labels(
                 contract_deployment_transactions
             )
-
-    # Function Fully Generated by copilot, looks correct, lol
-    def get_batch_block_range(
-        self, from_block: int, to_block: int, batch_size: int
-    ) -> List[Tuple[int, int]]:
-        """
-        Returns a list of block ranges with the given batch size
-        """
-        if from_block > to_block:
-            raise ValueError("from_block must be less than to_block")
-        if batch_size < 1:
-            raise ValueError("batch_size must be greater than 0")
-
-        block_ranges = []
-        current_from_block = from_block
-        current_to_block = current_from_block + batch_size - 1
-        while current_to_block <= to_block:
-            block_ranges.append((current_from_block, current_to_block))
-            current_from_block = current_to_block + 1
-            current_to_block = current_from_block + batch_size - 1
-        return block_ranges
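Besides moving the helper out of the class, the rewrite fixes two limitations of the removed method: it rejected descending ranges outright (the from_block > to_block ValueError), and its while-loop only appended full batches, so a final partial batch was silently dropped. The sketch below re-types the removed loop under a hypothetical name purely for comparison.

from typing import List, Tuple

def old_batch_block_range(from_block: int, to_block: int, batch_size: int) -> List[Tuple[int, int]]:
    # Re-typed copy of the removed method body, as a free function for comparison.
    block_ranges = []
    current_from_block = from_block
    current_to_block = current_from_block + batch_size - 1
    while current_to_block <= to_block:
        block_ranges.append((current_from_block, current_to_block))
        current_from_block = current_to_block + 1
        current_to_block = current_from_block + batch_size - 1
    return block_ranges

print(old_batch_block_range(0, 25, 10))
# [(0, 9), (10, 19)]  <- blocks 20..25 are never covered
# The new generator yields [(0, 9), (10, 19), (20, 25)] for the same arguments.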
@@ -0,0 +1,42 @@
+from typing import Optional
+from unittest import TestCase
+
+from web3.main import Web3
+from .deployment_crawler import get_batch_block_range
+
+
+class TestDeploymentCrawler(TestCase):
+    def test_get_batch_block_range(self):
+        from_block = 0
+        to_block = 101
+        batch_size = 10
+        result = get_batch_block_range(from_block, to_block, batch_size)
+
+        last_end: Optional[int] = None
+        for batch_start, batch_end in result:
+            if last_end is not None:
+                self.assertEqual(batch_start, last_end + 1)
+            self.assertTrue(batch_start <= batch_end)
+            self.assertTrue(batch_start <= to_block)
+            self.assertTrue(batch_end <= to_block)
+            last_end = batch_end
+
+        self.assertEqual(last_end, to_block)
+
+    def test_get_batch_block_range_with_from_block_gt_to_block(self):
+        from_block = 101
+        to_block = 0
+        batch_size = 10
+        result = get_batch_block_range(from_block, to_block, batch_size)
+
+        last_end: Optional[int] = None
+        for batch_start, batch_end in result:
+            if last_end is not None:
+                self.assertEqual(batch_start, last_end - 1)
+
+            last_end = batch_end
+            self.assertTrue(batch_start >= batch_end)
+            self.assertTrue(batch_start >= to_block)
+            self.assertTrue(batch_end >= to_block)
+
+        self.assertEqual(last_end, to_block)
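The two tests above assert adjacency and bounds of consecutive batches. A complementary property, sketched below with the same assumed import path, is that the yielded ranges tile the whole inclusive interval with no gaps, in both directions.

# Sketch only; import path assumed as in the usage example earlier.
from mooncrawl.contract.deployment_crawler import get_batch_block_range

def covered_blocks(from_block: int, to_block: int, batch_size: int) -> set:
    # Flatten every yielded (start, end) pair into the set of block numbers it covers.
    blocks: set = set()
    for start, end in get_batch_block_range(from_block, to_block, batch_size):
        low, high = min(start, end), max(start, end)
        blocks.update(range(low, high + 1))
    return blocks

# Ascending and descending calls should cover exactly the same inclusive interval.
assert covered_blocks(0, 101, 10) == set(range(0, 102))
assert covered_blocks(101, 0, 10) == set(range(0, 102))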
@@ -51,7 +51,7 @@ setup(
             "identity=mooncrawl.identity:main",
             "etherscan=mooncrawl.etherscan:main",
             "nft=mooncrawl.nft.cli:main",
-            "deploycrawler=mooncrawl.contract.cli:main",
+            "contractcrawler=mooncrawl.contract.cli:main",
         ]
     },
 )