fixes after review

pull/368/head
yhtiyar 2021-11-07 17:04:58 +03:00
parent 5ed9772852
commit 5187d30257
4 changed files with 100 additions and 42 deletions

View file

@@ -14,10 +14,8 @@ from ..ethereum import connect
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
SLEEP_TIME = 5 * 60
def runc_crawler_asc(
def run_crawler_asc(
w3: Web3,
session: Session,
from_block: Optional[int],
@@ -25,6 +23,7 @@ def runc_crawler_asc(
synchronize: bool,
batch_size: int,
respect_state: bool,
sleep_time: int,
):
"""
Runs crawler in ascending order
@@ -44,7 +43,9 @@ def runc_crawler_asc(
to_block = moonstream_data_store.get_last_block_number()
logger.info(f"Ending block set to : {to_block}")
assert from_block <= to_block, "from_block must be less than or equal to to_block"
assert (
from_block <= to_block
), "from_block must be less than or equal to to_block in asc order, used --order desc"
logger.info(f"Starting crawling from block {from_block} to block {to_block}")
contract_deployment_crawler.crawl(
@@ -57,13 +58,13 @@ def runc_crawler_asc(
while True:
contract_deployment_crawler.crawl(
from_block=last_crawled_block + 1,
to_block=None,
to_block=None, # to_block will be set to last_crawled_block
batch_size=batch_size,
)
time.sleep(SLEEP_TIME)
time.sleep(sleep_time)
def runc_crawler_desc(
def run_crawler_desc(
w3: Web3,
session: Session,
from_block: Optional[int],
@@ -71,6 +72,7 @@ def runc_crawler_desc(
synchronize: bool,
batch_size: int,
respect_state: bool,
sleep_time: int,
):
"""
Runs crawler in descending order
@@ -92,7 +94,7 @@ def runc_crawler_desc(
assert (
from_block >= to_block
), "from_block must be greater than or equal to to_block"
), "from_block must be greater than or equal to to_block in desc order, used --order asc"
logger.info(f"Starting crawling from block {from_block} to block {to_block}")
contract_deployment_crawler.crawl(
@@ -101,16 +103,22 @@ def runc_crawler_desc(
batch_size=batch_size,
)
if synchronize:
# This can leave holes if the crawler shuts down uncleanly, and --respect-state becomes a problem,
# since the crawler's crawl step is implemented in asc order. We may implement desc order later.
raise ValueError("Synchronize not implemented for descending order")
last_crawled_block = to_block
while True:
to_block = moonstream_data_store.get_first_block_number()
contract_deployment_crawler.crawl(
from_block=last_crawled_block - 1,
to_block=to_block,
batch_size=batch_size,
)
time.sleep(sleep_time)
def handle_parser(args: argparse.Namespace):
with yield_db_session_ctx() as session:
w3 = connect()
if args.order == "asc":
runc_crawler_asc(
run_crawler_asc(
w3=w3,
session=session,
from_block=args.start,
@@ -118,9 +126,10 @@ def handle_parser(args: argparse.Namespace):
synchronize=args.synchronize,
batch_size=args.batch,
respect_state=args.respect_state,
sleep_time=args.sleep,
)
elif args.order == "desc":
runc_crawler_desc(
run_crawler_desc(
w3=w3,
session=session,
from_block=args.start,
@@ -128,9 +137,8 @@ def handle_parser(args: argparse.Namespace):
synchronize=args.synchronize,
batch_size=args.batch,
respect_state=args.respect_state,
sleep_time=args.sleep,
)
else:
raise ValueError(f"Invalid order {args.order}")
def generate_parser():
@@ -150,9 +158,16 @@ def generate_parser():
parser.add_argument(
"--to", "-t", type=int, default=None, help="block to stop crawling at"
)
parser.add_argument(
"--order", type=str, default="asc", help="order to crawl : (desc, asc)"
"--order",
"-o",
type=str,
default="asc",
choices=["asc", "desc"],
help="order to crawl : (desc, asc)",
)
parser.add_argument(
"--synchronize", action="store_true", default=False, help="Continious crawling"
)
@@ -163,6 +178,12 @@ def generate_parser():
default=False,
help="If set to True:\n If order is asc: start=last_labeled_block+1\n If order is desc: start=first_labeled_block-1",
)
parser.add_argument(
"--sleep",
type=int,
default=1,
help="time to sleep synzhronize mode waiting for new block crawled to db",
)
parser.set_defaults(func=handle_parser)
return parser
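
For context, a minimal sketch of the ascending synchronize loop after this change, with the removed SLEEP_TIME constant replaced by the sleep_time parameter threaded in from the new --sleep flag. The crawler/datastore objects and the refresh of last_crawled_block between iterations are paraphrased from the hunks above rather than copied from the module, so treat the helper below as illustrative only.

import time

def synchronize_asc_sketch(crawler, datastore, last_crawled_block, batch_size, sleep_time):
    # Paraphrase of run_crawler_asc's synchronize loop: keep crawling anything
    # newer than the last crawled block, then wait sleep_time seconds.
    while True:
        crawler.crawl(
            from_block=last_crawled_block + 1,
            to_block=None,  # the crawler resolves this to the datastore's last block
            batch_size=batch_size,
        )
        # Assumption: refresh the cursor from the datastore before sleeping.
        last_crawled_block = datastore.get_last_block_number()
        time.sleep(sleep_time)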

View file

@@ -1,6 +1,6 @@
from dataclasses import dataclass
import logging
from typing import List, Optional, Tuple, cast
from typing import Iterator, List, Optional, Tuple, cast
from hexbytes import HexBytes
from moonstreamdb.models import EthereumBlock, EthereumTransaction, EthereumLabel
@@ -87,7 +87,7 @@ class MoonstreamDataStore:
"""
first_block = (
self.db_session.query(EthereumBlock)
.order_by(EthereumBlock.block_number)
.order_by(EthereumBlock.block_number.asc())
.first()
)
if first_block is None:
@@ -214,6 +214,23 @@ def get_contract_deployment_transactions(
return contract_deployment_transactions
# Function Fully Generated by copilot, looks correct, lol
def get_batch_block_range(
from_block: int, to_block: int, batch_size: int
) -> Iterator[Tuple[int, int]]:
"""
Yields block ranges of the given batch size; from_block and to_block are inclusive.
"""
if from_block <= to_block:
while from_block <= to_block:
yield (from_block, min(from_block + batch_size - 1, to_block))
from_block += batch_size
else:
while to_block <= from_block:
yield (from_block, max(from_block - batch_size + 1, to_block))
from_block -= batch_size
class ContractDeploymentCrawler:
"""
Crawls contract deployments from MoonstreamDB transactions with the usage of web3
@@ -237,8 +254,7 @@ class ContractDeploymentCrawler:
if to_block is None:
to_block = self.datastore.get_last_block_number()
# Copilot is fucking awesome
for batch_from_block, batch_to_block in self.get_batch_block_range(
for batch_from_block, batch_to_block in get_batch_block_range(
from_block, to_block, batch_size
):
contract_deployment_transactions = get_contract_deployment_transactions(
@@ -247,24 +263,3 @@ class ContractDeploymentCrawler:
self.datastore.save_contract_deployment_labels(
contract_deployment_transactions
)
# Function Fully Generated by copilot, looks correct, lol
def get_batch_block_range(
self, from_block: int, to_block: int, batch_size: int
) -> List[Tuple[int, int]]:
"""
Returns a list of block ranges with the given batch size
"""
if from_block > to_block:
raise ValueError("from_block must be less than to_block")
if batch_size < 1:
raise ValueError("batch_size must be greater than 0")
block_ranges = []
current_from_block = from_block
current_to_block = current_from_block + batch_size - 1
while current_to_block <= to_block:
block_ranges.append((current_from_block, current_to_block))
current_from_block = current_to_block + 1
current_to_block = current_from_block + batch_size - 1
return block_ranges
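
The change above replaces the old list-building get_batch_block_range method, which supported only ascending ranges and silently dropped a trailing partial batch, with a module-level generator that walks in either direction and always ends exactly at to_block. A quick usage sketch with made-up block numbers; the import path is an assumption based on the test file's relative import and the setup.py entry points.

from mooncrawl.contract.deployment_crawler import get_batch_block_range  # path assumed

# Ascending: from_block <= to_block, ranges step upward
list(get_batch_block_range(0, 25, 10))
# -> [(0, 9), (10, 19), (20, 25)]

# Descending: from_block > to_block, ranges step downward
list(get_batch_block_range(25, 0, 10))
# -> [(25, 16), (15, 6), (5, 0)]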

View file

@@ -0,0 +1,42 @@
from typing import Optional
from unittest import TestCase
from web3.main import Web3
from .deployment_crawler import get_batch_block_range
class TestDeploymentCrawler(TestCase):
def test_get_batch_block_range(self):
from_block = 0
to_block = 101
batch_size = 10
result = get_batch_block_range(from_block, to_block, batch_size)
last_end: Optional[int] = None
for batch_start, batch_end in result:
if last_end is not None:
self.assertEqual(batch_start, last_end + 1)
self.assertTrue(batch_start <= batch_end)
self.assertTrue(batch_start <= to_block)
self.assertTrue(batch_end <= to_block)
last_end = batch_end
self.assertEqual(last_end, to_block)
def test_get_batch_block_range_with_from_block_gt_to_block(self):
from_block = 101
to_block = 0
batch_size = 10
result = get_batch_block_range(from_block, to_block, batch_size)
last_end: Optional[int] = None
for batch_start, batch_end in result:
if last_end is not None:
self.assertEqual(batch_start, last_end - 1)
last_end = batch_end
self.assertTrue(batch_start >= batch_end)
self.assertTrue(batch_start >= to_block)
self.assertTrue(batch_end >= to_block)
self.assertEqual(last_end, to_block)
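
The new tests need only the standard library runner. A hedged way to run them from Python, assuming the test module is named test_deployment_crawler and sits next to deployment_crawler in the mooncrawl.contract package (the file name is not shown in this diff):

import unittest

# "mooncrawl.contract.test_deployment_crawler" is an assumed module name.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "mooncrawl.contract.test_deployment_crawler.TestDeploymentCrawler"
)
unittest.TextTestRunner(verbosity=2).run(suite)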

View file

@@ -51,7 +51,7 @@ setup(
"identity=mooncrawl.identity:main",
"etherscan=mooncrawl.etherscan:main",
"nft=mooncrawl.nft.cli:main",
"deploycrawler=mooncrawl.contract.cli:main",
"contractcrawler=mooncrawl.contract.cli:main",
]
},
)
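
With the console script renamed from deploycrawler to contractcrawler, an invocation would look roughly like contractcrawler --order asc --synchronize --sleep 60, using only flags visible in the CLI diff above. Whether a subcommand is required and the exact flag for the start block are not shown in this diff, so this is illustrative only.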