kopia lustrzana https://github.com/bugout-dev/moonstream
Merge pull request #44 from zomglings/crawlers-update-ethcrawler-blocks-synchronize
Added "--order" and "--jobs" arguments to "ethcrawler blocks synchronize"pull/46/head
commit
ec3278e192
|
@ -3,9 +3,11 @@ Moonstream crawlers CLI.
|
|||
"""
|
||||
import argparse
|
||||
from distutils.util import strtobool
|
||||
from enum import Enum
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from typing import Iterator, List
|
||||
|
||||
from .ethereum import (
|
||||
crawl_blocks_executor,
|
||||
|
@ -17,37 +19,62 @@ from .ethereum import (
|
|||
from .settings import MOONSTREAM_CRAWL_WORKERS
|
||||
|
||||
|
||||
def yield_blocks_numbers_lists(blocks_range_str: str) -> None:
|
||||
class ProcessingOrder(Enum):
|
||||
DESCENDING = 0
|
||||
ASCENDING = 1
|
||||
|
||||
|
||||
def yield_blocks_numbers_lists(
|
||||
blocks_range_str: str,
|
||||
order: ProcessingOrder = ProcessingOrder.DESCENDING,
|
||||
block_step: int = 1000,
|
||||
) -> Iterator[List[int]]:
|
||||
"""
|
||||
Generate list of blocks.
|
||||
Block steps used to prevent long executor tasks and data loss possibility.
|
||||
"""
|
||||
block_step = 1000
|
||||
|
||||
try:
|
||||
blocks_start_end = blocks_range_str.split("-")
|
||||
bottom_block_number = int(blocks_start_end[0])
|
||||
top_block_number = int(blocks_start_end[1])
|
||||
required_blocks_len = top_block_number - bottom_block_number + 1
|
||||
input_start_block = int(blocks_start_end[0])
|
||||
input_end_block = int(blocks_start_end[1])
|
||||
except Exception:
|
||||
print(
|
||||
"Wrong format provided, expected {bottom_block}-{top_block}, as ex. 105-340"
|
||||
)
|
||||
return
|
||||
|
||||
print(f"Required {required_blocks_len} blocks to process")
|
||||
starting_block = max(input_start_block, input_end_block)
|
||||
ending_block = min(input_start_block, input_end_block)
|
||||
|
||||
while not top_block_number < bottom_block_number:
|
||||
temp_bottom_block_number = top_block_number - block_step
|
||||
if temp_bottom_block_number < bottom_block_number:
|
||||
temp_bottom_block_number = bottom_block_number - 1
|
||||
blocks_numbers_list = list(
|
||||
range(top_block_number, temp_bottom_block_number, -1)
|
||||
)
|
||||
stepsize = -1
|
||||
if order == ProcessingOrder.ASCENDING:
|
||||
starting_block = min(input_start_block, input_end_block)
|
||||
ending_block = max(input_start_block, input_end_block)
|
||||
stepsize = 1
|
||||
|
||||
current_block = starting_block
|
||||
|
||||
def keep_going() -> bool:
|
||||
if order == ProcessingOrder.ASCENDING:
|
||||
return current_block <= ending_block
|
||||
return current_block >= ending_block
|
||||
|
||||
while keep_going():
|
||||
temp_ending_block = current_block + stepsize * block_step
|
||||
if order == ProcessingOrder.ASCENDING:
|
||||
if temp_ending_block > ending_block:
|
||||
temp_ending_block = ending_block + 1
|
||||
else:
|
||||
if temp_ending_block < ending_block:
|
||||
temp_ending_block = ending_block - 1
|
||||
blocks_numbers_list = list(range(current_block, temp_ending_block, stepsize))
|
||||
|
||||
yield blocks_numbers_list
|
||||
|
||||
top_block_number -= block_step
|
||||
if order == ProcessingOrder.ASCENDING:
|
||||
current_block += block_step
|
||||
else:
|
||||
current_block -= block_step
|
||||
|
||||
|
||||
def ethcrawler_blocks_sync_handler(args: argparse.Namespace) -> None:
|
||||
|
@ -64,28 +91,21 @@ def ethcrawler_blocks_sync_handler(args: argparse.Namespace) -> None:
|
|||
print(
|
||||
f"Synchronization is unnecessary for blocks {bottom_block_number}-{top_block_number - 1}"
|
||||
)
|
||||
time.sleep(20)
|
||||
time.sleep(5)
|
||||
continue
|
||||
if top_block_number - bottom_block_number >= 10:
|
||||
for blocks_numbers_list in yield_blocks_numbers_lists(
|
||||
f"{bottom_block_number}-{top_block_number}"
|
||||
):
|
||||
print(
|
||||
f"Adding blocks {blocks_numbers_list[-1]}-{blocks_numbers_list[0]}"
|
||||
)
|
||||
crawl_blocks_executor(
|
||||
block_numbers_list=blocks_numbers_list,
|
||||
with_transactions=bool(strtobool(args.transactions)),
|
||||
)
|
||||
else:
|
||||
blocks_numbers_list = range(bottom_block_number, top_block_number + 1)
|
||||
print(f"Adding blocks {bottom_block_number}-{top_block_number - 1}")
|
||||
crawl_blocks(
|
||||
blocks_numbers=blocks_numbers_list,
|
||||
|
||||
for blocks_numbers_list in yield_blocks_numbers_lists(
|
||||
f"{bottom_block_number}-{top_block_number}",
|
||||
order=args.order,
|
||||
):
|
||||
print(f"Adding blocks {blocks_numbers_list[-1]}-{blocks_numbers_list[0]}")
|
||||
# TODO(kompotkot): Set num_processes argument based on number of blocks to synchronize.
|
||||
crawl_blocks_executor(
|
||||
block_numbers_list=blocks_numbers_list,
|
||||
with_transactions=bool(strtobool(args.transactions)),
|
||||
num_processes=args.jobs,
|
||||
)
|
||||
print(f"Synchronized blocks from {bottom_block_number} to {top_block_number}")
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
def ethcrawler_blocks_add_handler(args: argparse.Namespace) -> None:
|
||||
|
@ -169,6 +189,18 @@ def main() -> None:
|
|||
description="Ethereum blocks commands"
|
||||
)
|
||||
|
||||
valid_processing_orders = {
|
||||
"asc": ProcessingOrder.ASCENDING,
|
||||
"desc": ProcessingOrder.DESCENDING,
|
||||
}
|
||||
|
||||
def processing_order(raw_order: str) -> ProcessingOrder:
|
||||
if raw_order in valid_processing_orders:
|
||||
return valid_processing_orders[raw_order]
|
||||
raise ValueError(
|
||||
f"Invalid processing order ({raw_order}). Valid choices: {valid_processing_orders.keys()}"
|
||||
)
|
||||
|
||||
parser_ethcrawler_blocks_sync = subcommands_ethcrawler_blocks.add_parser(
|
||||
"synchronize", description="Synchronize to latest ethereum block commands"
|
||||
)
|
||||
|
@ -186,6 +218,22 @@ def main() -> None:
|
|||
default=0,
|
||||
help="(Optional) Block to start synchronization from. Default: 0",
|
||||
)
|
||||
parser_ethcrawler_blocks_sync.add_argument(
|
||||
"--order",
|
||||
type=processing_order,
|
||||
default=ProcessingOrder.DESCENDING,
|
||||
help="Order in which to process blocks (choices: desc, asc; default: desc)",
|
||||
)
|
||||
parser_ethcrawler_blocks_sync.add_argument(
|
||||
"-j",
|
||||
"--jobs",
|
||||
type=int,
|
||||
default=MOONSTREAM_CRAWL_WORKERS,
|
||||
help=(
|
||||
f"Number of processes to use when synchronizing (default: {MOONSTREAM_CRAWL_WORKERS})."
|
||||
" If you set to 1, the main process handles synchronization without spawning subprocesses."
|
||||
),
|
||||
)
|
||||
parser_ethcrawler_blocks_sync.set_defaults(func=ethcrawler_blocks_sync_handler)
|
||||
|
||||
parser_ethcrawler_blocks_add = subcommands_ethcrawler_blocks.add_parser(
|
||||
|
|
|
@ -136,6 +136,7 @@ def crawl_blocks_executor(
|
|||
block_numbers_list: List[int],
|
||||
with_transactions: bool = False,
|
||||
verbose: bool = False,
|
||||
num_processes: int = MOONSTREAM_CRAWL_WORKERS,
|
||||
) -> None:
|
||||
"""
|
||||
Execute crawler in processes.
|
||||
|
@ -153,27 +154,28 @@ def crawl_blocks_executor(
|
|||
worker_job_lists[i % MOONSTREAM_CRAWL_WORKERS].append(block_number)
|
||||
|
||||
results: List[Future] = []
|
||||
if num_processes == 1:
|
||||
return crawl_blocks(block_numbers_list, with_transactions, verbose)
|
||||
else:
|
||||
with ProcessPoolExecutor(max_workers=MOONSTREAM_CRAWL_WORKERS) as executor:
|
||||
for worker in worker_indices:
|
||||
if verbose:
|
||||
print(f"Spawned process for {len(worker_job_lists[worker])} blocks")
|
||||
result = executor.submit(
|
||||
crawl_blocks,
|
||||
worker_job_lists[worker],
|
||||
with_transactions,
|
||||
)
|
||||
result.add_done_callback(record_error)
|
||||
results.append(result)
|
||||
|
||||
with ProcessPoolExecutor(max_workers=MOONSTREAM_CRAWL_WORKERS) as executor:
|
||||
for worker in worker_indices:
|
||||
if verbose:
|
||||
print(f"Spawned process for {len(worker_job_lists[worker])} blocks")
|
||||
result = executor.submit(
|
||||
crawl_blocks,
|
||||
worker_job_lists[worker],
|
||||
with_transactions,
|
||||
)
|
||||
result.add_done_callback(record_error)
|
||||
results.append(result)
|
||||
|
||||
wait(results)
|
||||
# TODO(kompotkot): Return list of errors and colors responsible for
|
||||
# handling errors
|
||||
if len(errors) > 0:
|
||||
print("Errors:")
|
||||
for error in errors:
|
||||
print(f"- {error}")
|
||||
|
||||
wait(results)
|
||||
# TODO(kompotkot): Return list of errors and colors responsible for
|
||||
# handling errors
|
||||
if len(errors) > 0:
|
||||
print("Errors:")
|
||||
for error in errors:
|
||||
print(f"- {error}")
|
||||
|
||||
def process_contract_deployments() -> List[Tuple[str, str]]:
|
||||
"""
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
import unittest
|
||||
|
||||
from . import cli
|
||||
|
||||
|
||||
class TestYieldBlockNumbersLists(unittest.TestCase):
|
||||
def test_yield_descending_10_6_step_4(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", block_step=4
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8, 7], [6]])
|
||||
|
||||
def test_yield_descending_10_6_step_3(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", block_step=3
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8], [7, 6]])
|
||||
|
||||
def test_yield_descending_10_6_descending_step_3(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", cli.ProcessingOrder.DESCENDING, 3
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8], [7, 6]])
|
||||
|
||||
def test_yield_descending_10_6_descending_step_10(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", cli.ProcessingOrder.DESCENDING, 10
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8, 7, 6]])
|
||||
|
||||
def test_yield_descending_6_10_step_4(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", block_step=4
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8, 7], [6]])
|
||||
|
||||
def test_yield_descending_6_10_step_3(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", block_step=3
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8], [7, 6]])
|
||||
|
||||
def test_yield_descending_6_10_descending_step_3(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", cli.ProcessingOrder.DESCENDING, 3
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8], [7, 6]])
|
||||
|
||||
def test_yield_descending_6_10_descending_step_10(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", cli.ProcessingOrder.DESCENDING, 10
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[10, 9, 8, 7, 6]])
|
||||
|
||||
def test_yield_ascending_10_6_ascending_step_3(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", cli.ProcessingOrder.ASCENDING, 3
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[6, 7, 8], [9, 10]])
|
||||
|
||||
def test_yield_ascending_10_6_ascending_step_10(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"10-6", cli.ProcessingOrder.ASCENDING, 10
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[6, 7, 8, 9, 10]])
|
||||
|
||||
def test_yield_ascending_6_10_ascending_step_4(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", cli.ProcessingOrder.ASCENDING, 4
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[6, 7, 8, 9], [10]])
|
||||
|
||||
def test_yield_ascending_6_10_ascending_step_10(self):
|
||||
partition = [
|
||||
block_numbers_list
|
||||
for block_numbers_list in cli.yield_blocks_numbers_lists(
|
||||
"6-10", cli.ProcessingOrder.ASCENDING, 10
|
||||
)
|
||||
]
|
||||
self.assertListEqual(partition, [[6, 7, 8, 9, 10]])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
Ładowanie…
Reference in New Issue