kopia lustrzana https://github.com/bugout-dev/moonstream
				
				
				
			Working crawler to parse blocks and transactions at Ethereum blockchain
							rodzic
							
								
									4b7d0cb35c
								
							
						
					
					
						commit
						a20e581b52
					
				|  | @ -0,0 +1 @@ | |||
| # moonstream crawlers | ||||
|  | @ -0,0 +1,96 @@ | |||
| """ | ||||
| Moonstream crawlers CLI. | ||||
| """ | ||||
| import argparse | ||||
| from distutils.util import strtobool | ||||
| import time | ||||
| 
 | ||||
| from .ethereum import crawl | ||||
| from .settings import MOONSTREAM_CRAWL_WORKERS | ||||
| 
 | ||||
| 
 | ||||
| def ethcrawler_blocks_add_handler(args: argparse.Namespace) -> None: | ||||
|     """ | ||||
|     Add blocks to moonstream database. | ||||
|     """ | ||||
|     try: | ||||
|         blocks_start_end = args.blocks.split("-") | ||||
|         top_block_number = int(blocks_start_end[1]) | ||||
|         bottom_block_number = int(blocks_start_end[0]) | ||||
|     except Exception: | ||||
|         print( | ||||
|             "Wrong format provided, expected {bottom_block}-{top_block}, as ex. 105-340" | ||||
|         ) | ||||
|         return | ||||
| 
 | ||||
|     block_step = 1000 | ||||
|     blocks_numbers_list_raw = list(range(top_block_number, bottom_block_number - 1, -1)) | ||||
|     blocks_numbers_list_raw_len = len(blocks_numbers_list_raw) | ||||
|     # Block steps used to prevent long executor tasks and data loss possibility | ||||
|     # Block step 2 convert [1,2,3] -> [[1,2],[3]] | ||||
|     if len(blocks_numbers_list_raw) / block_step > 1: | ||||
|         blocks_numbers_lists = [ | ||||
|             blocks_numbers_list_raw[i : i + block_step] | ||||
|             for i in range(0, blocks_numbers_list_raw_len, block_step) | ||||
|         ] | ||||
|     else: | ||||
|         blocks_numbers_lists = [blocks_numbers_list_raw] | ||||
| 
 | ||||
|     startTime = time.time() | ||||
|     for blocks_numbers_list in blocks_numbers_lists: | ||||
|         crawl(blocks_numbers_list, bool(strtobool(args.transactions))) | ||||
|     print( | ||||
|         f"Required time: {time.time() - startTime} for: {blocks_numbers_list_raw_len} " | ||||
|         f"blocks with {MOONSTREAM_CRAWL_WORKERS} workers" | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def main() -> None: | ||||
|     parser = argparse.ArgumentParser(description="Moonstream crawlers CLI") | ||||
|     parser.set_defaults(func=lambda _: parser.print_help()) | ||||
|     subcommands = parser.add_subparsers(description="Crawlers commands") | ||||
| 
 | ||||
|     parser_ethcrawler = subcommands.add_parser( | ||||
|         "ethcrawler", description="Ethereum crawler" | ||||
|     ) | ||||
|     parser_ethcrawler.set_defaults(func=lambda _: parser_ethcrawler.print_help()) | ||||
|     subcommands_ethcrawler = parser_ethcrawler.add_subparsers( | ||||
|         description="Ethereum crawler commands" | ||||
|     ) | ||||
| 
 | ||||
|     # Ethereum blocks parser | ||||
|     parser_ethcrawler_blocks = subcommands_ethcrawler.add_parser( | ||||
|         "blocks", description="Ethereum blocks commands" | ||||
|     ) | ||||
|     parser_ethcrawler_blocks.set_defaults( | ||||
|         func=lambda _: parser_ethcrawler_blocks.print_help() | ||||
|     ) | ||||
|     subcommands_ethcrawler_blocks = parser_ethcrawler_blocks.add_subparsers( | ||||
|         description="Ethereum blocks commands" | ||||
|     ) | ||||
| 
 | ||||
|     parser_ethcrawler_blocks_add = subcommands_ethcrawler_blocks.add_parser( | ||||
|         "add", description="Add ethereum blocks commands" | ||||
|     ) | ||||
|     parser_ethcrawler_blocks_add.add_argument( | ||||
|         "-b", | ||||
|         "--blocks", | ||||
|         required=True, | ||||
|         help="List of blocks range in format {bottom_block}-{top_block}", | ||||
|     ) | ||||
|     parser_ethcrawler_blocks_add.add_argument( | ||||
|         "-t", | ||||
|         "--transactions", | ||||
|         choices=["True", "False"], | ||||
|         default="False", | ||||
|         help="Add or not block transactions", | ||||
|     ) | ||||
| 
 | ||||
|     parser_ethcrawler_blocks_add.set_defaults(func=ethcrawler_blocks_add_handler) | ||||
| 
 | ||||
|     args = parser.parse_args() | ||||
|     args.func(args) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
|  | @ -0,0 +1,113 @@ | |||
| from concurrent.futures import ProcessPoolExecutor | ||||
| from typing import List, Optional | ||||
| 
 | ||||
| from web3 import Web3 | ||||
| from web3.types import BlockData | ||||
| 
 | ||||
| from .settings import MOONSTREAM_IPC_PATH, MOONSTREAM_CRAWL_WORKERS | ||||
| from moonstreamdb.db import yield_db_session_ctx | ||||
| from moonstreamdb.models import EthereumBlock, EthereumTransaction | ||||
| 
 | ||||
| 
 | ||||
| def connect(ipc_path: Optional[str] = MOONSTREAM_IPC_PATH): | ||||
|     web3_client = Web3(Web3.IPCProvider(ipc_path)) | ||||
|     return web3_client | ||||
| 
 | ||||
| 
 | ||||
| def add_block(db_session, block: BlockData, block_number: int) -> None: | ||||
|     """ | ||||
|     Add block if doesn't presented in database. | ||||
|     """ | ||||
|     block_exist = ( | ||||
|         db_session.query(EthereumBlock) | ||||
|         .filter(EthereumBlock.block_number == block_number) | ||||
|         .one_or_none() | ||||
|     ) | ||||
|     if block_exist is not None and block_exist.hash == block.hash.hex(): | ||||
|         print(f"Block: {block_number} exists") | ||||
|         return | ||||
|     if block_exist is not None and block_exist.hash != block.hash.hex(): | ||||
|         print(f"Block: {block_number} exists, but incorrect") | ||||
|         db_session.delete(block_exist) | ||||
| 
 | ||||
|     block_obj = EthereumBlock( | ||||
|         block_number=block.number, | ||||
|         difficulty=block.difficulty, | ||||
|         extra_data=block.extraData.hex(), | ||||
|         gas_limit=block.gasLimit, | ||||
|         gas_used=block.gasUsed, | ||||
|         hash=block.hash.hex(), | ||||
|         logs_bloom=block.logsBloom.hex(), | ||||
|         miner=block.miner, | ||||
|         nonce=block.nonce.hex(), | ||||
|         parent_hash=block.parentHash.hex(), | ||||
|         receipt_root=block.get("receiptRoot", ""), | ||||
|         uncles=block.sha3Uncles.hex(), | ||||
|         size=block.size, | ||||
|         state_root=block.stateRoot.hex(), | ||||
|         timestamp=block.timestamp, | ||||
|         total_difficulty=block.totalDifficulty, | ||||
|         transactions_root=block.transactionsRoot.hex(), | ||||
|     ) | ||||
|     db_session.add(block_obj) | ||||
|     print(f"Added new block: {block_number}") | ||||
|     return | ||||
| 
 | ||||
| 
 | ||||
| def add_block_transaction(db_session, block_number, tx) -> None: | ||||
|     """ | ||||
|     Add block transaction if doesn't presented in database. | ||||
|     """ | ||||
|     tx_exist = ( | ||||
|         db_session.query(EthereumTransaction) | ||||
|         .filter(EthereumTransaction.hash == tx.hash.hex()) | ||||
|         .one_or_none() | ||||
|     ) | ||||
|     if tx_exist is not None: | ||||
|         return | ||||
|     tx_obj = EthereumTransaction( | ||||
|         hash=tx.hash.hex(), | ||||
|         block_number=block_number, | ||||
|         from_address=tx["from"], | ||||
|         to_address=tx.to, | ||||
|         gas=tx.gas, | ||||
|         gas_price=tx.gasPrice, | ||||
|         input=tx.input, | ||||
|         nonce=tx.nonce, | ||||
|         transaction_index=tx.transactionIndex, | ||||
|         value=tx.value, | ||||
|     ) | ||||
|     db_session.add(tx_obj) | ||||
| 
 | ||||
| 
 | ||||
| def process_blocks(blocks_numbers: List[int], with_transactions: bool = False): | ||||
|     """ | ||||
|     Open database and geth sessions and fetch block data from blockchain. | ||||
|     """ | ||||
|     web3_client = connect() | ||||
|     for block_number in blocks_numbers: | ||||
|         with yield_db_session_ctx() as db_session: | ||||
|             block: BlockData = web3_client.eth.get_block( | ||||
|                 block_number, full_transactions=with_transactions | ||||
|             ) | ||||
|             add_block(db_session, block, block_number) | ||||
|             if with_transactions: | ||||
|                 for tx in block.transactions: | ||||
|                     add_block_transaction(db_session, block.number, tx) | ||||
|             db_session.commit() | ||||
| 
 | ||||
| 
 | ||||
| def crawl(block_numbers_list: List[int], with_transactions: bool = False): | ||||
|     """ | ||||
|     Execute crawler. | ||||
|     """ | ||||
|     with ProcessPoolExecutor(max_workers=MOONSTREAM_CRAWL_WORKERS) as executor: | ||||
|         for worker in range(1, MOONSTREAM_CRAWL_WORKERS + 1): | ||||
|             print( | ||||
|                 f"Added executor for list of blocks with len: {len(block_numbers_list[worker-1::MOONSTREAM_CRAWL_WORKERS])}" | ||||
|             ) | ||||
|             executor.submit( | ||||
|                 process_blocks, | ||||
|                 block_numbers_list[worker - 1 :: MOONSTREAM_CRAWL_WORKERS], | ||||
|                 with_transactions, | ||||
|             ) | ||||
|  | @ -0,0 +1,13 @@ | |||
| import os | ||||
| 
 | ||||
| MOONSTREAM_IPC_PATH = os.environ.get("MOONSTREAM_IPC_PATH", None) | ||||
| 
 | ||||
| MOONSTREAM_CRAWL_WORKERS = 4 | ||||
| MOONSTREAM_CRAWL_WORKERS_RAW = os.environ.get("MOONSTREAM_CRAWL_WORKERS") | ||||
| try: | ||||
|     if MOONSTREAM_CRAWL_WORKERS_RAW is not None: | ||||
|         MOONSTREAM_CRAWL_WORKERS = int(MOONSTREAM_CRAWL_WORKERS_RAW) | ||||
| except: | ||||
|     raise Exception( | ||||
|         f"Could not parse MOONSTREAM_CRAWL_WORKERS as int: {MOONSTREAM_CRAWL_WORKERS_RAW}" | ||||
|     ) | ||||
										
											Plik binarny nie jest wyświetlany.
										
									
								
							|  | @ -0,0 +1,2 @@ | |||
| export MOONSTREAM_IPC_PATH=null | ||||
| export MOONSTREAM_CRAWL_WORKERS_RAW=4 | ||||
|  | @ -0,0 +1,38 @@ | |||
| from setuptools import find_packages, setup | ||||
| 
 | ||||
| long_description = "" | ||||
| with open("README.md") as ifp: | ||||
|     long_description = ifp.read() | ||||
| 
 | ||||
| setup( | ||||
|     name="moonstreamcrawlers", | ||||
|     version="0.0.1", | ||||
|     author="Bugout.dev", | ||||
|     author_email="engineers@bugout.dev", | ||||
|     license="Apache License 2.0", | ||||
|     description="Moonstream crawlers", | ||||
|     long_description=long_description, | ||||
|     long_description_content_type="text/markdown", | ||||
|     url="https://github.com/bugout-dev/moonstream", | ||||
|     platforms="all", | ||||
|     classifiers=[ | ||||
|         "Development Status :: 2 - Pre-Alpha", | ||||
|         "Intended Audience :: Developers", | ||||
|         "Natural Language :: English", | ||||
|         "Programming Language :: Python", | ||||
|         "Programming Language :: Python :: 3", | ||||
|         "Programming Language :: Python :: 3.8", | ||||
|         "Programming Language :: Python :: Implementation :: CPython", | ||||
|         "Topic :: Software Development :: Libraries", | ||||
|         "Topic :: Software Development :: Libraries :: Python Modules", | ||||
|     ], | ||||
|     python_requires=">=3.6", | ||||
|     packages=find_packages(), | ||||
|     package_data={"bugout": ["py.typed"]}, | ||||
|     zip_safe=False, | ||||
|     install_requires=["web3"], | ||||
|     extras_require={"dev": ["black", "mypy"]}, | ||||
|     entry_points={ | ||||
|         "console_scripts": ["moonstreamcrawlers=moonstreamcrawlers.cli:main"] | ||||
|     }, | ||||
| ) | ||||
		Ładowanie…
	
		Reference in New Issue
	
	 kompotkot
						kompotkot