Mirror of https://github.com/bugout-dev/moonstream
Add initial working state.
parent
07ad71fd9c
commit
cf93f99fb1
@@ -20,6 +20,7 @@ from .crawler import (
     make_event_crawl_jobs,
     make_function_call_crawl_jobs,
     find_all_deployed_blocks,
+    update_job_state_with_filters,
 )
 from .db import get_first_labeled_block_number, get_last_labeled_block_number
 from .historical_crawler import historical_crawler
@@ -37,23 +38,45 @@ def handle_crawl(args: argparse.Namespace) -> None:
             subscription_type,
             "event",
             MOONSTREAM_MOONWORM_TASKS_JOURNAL,
-        ),
-        moonworm=True,
+        )
     )
     logger.info(f"Initial event crawl jobs count: {len(initial_event_jobs)}")
 
+    if len(initial_event_jobs) > 0:
+        initial_event_jobs = update_job_state_with_filters(  # type: ignore
+            events=initial_event_jobs,
+            address_filter=[],
+            required_tags=[
+                "historical_crawl_status:pending",
+                "moonworm_task_pikedup:False",
+            ],
+            tags_to_add=["moonworm_task_pikedup:True"],
+            tags_to_delete=["moonworm_task_pikedup:False"],
+        )
+
     initial_function_call_jobs = make_function_call_crawl_jobs(
         get_crawl_job_entries(
             subscription_type,
             "function",
             MOONSTREAM_MOONWORM_TASKS_JOURNAL,
-        ),
-        moonworm=True,
+        )
     )
     logger.info(
         f"Initial function call crawl jobs count: {len(initial_function_call_jobs)}"
     )
 
+    if len(initial_function_call_jobs) > 0:
+        initial_event_jobs = update_job_state_with_filters(  # type: ignore
+            events=initial_event_jobs,
+            address_filter=[],
+            required_tags=[
+                "historical_crawl_status:pending",
+                "moonworm_task_pikedup:False",
+            ],
+            tags_to_add=["moonworm_task_pikedup:True"],
+            tags_to_delete=["moonworm_task_pikedup:False"],
+        )
+
     logger.info(f"Blockchain type: {blockchain_type.value}")
     with yield_db_session_ctx() as db_session:
         web3: Optional[Web3] = None
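The update_job_state_with_filters call added above only touches journal entries whose tags contain every tag in required_tags; for those entries it applies tags_to_add and tags_to_delete, flipping pending tasks from moonworm_task_pikedup:False to :True. A minimal, self-contained sketch of that tag transition (plain dicts stand in for Bugout journal entries; transition_tags is an illustrative helper, not the project's API):

from typing import Dict, List

def transition_tags(
    entries: Dict[str, List[str]],
    required_tags: List[str],
    tags_to_add: List[str],
    tags_to_delete: List[str],
) -> Dict[str, List[str]]:
    # Apply the add/delete transition to every entry whose tags
    # contain all of the required tags (subset check).
    updated = {}
    for entry_id, tags in entries.items():
        if set(required_tags).issubset(set(tags)):
            tags = [tag for tag in tags if tag not in tags_to_delete] + tags_to_add
        updated[entry_id] = tags
    return updated

entries = {"entry-1": ["historical_crawl_status:pending", "moonworm_task_pikedup:False"]}
print(
    transition_tags(
        entries,
        required_tags=["historical_crawl_status:pending", "moonworm_task_pikedup:False"],
        tags_to_add=["moonworm_task_pikedup:True"],
        tags_to_delete=["moonworm_task_pikedup:False"],
    )
)
# {'entry-1': ['historical_crawl_status:pending', 'moonworm_task_pikedup:True']}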
@@ -143,7 +166,6 @@ def handle_historical_crawl(args: argparse.Namespace) -> None:
             [
                 "moonworm_task_pikedup:True",
                 "historical_crawl_status:pending",
-                "progress:0",
             ]
         )
 
@@ -190,16 +212,46 @@ def handle_historical_crawl(args: argparse.Namespace) -> None:
         filtered_function_call_jobs = []
         logger.info(f"Removing function call crawl jobs since --only-events is set")
 
+    if args.only_functions:
+        filtered_event_jobs = []
+        logger.info(
+            f"Removing event crawl jobs since --only-functions is set. Function call jobs count: {len(filtered_function_call_jobs)}"
+        )
+
+    if args.only_events and args.only_functions:
+        raise ValueError(
+            "--only-events and --only-functions cannot be set at the same time"
+        )
+
+    if args.tasks_journal:
+        if len(filtered_event_jobs) > 0:
+            filtered_event_jobs = update_job_state_with_filters(  # type: ignore
+                events=filtered_event_jobs,
+                address_filter=[],
+                required_tags=[
+                    "historical_crawl_status:pending",
+                    "moonworm_task_pikedup:True",
+                ],
+                tags_to_add=["historical_crawl_status:in_progress"],
+                tags_to_delete=["historical_crawl_status:pending"],
+            )
+
+        if len(filtered_function_call_jobs) > 0:
+            filtered_function_call_jobs = update_job_state_with_filters(  # type: ignore
+                function_calls=filtered_function_call_jobs,
+                address_filter=[],
+                required_tags=[
+                    "historical_crawl_status:pending",
+                    "moonworm_task_pikedup:True",
+                ],
+                tags_to_add=["historical_crawl_status:in_progress"],
+                tags_to_delete=["historical_crawl_status:pending"],
+            )
+
     logger.info(
         f"Initial function call crawl jobs count: {len(filtered_function_call_jobs)}"
     )
 
-    addresses_set = set()
-    for job in filtered_event_jobs:
-        addresses_set.update(job.contracts)
-    for function_job in filtered_function_call_jobs:
-        addresses_set.add(function_job.contract_address)
-
     logger.info(f"Blockchain type: {blockchain_type.value}")
     with yield_db_session_ctx() as db_session:
         web3: Optional[Web3] = None
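The flag handling added above is plain list filtering: --only-events drops the function call jobs, --only-functions drops the event jobs, and combining both is rejected; --tasks-journal then promotes the remaining pending jobs to historical_crawl_status:in_progress. A compact sketch of the filtering logic with stand-in values (filter_jobs is illustrative, not the project's API):

from typing import List, Tuple

def filter_jobs(
    event_jobs: List[str],
    function_call_jobs: List[str],
    only_events: bool,
    only_functions: bool,
) -> Tuple[List[str], List[str]]:
    # Setting both flags would leave nothing to crawl, so it is an error.
    if only_events and only_functions:
        raise ValueError("--only-events and --only-functions cannot be set at the same time")
    if only_events:
        function_call_jobs = []
    if only_functions:
        event_jobs = []
    return event_jobs, function_call_jobs

print(filter_jobs(["Transfer"], ["mint"], only_events=True, only_functions=False))
# (['Transfer'], [])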
@@ -224,14 +276,23 @@ def handle_historical_crawl(args: argparse.Namespace) -> None:
         )
         logger.info(f"Last labeled block: {last_labeled_block}")
 
-        if args.tasks_journal:
-            start_block = int(web3.eth.blockNumber) - 1
-            end_block = min(
-                find_all_deployed_blocks(blockchain_type, list(addresses_set))
+        addresses_deployment_blocks = None
+
+        # get set of addresses from event jobs and function call jobs
+        if args.find_deployed_blocks:
+            addresses_set = set()
+            for job in filtered_event_jobs:
+                addresses_set.update(job.contracts)
+            for function_job in filtered_function_call_jobs:
+                addresses_set.add(function_job.contract_address)
+
+            if args.start is None:
+                start_block = web3.eth.blockNumber - 1
+
+            addresses_deployment_blocks = find_all_deployed_blocks(
+                blockchain_type, list(addresses_set)
             )
-        else:
-            start_block = args.start
-            end_block = args.end
+            end_block = min(addresses_deployment_blocks.values())
 
         if start_block is None:
             logger.info("No start block provided")
@@ -276,6 +337,7 @@ def handle_historical_crawl(args: argparse.Namespace) -> None:
             args.max_blocks_batch,
             args.min_sleep_time,
             access_id=args.access_id,
+            addresses_deployment_blocks=addresses_deployment_blocks,
         )
 
 
@@ -454,6 +516,18 @@ def main() -> None:
         default=False,
         help="Only crawl events",
     )
+    historical_crawl_parser.add_argument(
+        "--only-functions",
+        action="store_true",
+        default=False,
+        help="Only crawl function calls",
+    )
+    historical_crawl_parser.add_argument(
+        "--find-deployed-blocks",
+        action="store_true",
+        default=False,
+        help="Find all deployed blocks",
+    )
     historical_crawl_parser.add_argument(
         "--tasks-journal",
         action="store_true",
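The two new arguments are plain store_true switches that default to False, alongside the existing --tasks-journal flag. A self-contained argparse sketch of the same definitions (the parser object and prog string here are hypothetical; the flag names, defaults, and help texts of the new flags mirror the diff):

import argparse

parser = argparse.ArgumentParser(prog="historical-crawl-demo")  # hypothetical prog name
parser.add_argument("--only-events", action="store_true", default=False, help="Only crawl events")
parser.add_argument("--only-functions", action="store_true", default=False, help="Only crawl function calls")
parser.add_argument("--find-deployed-blocks", action="store_true", default=False, help="Find all deployed blocks")
parser.add_argument("--tasks-journal", action="store_true")

args = parser.parse_args(["--only-events", "--find-deployed-blocks"])
print(args.only_events, args.only_functions, args.find_deployed_blocks, args.tasks_journal)
# True False True False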
@@ -220,6 +220,30 @@ def continuous_crawler(
                 event_crawl_jobs, function_call_crawl_jobs = _refetch_new_jobs(
                     event_crawl_jobs, function_call_crawl_jobs, blockchain_type
                 )
+                if len(event_crawl_jobs) > 0:
+                    event_crawl_jobs = update_job_state_with_filters(  # type: ignore
+                        events=event_crawl_jobs,
+                        address_filter=[],
+                        required_tags=[
+                            "historical_crawl_status:pending",
+                            "moonworm_task_pikedup:False",
+                        ],
+                        tags_to_add=["moonworm_task_pikedup:True"],
+                        tags_to_delete=["moonworm_task_pikedup:False"],
+                    )
+
+                if len(function_call_crawl_jobs) > 0:
+                    function_call_crawl_jobs = update_job_state_with_filters(  # type: ignore
+                        events=function_call_crawl_jobs,
+                        address_filter=[],
+                        required_tags=[
+                            "historical_crawl_status:pending",
+                            "moonworm_task_pikedup:False",
+                        ],
+                        tags_to_add=["moonworm_task_pikedup:True"],
+                        tags_to_delete=["moonworm_task_pikedup:False"],
+                    )
+
                 jobs_refetchet_time = current_time
 
             if current_time - last_heartbeat_time > timedelta(
@@ -5,7 +5,7 @@ import time
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Callable, Dict, List, Optional, cast
+from typing import Any, Callable, Dict, List, Optional, cast, Union
 from uuid import UUID
 
 from bugout.data import BugoutSearchResult
@@ -21,6 +21,8 @@ from ..settings import (
     MOONSTREAM_ADMIN_ACCESS_TOKEN,
     MOONSTREAM_MOONWORM_TASKS_JOURNAL,
     bugout_client,
+    HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES,
+    HISTORICAL_CRAWLER_STATUSES,
 )
 
 logging.basicConfig(level=logging.INFO)
@@ -146,6 +148,7 @@ class EventCrawlJob:
     event_abi_hash: str
     event_abi: Dict[str, Any]
     contracts: List[ChecksumAddress]
+    entries_ids: Dict[ChecksumAddress, Dict[UUID, List[str]]]
     created_at: int
 
 
@@ -153,6 +156,7 @@ class EventCrawlJob:
 class FunctionCallCrawlJob:
     contract_abi: List[Dict[str, Any]]
     contract_address: ChecksumAddress
+    entries_tags: Dict[UUID, List[str]]
     created_at: int
 
 
@@ -209,13 +213,13 @@ def get_crawl_job_entries(
 
 def find_all_deployed_blocks(
     blockchain_type: AvailableBlockchainType, addresses_set: List[ChecksumAddress]
-):
+) -> Dict[ChecksumAddress, int]:
     """
     find all deployed blocks for given addresses
     """
 
     web3 = _retry_connect_web3(blockchain_type)
-    all_deployed_blocks = []
+    all_deployed_blocks = {}
     for address in addresses_set:
         try:
             code = web3.eth.getCode(address)
@@ -226,8 +230,7 @@ def find_all_deployed_blocks(
                 web3_interval=0.5,
             )
             if block is not None:
-                all_deployed_blocks.append(address)
-
+                all_deployed_blocks[address] = block
         except Exception as e:
             logger.error(f"Failed to get code for {address}: {e}")
     return all_deployed_blocks
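find_all_deployed_blocks now returns a mapping from contract address to the block where its deployment was found, instead of a list of addresses, so callers can derive a crawl range directly; handle_historical_crawl above takes min() over the values as end_block. A small sketch of consuming that mapping (the addresses and block numbers are made-up placeholders, not real chain data):

from typing import Dict

# Stand-in for the result of find_all_deployed_blocks(blockchain_type, addresses):
addresses_deployment_blocks: Dict[str, int] = {
    "0xAaAa...": 15_000_000,  # placeholder address -> deployment block
    "0xBbBb...": 14_250_000,
}

start_block = 17_000_000  # e.g. web3.eth.blockNumber - 1
end_block = min(addresses_deployment_blocks.values())  # oldest deployment wins
print(start_block, end_block)
# 17000000 14250000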
@@ -240,9 +243,7 @@ def _get_tag(entry: BugoutSearchResult, tag: str) -> str:
     raise ValueError(f"Tag {tag} not found in {entry}")
 
 
-def make_event_crawl_jobs(
-    entries: List[BugoutSearchResult], moonworm: bool = False
-) -> List[EventCrawlJob]:
+def make_event_crawl_jobs(entries: List[BugoutSearchResult]) -> List[EventCrawlJob]:
     """
     Create EventCrawlJob objects from bugout entries.
     """
@@ -253,27 +254,23 @@ def make_event_crawl_jobs(entries: List[BugoutSearchResult]) -> List[EventCrawlJob]:
         abi_hash = _get_tag(entry, "abi_method_hash")
         contract_address = Web3().toChecksumAddress(_get_tag(entry, "address"))
 
-        # if entry.tags not contain moonworm_task_pikedup:True
-        if "moonworm_task_pikedup:True" not in entry.tags and moonworm:
-            # Update the tag to pickedup
-            bugout_client.update_tags(
-                token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
-                journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
-                entry_id=entry.entry_url.split("/")[-1],
-                tags=["moonworm_task_pikedup:True"],
-                timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
-            )
+        entry_id = UUID(entry.entry_url.split("/")[-1])  # crying emoji
 
         existing_crawl_job = crawl_job_by_hash.get(abi_hash)
         if existing_crawl_job is not None:
             if contract_address not in existing_crawl_job.contracts:
                 existing_crawl_job.contracts.append(contract_address)
+                existing_crawl_job.entries_ids[contract_address] = {
+                    entry_id: entry.tags
+                }
+
         else:
             abi = cast(str, entry.content)
             new_crawl_job = EventCrawlJob(
                 event_abi_hash=abi_hash,
                 event_abi=json.loads(abi),
                 contracts=[contract_address],
+                entries_ids={contract_address: {entry_id: entry.tags}},
                 created_at=int(datetime.fromisoformat(entry.created_at).timestamp()),
             )
             crawl_job_by_hash[abi_hash] = new_crawl_job
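The new entries_ids field keeps, per contract address, the journal entry UUID together with that entry's tags, which is what later lets update_job_tags map Bugout responses back onto the right crawl job. A minimal sketch of that nesting with placeholder values (a simplified dataclass, not the full EventCrawlJob):

from dataclasses import dataclass, field
from typing import Dict, List
from uuid import UUID, uuid4

@dataclass
class EventJobSketch:
    contracts: List[str] = field(default_factory=list)
    # contract address -> {journal entry id -> tags of that entry}
    entries_ids: Dict[str, Dict[UUID, List[str]]] = field(default_factory=dict)

job = EventJobSketch()
address, entry_id = "0xCcCc...", uuid4()  # placeholder address, random UUID
job.contracts.append(address)
job.entries_ids[address] = {entry_id: ["historical_crawl_status:pending"]}
print(job.entries_ids[address][entry_id])
# ['historical_crawl_status:pending']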
@@ -283,7 +280,6 @@ def make_event_crawl_jobs(
 
 def make_function_call_crawl_jobs(
     entries: List[BugoutSearchResult],
-    moonworm: bool = False,
 ) -> List[FunctionCallCrawlJob]:
     """
     Create FunctionCallCrawlJob objects from bugout entries.
@@ -293,26 +289,18 @@ def make_function_call_crawl_jobs(
     method_signature_by_address: Dict[str, List[str]] = {}
 
     for entry in entries:
+        entry_id = UUID(entry.entry_url.split("/")[-1])  # crying emoji
         contract_address = Web3().toChecksumAddress(_get_tag(entry, "address"))
         abi = json.loads(cast(str, entry.content))
         method_signature = encode_function_signature(abi)
         if method_signature is None:
             raise ValueError(f"{abi} is not a function ABI")
 
-        if "moonworm_task_pikedup:True" not in entry.tags and moonworm:
-            # Update the tag to pickedup
-            bugout_client.update_tags(
-                token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
-                journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
-                entry_id=entry.entry_url.split("/")[-1],
-                tags=["moonworm_task_pikedup:True"],
-                timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
-            )
-
         if contract_address not in crawl_job_by_address:
             crawl_job_by_address[contract_address] = FunctionCallCrawlJob(
                 contract_abi=[abi],
                 contract_address=contract_address,
+                entries_tags={entry_id: entry.tags},
                 created_at=int(datetime.fromisoformat(entry.created_at).timestamp()),
             )
             method_signature_by_address[contract_address] = [method_signature]
@@ -321,6 +309,9 @@ def make_function_call_crawl_jobs(
             if method_signature not in method_signature_by_address[contract_address]:
                 crawl_job_by_address[contract_address].contract_abi.append(abi)
                 method_signature_by_address[contract_address].append(method_signature)
+            crawl_job_by_address[contract_address].entries_tags[
+                entry_id
+            ] = entry.tags
 
     return [crawl_job for crawl_job in crawl_job_by_address.values()]
 
@@ -449,3 +440,212 @@ def heartbeat(
         tags=[crawler_type, "heartbeat", blockchain_type.value, "dead"],
         timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
     )
+
+
+def bugout_state_update(
+    entries_tags_add: List[Dict[str, Any]],
+    entries_tags_delete: List[Dict[str, Any]],
+) -> Any:
+    if len(entries_tags_add) > 0:
+        new_entreis_state = bugout_client.update_entries_tags(  # type: ignore
+            entries_tags=entries_tags_add,
+            timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
+        )
+
+    if len(entries_tags_delete) > 0:
+        new_entreis_state = bugout_client.delete_entries_tags(  # type: ignore
+            entries_tags=entries_tags_delete,
+            timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
+        )
+
+    return new_entreis_state
+
+
+def update_job_tags(
+    events: Union[List[EventCrawlJob], List[FunctionCallCrawlJob]],
+    new_entreis_state: Any,
+):
+    for entry in new_entreis_state:
+        for event in events:
+            if isinstance(event, EventCrawlJob):
+                for contract_address, entries_ids in event.entries_ids.items():
+                    for entry_id, tags in entries_ids.items():
+                        if entry_id == entry["journal_entry_id"]:
+                            event.entries_ids[contract_address][entry_id] = tags
+
+            if isinstance(event, FunctionCallCrawlJob):
+                for entry_id, tags in event.entries_tags.items():
+                    if entry_id == entry["journal_entry_id"]:
+                        event.entries_tags[entry_id] = tags
+
+    return events
+
+
+def update_job_state_with_filters(
+    events: Union[List[EventCrawlJob], List[FunctionCallCrawlJob]],
+    address_filter: List[ChecksumAddress],
+    required_tags: List[str],
+    tags_to_add: List[str] = [],
+    tags_to_delete: List[str] = [],
+) -> Union[List[EventCrawlJob], List[FunctionCallCrawlJob]]:
+    """
+    Function that updates the state of the job in bugout.
+    """
+
+    entries_ids_to_update: List[UUID] = []
+
+    ### TODO: refactor this function
+
+    if len(tags_to_add) == 0 and len(tags_to_delete) == 0:
+        return events
+
+    for event in events:
+        # functions
+        if isinstance(event, EventCrawlJob):
+            for contract_address, entries_ids in event.entries_ids.items():
+                if address_filter and contract_address not in address_filter:
+                    continue
+                for entry_id, tags in entries_ids.items():
+                    if set(required_tags).issubset(set(tags)):
+                        entries_ids_to_update.append(entry_id)
+
+                        event.entries_ids[contract_address][entry_id].extend(
+                            tags_to_add
+                        )
+
+        # events
+        if isinstance(event, FunctionCallCrawlJob):
+            if address_filter and event.contract_address not in address_filter:
+                continue
+            for entry_id, tags in event.entries_tags.items():
+                if set(required_tags).issubset(set(tags)):
+                    entries_ids_to_update.append(entry_id)
+
+    if len(entries_ids_to_update) == 0:
+        return events
+
+    new_entries_state = bugout_state_update(
+        entries_tags_add=[
+            {"journal_entry_id": entry_id, "tags": tags_to_add}
+            for entry_id in entries_ids_to_update
+        ],
+        entries_tags_delete=[
+            {"journal_entry_id": entry_id, "tags": tags_to_delete}
+            for entry_id in entries_ids_to_update
+        ],
+    )
+
+    events = update_job_tags(events, new_entries_state)
+
+    return events
+
+
+def update_entries_status_and_proggress(
+    events: Union[List[EventCrawlJob], List[FunctionCallCrawlJob]],
+    progess_map: Dict[ChecksumAddress, float],
+) -> Union[List[EventCrawlJob], List[FunctionCallCrawlJob]]:
+    """
+    Update entries status and proggress in mooncrawl bugout journal
+    """
+
+    entries_tags_delete = []
+
+    entries_tags_add = []
+
+    for event in events:
+        if isinstance(event, EventCrawlJob):
+            for contract_address, entries_ids in event.entries_ids.items():
+                proggress = int(progess_map.get(contract_address, 0)) * 100
+
+                for entry_id, tags in entries_ids.items():
+                    # proggress
+
+                    if (
+                        f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['historical_crawl_status']}:{HISTORICAL_CRAWLER_STATUSES['finished']}"
+                        in tags
+                    ):
+                        continue
+
+                    entries_tags_delete.append(
+                        {
+                            "journal_entry_id": entry_id,
+                            "tags": [
+                                tag
+                                for tag in tags
+                                if tag.startswith(
+                                    f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['proggress']}"
+                                )
+                            ],
+                        }
+                    )
+
+                    entries_tags_add.append(
+                        {
+                            "journal_entry_id": entry_id,
+                            "tags": [
+                                f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['proggress']}:{proggress}"
+                            ],
+                        }
+                    )
+
+                    if proggress >= 100:
+                        entries_tags_add.append(
+                            {
+                                "journal_entry_id": entry_id,
+                                "tags": [
+                                    f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['historical_crawl_status']}:{HISTORICAL_CRAWLER_STATUSES['finished']}"
+                                ],
+                            }
+                        )
+
+        if isinstance(event, FunctionCallCrawlJob):
+            proggress = int(progess_map.get(event.contract_address, 0)) * 100
+
+            for entry_id, tags in event.entries_tags.items():
+                if (
+                    f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['historical_crawl_status']}:{HISTORICAL_CRAWLER_STATUSES['finished']}"
+                    in tags
+                ):
+                    continue
+
+                # proggress
+                entries_tags_delete.append(
+                    {
+                        "journal_entry_id": entry_id,
+                        "tags": [
+                            tag
+                            for tag in tags
+                            if tag.startswith(
+                                f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['proggress']}"
+                            )
+                        ],
+                    }
+                )
+
+                entries_tags_add.append(
+                    {
+                        "journal_entry_id": entry_id,
+                        "tags": [
+                            f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['proggress']}:{proggress}"
+                        ],
+                    }
+                )
+
+                if proggress >= 100:
+                    entries_tags_add.append(
+                        {
+                            "journal_entry_id": entry_id,
+                            "tags": [
+                                f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['historical_crawl_status']}:{HISTORICAL_CRAWLER_STATUSES['finished']}"
+                            ],
+                        }
+                    )
+
+    new_entries_state = bugout_state_update(
+        entries_tags_add=entries_tags_add,
+        entries_tags_delete=entries_tags_delete,
+    )
+
+    events = update_job_tags(events, new_entries_state)
+
+    return events
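bugout_state_update above forwards two lists of per-entry payloads to the Bugout client, one for tags to add and one for tags to delete, each item carrying the journal entry id. The sketch below reproduces just the payload construction used by update_job_state_with_filters (the UUIDs are random placeholders and the client calls themselves are omitted):

from uuid import uuid4

entries_ids_to_update = [uuid4(), uuid4()]
tags_to_add = ["moonworm_task_pikedup:True"]
tags_to_delete = ["moonworm_task_pikedup:False"]

entries_tags_add = [
    {"journal_entry_id": entry_id, "tags": tags_to_add}
    for entry_id in entries_ids_to_update
]
entries_tags_delete = [
    {"journal_entry_id": entry_id, "tags": tags_to_delete}
    for entry_id in entries_ids_to_update
]
print(entries_tags_add[0]["tags"], entries_tags_delete[0]["tags"])
# ['moonworm_task_pikedup:True'] ['moonworm_task_pikedup:False']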
@@ -3,6 +3,7 @@ import time
 from typing import Dict, List, Optional, Tuple
 from uuid import UUID
 
+from eth_typing.evm import ChecksumAddress
 from moonstreamdb.blockchain import AvailableBlockchainType
 from moonworm.crawler.moonstream_ethereum_state_provider import (  # type: ignore
     MoonstreamEthereumStateProvider,
@@ -11,7 +12,12 @@ from moonworm.crawler.networks import Network  # type: ignore
 from sqlalchemy.orm.session import Session
 from web3 import Web3
 
-from .crawler import EventCrawlJob, FunctionCallCrawlJob, _retry_connect_web3
+from .crawler import (
+    EventCrawlJob,
+    FunctionCallCrawlJob,
+    _retry_connect_web3,
+    update_entries_status_and_proggress,
+)
 from .db import add_events_to_session, add_function_calls_to_session, commit_session
 from .event_crawler import _crawl_events, _autoscale_crawl_events
 from .function_call_crawler import _crawl_functions
@@ -31,6 +37,7 @@ def historical_crawler(
     max_blocks_batch: int = 100,
     min_sleep_time: float = 0.1,
     access_id: Optional[UUID] = None,
+    addresses_deployment_blocks: Optional[Dict[ChecksumAddress, int]] = None,
 ):
     assert max_blocks_batch > 0, "max_blocks_batch must be greater than 0"
     assert min_sleep_time > 0, "min_sleep_time must be greater than 0"
@@ -60,6 +67,10 @@ def historical_crawler(
     blocks_cache: Dict[int, int] = {}
     failed_count = 0
 
+    original_start_block = start_block
+
+    progess_map: Dict[ChecksumAddress, float] = {}
+
     while start_block >= end_block:
         try:
             time.sleep(min_sleep_time)
@@ -119,6 +130,27 @@ def historical_crawler(
                     db_session, all_function_calls, blockchain_type
                 )
 
+            if addresses_deployment_blocks:
+                for address, deployment_block in addresses_deployment_blocks.items():
+                    current_position = end_block
+
+                    progess = original_start_block - current_position / (
+                        original_start_block - deployment_block
+                    )
+                    progess_map[address] = progess
+
+                if len(function_call_crawl_jobs) > 0:
+                    function_call_crawl_jobs = update_entries_status_and_proggress(  # type: ignore
+                        events=function_call_crawl_jobs,
+                        progess_map=progess_map,
+                    )
+
+                if len(event_crawl_jobs) > 0:
+                    event_crawl_jobs = update_entries_status_and_proggress(  # type: ignore
+                        events=event_crawl_jobs,
+                        progess_map=progess_map,
+                    )
+
             # Commiting to db
             commit_session(db_session)
 
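The progress value computed above is later scaled to a percentage and written back as a progress tag. Note that the expression original_start_block - current_position / (original_start_block - deployment_block) divides before subtracting under Python precedence; assuming the intended quantity is the fraction of the historical range covered so far, a worked example looks like this (all numbers are made up):

original_start_block = 17_000_000
deployment_block = 14_250_000
current_position = 16_000_000  # end_block of the batch that was just crawled

# Fraction of the range [deployment_block, original_start_block] covered so far,
# as apparently intended (note the explicit parentheses around the numerator):
progress = (original_start_block - current_position) / (
    original_start_block - deployment_block
)
print(round(progress, 4), int(progress * 100))
# 0.3636 36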
@@ -279,3 +279,26 @@ infura_networks = {
 BUGOUT_RESOURCE_TYPE_SUBSCRIPTION = "subscription"
 BUGOUT_RESOURCE_TYPE_ENTITY_SUBSCRIPTION = "entity_subscription"
 BUGOUT_RESOURCE_TYPE_DASHBOARD = "dashboards"
+
+
+# Historical crawler status config
+
+HISTORICAL_CRAWLER_STATUSES = {
+    "pending": "pending",
+    "running": "running",
+    "finished": "finished",
+}
+
+# Historical crawler moonworm status config
+
+HISTORICAL_CRAWLER_MOONWORM_STATUSES = {
+    "pickedup": True,
+}
+
+# Statuses tags prefixes
+
+HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES = {
+    "moonworm_status": "moonworm_task_pickedup",
+    "historical_crawl_status": "historical_crawl_status",
+    "progress_status": "progress",
+}
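These settings feed the tag handling in crawler.py above, which composes journal tags of the form prefix:value. The progress prefix is registered here under the key "progress_status", while update_entries_status_and_proggress above looks it up as 'proggress', so the two pieces may still need to be reconciled; the sketch below composes tags using the keys as defined in this settings block:

HISTORICAL_CRAWLER_STATUSES = {
    "pending": "pending",
    "running": "running",
    "finished": "finished",
}
HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES = {
    "moonworm_status": "moonworm_task_pickedup",
    "historical_crawl_status": "historical_crawl_status",
    "progress_status": "progress",
}

finished_tag = (
    f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['historical_crawl_status']}"
    f":{HISTORICAL_CRAWLER_STATUSES['finished']}"
)
progress_tag = f"{HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES['progress_status']}:42"
print(finished_tag, progress_tag)
# historical_crawl_status:finished progress:42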