kopia lustrzana https://github.com/bugout-dev/moonstream
Add insert for v3 and fix jobs defenitions.
rodzic
05bd67a350
commit
c6f40085f3
|
@ -6,7 +6,11 @@ from urllib.parse import urlparse, urlunparse
|
||||||
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from moonstreamdbv3.db import MoonstreamDBEngine, MoonstreamDBIndexesEngine
|
from moonstreamdbv3.db import (
|
||||||
|
MoonstreamDBEngine,
|
||||||
|
MoonstreamDBIndexesEngine,
|
||||||
|
MoonstreamCustomDBEngine,
|
||||||
|
)
|
||||||
from moonstreamtypes.blockchain import AvailableBlockchainType
|
from moonstreamtypes.blockchain import AvailableBlockchainType
|
||||||
from moonstreamtypes.subscriptions import blockchain_type_to_subscription_type
|
from moonstreamtypes.subscriptions import blockchain_type_to_subscription_type
|
||||||
from web3 import Web3
|
from web3 import Web3
|
||||||
|
@ -160,8 +164,7 @@ def get_db_connection(uuid):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"Unhandled exception, error: {str(e)}")
|
raise Exception(f"Unhandled exception, error: {str(e)}")
|
||||||
|
|
||||||
connection_string = response.text.strip('"')
|
connection_string = response.text
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connection_string = ensure_port_in_connection_string(connection_string)
|
connection_string = ensure_port_in_connection_string(connection_string)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
|
@ -173,11 +176,18 @@ def get_db_connection(uuid):
|
||||||
|
|
||||||
|
|
||||||
def ensure_port_in_connection_string(connection_string):
|
def ensure_port_in_connection_string(connection_string):
|
||||||
|
# Parse the connection string into components
|
||||||
parsed_url = urlparse(connection_string)
|
parsed_url = urlparse(connection_string)
|
||||||
|
|
||||||
|
# Check if a port is specified, and if not, add the default port
|
||||||
if parsed_url.port is None:
|
if parsed_url.port is None:
|
||||||
host = f"{parsed_url.hostname}:5432" # Assuming default port for PostgreSQL
|
# Append default port 5432 for PostgreSQL if no port specified
|
||||||
parsed_url = parsed_url._replace(netloc=host)
|
|
||||||
connection_string = urlunparse(parsed_url)
|
connection_string = connection_string.replace(
|
||||||
|
"/" + connection_string.split("/")[-1],
|
||||||
|
f":5432" + "/" + connection_string.split("/")[-1],
|
||||||
|
)
|
||||||
|
connection_string = connection_string.replace('"', "")
|
||||||
return connection_string
|
return connection_string
|
||||||
|
|
||||||
|
|
||||||
|
@ -529,7 +539,9 @@ def handle_historical_crawl_v3(args: argparse.Namespace) -> None:
|
||||||
|
|
||||||
customer_connection = get_db_connection(args.customer_uuid)
|
customer_connection = get_db_connection(args.customer_uuid)
|
||||||
|
|
||||||
filtered_function_call_jobs = [] # v1
|
if customer_connection == "":
|
||||||
|
raise ValueError("No connection string found for the customer")
|
||||||
|
|
||||||
if args.only_events:
|
if args.only_events:
|
||||||
filtered_function_call_jobs = []
|
filtered_function_call_jobs = []
|
||||||
logger.info(f"Removing function call crawl jobs since --only-events is set")
|
logger.info(f"Removing function call crawl jobs since --only-events is set")
|
||||||
|
@ -555,7 +567,7 @@ def handle_historical_crawl_v3(args: argparse.Namespace) -> None:
|
||||||
|
|
||||||
logger.info(f"Blockchain type: {blockchain_type.value}")
|
logger.info(f"Blockchain type: {blockchain_type.value}")
|
||||||
|
|
||||||
customer_engine = MoonstreamDBEngine()
|
customer_engine = MoonstreamCustomDBEngine(customer_connection)
|
||||||
|
|
||||||
with customer_engine.yield_db_session_ctx() as db_session:
|
with customer_engine.yield_db_session_ctx() as db_session:
|
||||||
web3: Optional[Web3] = None
|
web3: Optional[Web3] = None
|
||||||
|
|
|
@ -135,7 +135,6 @@ def continuous_crawler(
|
||||||
evm_state_provider = Web3StateProvider(web3)
|
evm_state_provider = Web3StateProvider(web3)
|
||||||
|
|
||||||
if version == 2:
|
if version == 2:
|
||||||
|
|
||||||
evm_state_provider = MoonstreamEthereumStateProvider(
|
evm_state_provider = MoonstreamEthereumStateProvider(
|
||||||
web3,
|
web3,
|
||||||
network, # type: ignore
|
network, # type: ignore
|
||||||
|
|
|
@ -2,7 +2,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import binascii
|
import binascii
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
@ -16,7 +16,7 @@ from moonstreamdb.subscriptions import SubscriptionTypes
|
||||||
from moonstreamtypes.subscriptions import SubscriptionTypes
|
from moonstreamtypes.subscriptions import SubscriptionTypes
|
||||||
from moonstreamdbv3.models_indexes import AbiJobs
|
from moonstreamdbv3.models_indexes import AbiJobs
|
||||||
from moonworm.deployment import find_deployment_block # type: ignore
|
from moonworm.deployment import find_deployment_block # type: ignore
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func, cast, JSON
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from web3.main import Web3
|
from web3.main import Web3
|
||||||
|
|
||||||
|
@ -139,6 +139,7 @@ class FunctionCallCrawlJob:
|
||||||
contract_address: ChecksumAddress
|
contract_address: ChecksumAddress
|
||||||
entries_tags: Dict[UUID, List[str]]
|
entries_tags: Dict[UUID, List[str]]
|
||||||
created_at: int
|
created_at: int
|
||||||
|
existing_selectors: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
def get_crawl_job_entries(
|
def get_crawl_job_entries(
|
||||||
|
@ -778,7 +779,11 @@ def get_function_call_crawl_job_records(
|
||||||
query = (
|
query = (
|
||||||
db_session.query(AbiJobs)
|
db_session.query(AbiJobs)
|
||||||
.filter(AbiJobs.chain == blockchain_type.value)
|
.filter(AbiJobs.chain == blockchain_type.value)
|
||||||
.filter(func.length(AbiJobs.abi_selector) == 8)
|
.filter(func.length(AbiJobs.abi_selector) == 10)
|
||||||
|
.filter(
|
||||||
|
cast(AbiJobs.abi, JSON).op("->>")("type") == "function",
|
||||||
|
cast(AbiJobs.abi, JSON).op("->>")("stateMutability") != "view",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(addresses) != 0:
|
if len(addresses) != 0:
|
||||||
|
@ -790,30 +795,31 @@ def get_function_call_crawl_job_records(
|
||||||
|
|
||||||
# Iterate over each record fetched from the database
|
# Iterate over each record fetched from the database
|
||||||
for crawl_job_record in crawl_job_records:
|
for crawl_job_record in crawl_job_records:
|
||||||
str_address = crawl_job_record.address.hex() # Convert address to hex string
|
str_address = "0x" + crawl_job_record.address.hex()
|
||||||
|
|
||||||
if crawl_job_record.abi_selector in existing_crawl_job_records:
|
if str_address not in existing_crawl_job_records:
|
||||||
# If abi_selector already exists in the records, append new address if not already listed
|
existing_crawl_job_records[str_address] = FunctionCallCrawlJob(
|
||||||
current_job = existing_crawl_job_records[crawl_job_record.abi_selector]
|
|
||||||
if str_address not in current_job.contract_address:
|
|
||||||
# Since it should handle a single address, we ensure not to append but update if necessary.
|
|
||||||
current_job.contract_address = str_address
|
|
||||||
else:
|
|
||||||
# Create a new FunctionCallCrawlJob record if the abi_selector is not found in existing records.
|
|
||||||
new_crawl_job = FunctionCallCrawlJob(
|
|
||||||
contract_abi=[json.loads(str(crawl_job_record.abi))],
|
contract_abi=[json.loads(str(crawl_job_record.abi))],
|
||||||
contract_address=str_address,
|
contract_address=Web3.toChecksumAddress(str_address),
|
||||||
entries_tags={
|
entries_tags={
|
||||||
UUID(str(crawl_job_record.id)): [
|
UUID(str(crawl_job_record.id)): [
|
||||||
str(crawl_job_record.status),
|
str(crawl_job_record.status),
|
||||||
str(crawl_job_record.progress),
|
str(crawl_job_record.progress),
|
||||||
],
|
]
|
||||||
},
|
},
|
||||||
created_at=int(crawl_job_record.created_at.timestamp()),
|
created_at=int(crawl_job_record.created_at.timestamp()),
|
||||||
|
existing_selectors=[str(crawl_job_record.abi_selector)],
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
existing_crawl_job_records[str(crawl_job_record.abi_selector)] = (
|
if (
|
||||||
new_crawl_job
|
crawl_job_record.abi_selector
|
||||||
)
|
not in existing_crawl_job_records[str_address].existing_selectors
|
||||||
|
):
|
||||||
|
existing_crawl_job_records[str_address].contract_abi.append(
|
||||||
|
json.loads(str(crawl_job_record.abi))
|
||||||
|
)
|
||||||
|
existing_crawl_job_records[str_address].existing_selectors.append(
|
||||||
|
str(crawl_job_record.abi_selector)
|
||||||
|
)
|
||||||
|
|
||||||
return existing_crawl_job_records
|
return existing_crawl_job_records
|
||||||
|
|
|
@ -91,7 +91,7 @@ def _function_call_to_label(
|
||||||
).replace(r"\u0000", "")
|
).replace(r"\u0000", "")
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(blockchain_type, AvailableBlockchainType):
|
if db_version == 2:
|
||||||
|
|
||||||
label = label_model(
|
label = label_model(
|
||||||
label=label_name,
|
label=label_name,
|
||||||
|
@ -102,6 +102,26 @@ def _function_call_to_label(
|
||||||
block_timestamp=function_call.block_timestamp,
|
block_timestamp=function_call.block_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
del sanityzed_label_data["type"]
|
||||||
|
del sanityzed_label_data["name"]
|
||||||
|
|
||||||
|
label = label_model(
|
||||||
|
label=label_name,
|
||||||
|
label_name=function_call.function_name,
|
||||||
|
label_data=sanityzed_label_data,
|
||||||
|
address=function_call.contract_address,
|
||||||
|
block_number=function_call.block_number,
|
||||||
|
block_hash=function_call.block_hash.hex(), # type: ignore
|
||||||
|
transaction_hash=function_call.transaction_hash,
|
||||||
|
block_timestamp=function_call.block_timestamp,
|
||||||
|
caller_address=function_call.caller_address,
|
||||||
|
origin_address=function_call.caller_address,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(label)
|
||||||
|
|
||||||
return label
|
return label
|
||||||
|
|
||||||
|
|
||||||
|
@ -278,35 +298,84 @@ def add_function_calls_to_session(
|
||||||
if len(function_calls) == 0:
|
if len(function_calls) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
label_model = get_label_model(blockchain_type, version=db_version)
|
if db_version == 2:
|
||||||
|
|
||||||
transactions_hashes_to_save = list(
|
label_model = get_label_model(blockchain_type, version=db_version)
|
||||||
set([function_call.transaction_hash for function_call in function_calls])
|
|
||||||
)
|
|
||||||
|
|
||||||
# Define a CTE VALUES expression to escape big IN clause
|
transactions_hashes_to_save = list(
|
||||||
hashes_cte = select(
|
set([function_call.transaction_hash for function_call in function_calls])
|
||||||
values(column("transaction_hash", String), name="hashes").data(
|
|
||||||
[(hash,) for hash in transactions_hashes_to_save]
|
|
||||||
)
|
)
|
||||||
).cte()
|
|
||||||
|
|
||||||
# Retrieve existing transaction hashes
|
# Define a CTE VALUES expression to escape big IN clause
|
||||||
query = db_session.query(
|
hashes_cte = select(
|
||||||
label_model.transaction_hash.label("transaction_hash")
|
values(column("transaction_hash", String), name="hashes").data(
|
||||||
).filter(
|
[(hash,) for hash in transactions_hashes_to_save]
|
||||||
label_model.label == label_name,
|
)
|
||||||
label_model.log_index.is_(None),
|
).cte()
|
||||||
exists().where(label_model.transaction_hash == hashes_cte.c.transaction_hash),
|
|
||||||
)
|
|
||||||
|
|
||||||
existing_tx_hashes = [row.transaction_hash for row in query]
|
# Retrieve existing transaction hashes
|
||||||
|
query = db_session.query(
|
||||||
|
label_model.transaction_hash.label("transaction_hash")
|
||||||
|
).filter(
|
||||||
|
label_model.label == label_name,
|
||||||
|
label_model.log_index.is_(None),
|
||||||
|
exists().where(
|
||||||
|
label_model.transaction_hash == hashes_cte.c.transaction_hash
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
labels_to_save = [
|
existing_tx_hashes = [row.transaction_hash for row in query]
|
||||||
_function_call_to_label(blockchain_type, function_call)
|
|
||||||
for function_call in function_calls
|
|
||||||
if function_call.transaction_hash not in existing_tx_hashes
|
|
||||||
]
|
|
||||||
|
|
||||||
logger.info(f"Saving {len(labels_to_save)} labels to session")
|
labels_to_save = [
|
||||||
db_session.add_all(labels_to_save)
|
_function_call_to_label(blockchain_type, function_call)
|
||||||
|
for function_call in function_calls
|
||||||
|
if function_call.transaction_hash not in existing_tx_hashes
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"Saving {len(labels_to_save)} labels to session")
|
||||||
|
db_session.add_all(labels_to_save)
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
label_model = get_label_model(blockchain_type, version=db_version)
|
||||||
|
|
||||||
|
# Define the table name and columns based on the blockchain type
|
||||||
|
table = label_model.__table__
|
||||||
|
|
||||||
|
# Create a list of dictionaries representing new records
|
||||||
|
records = []
|
||||||
|
for function_call in function_calls:
|
||||||
|
|
||||||
|
label_function_call = _function_call_to_label(
|
||||||
|
blockchain_type, function_call, db_version
|
||||||
|
)
|
||||||
|
|
||||||
|
record = {
|
||||||
|
"label": label_function_call.label,
|
||||||
|
"transaction_hash": label_function_call.transaction_hash,
|
||||||
|
"log_index": None,
|
||||||
|
"block_number": label_function_call.block_number,
|
||||||
|
"block_hash": None,
|
||||||
|
"block_timestamp": label_function_call.block_timestamp,
|
||||||
|
"caller_address": label_function_call.label_data["caller"],
|
||||||
|
"origin_address": None,
|
||||||
|
"address": label_function_call.address,
|
||||||
|
"label_name": label_function_call.label_name,
|
||||||
|
"label_type": "tx_call",
|
||||||
|
"label_data": label_function_call.label_data,
|
||||||
|
}
|
||||||
|
|
||||||
|
records.append(record)
|
||||||
|
|
||||||
|
# Insert records using a single batched query with an ON CONFLICT clause
|
||||||
|
statement = insert(table).values(records)
|
||||||
|
do_nothing_statement = statement.on_conflict_do_nothing(
|
||||||
|
index_elements=["transaction_hash", "log_index"],
|
||||||
|
index_where=(table.c.label == "seer") & (table.c.label_type == "tx_call"),
|
||||||
|
)
|
||||||
|
|
||||||
|
db_session.execute(do_nothing_statement)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Batch inserted {len(records)} function call labels into {table.name}"
|
||||||
|
)
|
||||||
|
|
|
@ -43,6 +43,7 @@ def _crawl_functions(
|
||||||
"function_call", blockchain_type
|
"function_call", blockchain_type
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
print(f"Processing job {function_call_crawler.whitelisted_methods}")
|
||||||
function_call_crawler.crawl(
|
function_call_crawler.crawl(
|
||||||
from_block,
|
from_block,
|
||||||
to_block,
|
to_block,
|
||||||
|
|
Ładowanie…
Reference in New Issue