Merge pull request #1163 from moonstream-to/metadata-crawler-update

Metadata crawler update
pull/1164/head
Andrey Dolgolev 2025-01-15 12:21:00 +02:00 committed by GitHub
commit d8c20bd503
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 902 additions and 191 deletions


@ -63,6 +63,15 @@ XAI_SEPOLIA_STATE_CLEAN_TIMER_FILE="xai-sepolia-state-clean.timer"
XAI_SEPOLIA_METADATA_SERVICE_FILE="xai-sepolia-metadata.service"
XAI_SEPOLIA_METADATA_TIMER_FILE="xai-sepolia-metadata.timer"
# Game7
GAME7_METADATA_SERVICE_FILE="game7-metadata.service"
GAME7_METADATA_TIMER_FILE="game7-metadata.timer"
# Game7 testnet
GAME7_TESTNET_METADATA_SERVICE_FILE="game7-testnet-metadata.service"
GAME7_TESTNET_METADATA_TIMER_FILE="game7-testnet-metadata.timer"
set -eu
echo
@ -230,3 +239,21 @@ cp "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/sy
cp "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${XAI_SEPOLIA_METADATA_TIMER_FILE}" cp "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${XAI_SEPOLIA_METADATA_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${XAI_SEPOLIA_METADATA_TIMER_FILE}" XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${XAI_SEPOLIA_METADATA_TIMER_FILE}"
echo
echo
echo -e "${PREFIX_INFO} Replacing existing Game7 metadata service and timer with: ${GAME7_METADATA_SERVICE_FILE}, ${GAME7_METADATA_TIMER_FILE}"
chmod 644 "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}"
cp "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_SERVICE_FILE}"
cp "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_METADATA_TIMER_FILE}"
echo
echo
echo -e "${PREFIX_INFO} Replacing existing Game7 testnet metadata service and timer with: ${GAME7_TESTNET_METADATA_SERVICE_FILE}, ${GAME7_TESTNET_METADATA_TIMER_FILE}"
chmod 644 "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}"
cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_SERVICE_FILE}"
cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_TESTNET_METADATA_TIMER_FILE}"


@ -217,6 +217,14 @@ MANTLE_SEPOLIA_HISTORICAL_CRAWL_EVENTS_TIMER_FILE="mantle-sepolia-historical-cra
MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_SERVICE_FILE="mantle-sepolia-historical-crawl-transactions.service"
MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE="mantle-sepolia-historical-crawl-transactions.timer"
# Game7
GAME7_METADATA_SERVICE_FILE="game7-metadata.service"
GAME7_METADATA_TIMER_FILE="game7-metadata.timer"
# Game7 testnet
GAME7_TESTNET_METADATA_SERVICE_FILE="game7-testnet-metadata.service"
GAME7_TESTNET_METADATA_TIMER_FILE="game7-testnet-metadata.timer"
set -eu
echo
@ -1109,3 +1117,24 @@ cp "${SCRIPT_DIR}/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_SERVICE_FILE}"
cp "${SCRIPT_DIR}/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}" cp "${SCRIPT_DIR}/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}" XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}"
# Game7
echo
echo
echo -e "${PREFIX_INFO} Replacing existing Game7 metadata service and timer with: ${GAME7_METADATA_SERVICE_FILE}, ${GAME7_METADATA_TIMER_FILE}"
chmod 644 "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}"
cp "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_SERVICE_FILE}"
cp "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_METADATA_TIMER_FILE}"
# Game7 testnet
echo
echo
echo -e "${PREFIX_INFO} Replacing existing Game7 testnet metadata service and timer with: ${GAME7_TESTNET_METADATA_SERVICE_FILE}, ${GAME7_TESTNET_METADATA_TIMER_FILE}"
chmod 644 "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}"
cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_SERVICE_FILE}"
cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_TIMER_FILE}"
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_TESTNET_METADATA_TIMER_FILE}"


@ -0,0 +1,11 @@
[Unit]
Description=Execute metadata crawler
After=network.target
[Service]
Type=oneshot
WorkingDirectory=/home/ubuntu/moonstream/crawlers/mooncrawl
EnvironmentFile=/home/ubuntu/moonstream-secrets/app.env
ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.metadata_crawler.cli crawl --blockchain game7
CPUWeight=60
SyslogIdentifier=game7-metadata


@ -0,0 +1,9 @@
[Unit]
Description=Execute Game7 metadata crawler each 10m
[Timer]
OnBootSec=20s
OnUnitActiveSec=60m
[Install]
WantedBy=timers.target


@ -0,0 +1,11 @@
[Unit]
Description=Execute metadata crawler
After=network.target
[Service]
Type=oneshot
WorkingDirectory=/home/ubuntu/moonstream/crawlers/mooncrawl
EnvironmentFile=/home/ubuntu/moonstream-secrets/app.env
ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.metadata_crawler.cli crawl --blockchain game7_testnet
CPUWeight=60
SyslogIdentifier=game7-testnet-metadata


@ -0,0 +1,9 @@
[Unit]
Description=Execute Game7 testnet metadata crawler each 10m
[Timer]
OnBootSec=20s
OnUnitActiveSec=60m
[Install]
WantedBy=timers.target


@ -116,6 +116,7 @@ def recive_S3_data_from_query(
client: Moonstream,
token: Union[str, uuid.UUID],
query_name: str,
query_params: Dict[str, Any] = {},
params: Dict[str, Any] = {},
time_await: int = 2,
max_retries: int = 30,
@ -136,15 +137,18 @@ def recive_S3_data_from_query(
if custom_body:
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
json = custom_body
response = requests.post(
url=f"{client.api.endpoints[ENDPOINT_QUERIES]}/{query_name}/update_data",
headers=headers,
params=query_params,
json=json,
timeout=5,
)
data_url = MoonstreamQueryResultUrl(url=response.json()["url"])
else:
data_url = client.exec_query(
@ -226,3 +230,27 @@ def get_customer_db_uri(
except Exception as e:
logger.error(f"Error get customer db uri: {str(e)}")
raise MoonstreamHTTPException(status_code=500, internal_error=e)
## DB V3
def request_connection_string(
customer_id: str,
instance_id: int,
token: str,
user: str = "seer", # token with write access
) -> str:
"""
Request connection string from the Moonstream DB V3 Controller API.
Default user is seer with write access
"""
response = requests.get(
f"{MOONSTREAM_DB_V3_CONTROLLER_API}/customers/{customer_id}/instances/{instance_id}/creds/{user}/url",
headers={"Authorization": f"Bearer {token}"},
)
response.raise_for_status()
return response.text.replace('"', "")


@ -13,12 +13,7 @@ import boto3 # type: ignore
from bugout.data import BugoutJournalEntity, BugoutResource
from fastapi import BackgroundTasks, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from moonstreamtypes.blockchain import AvailableBlockchainType, get_block_model, get_label_model, get_transaction_model
from sqlalchemy import text
from . import data
@ -232,6 +227,11 @@ async def queries_data_update_handler(
requested_query = request_data.query
version = 2
if request_data.customer_id and request_data.instance_id:
version = 3
blockchain_table = "polygon_labels"
if request_data.blockchain:
if request_data.blockchain not in [i.value for i in AvailableBlockchainType]:
@ -240,22 +240,23 @@ async def queries_data_update_handler(
blockchain = AvailableBlockchainType(request_data.blockchain)
blockchain_table = get_label_model(blockchain, version).__tablename__
requested_query = requested_query.replace(
"__labels_table__",
blockchain_table
)
if version == 2:
(
requested_query.replace(
"__transactions_table__",
get_transaction_model(blockchain).__tablename__,
)
.replace(
"__blocks_table__",
get_block_model(blockchain).__tablename__,
)
)
# Check if it can transform to TextClause
try:


@ -60,6 +60,7 @@ class TokenURIs(BaseModel):
block_number: str
block_timestamp: str
address: str
block_hash: Optional[str] = None # for v3 only
class ViewTasks(BaseModel):


@ -0,0 +1,180 @@
# Metadata Crawler Architecture
## Overview
The metadata crawler fetches and stores metadata for NFTs (non-fungible tokens) across multiple blockchains. Token lists can come either from the traditional database flow, which reads TokenURI view-method labels, or from Spire journal-based job configurations, and results can be written to both v2 and v3 database structures.
## Core Components
### 1. Update Strategies
#### Leak-Based Strategy (Legacy v2)
- Uses a probabilistic approach to decide which tokens to re-crawl
- Controlled by the `max_recrawl` parameter
- Suitable for large collections with infrequent updates (see the sketch below)
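A minimal sketch of the leak calculation used by `process_address_metadata_with_leak`; the `leak_of_crawled_uri` helper (not shown) then drops roughly this fraction of the already-parsed token ids so they get re-crawled:

```python
# Sketch: how many already-parsed tokens to "leak" back into the crawl.
# Tokens flagged as maybe-updated are always recrawled; leaked tokens fill
# the remaining budget so that maybe_updated + leaked <= max_recrawl.
def calculate_leak_rate(already_parsed: set, maybe_updated: list, max_recrawl: int) -> float:
    free_spots = max(0, max_recrawl - len(maybe_updated))
    if already_parsed and free_spots > 0:
        return free_spots / len(already_parsed)
    return 0.0
```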
#### SQL-Based Strategy (v3)
- Uses SQL queries to determine which tokens need updates
- More precise tracking of token updates
- Better suited for active collections
### 2. Database Connections
The crawler supports multiple database connection strategies:
- Default Moonstream database connection
- Custom database URI via `--custom-db-uri`
- Per-customer instance connections (v3), selected by `customer_id` and `instance_id` in the job configuration:
```json
{
"customer_id": "...",
"instance_id": "...",
"blockchain": "ethereum",
"v3": true
}
```
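A rough sketch of how a job like this becomes a database session in the v3 flow, under the assumption that `request_connection_string`, `create_moonstream_engine`, and `sessionmaker` are importable from the `mooncrawl` package as in this PR; the engine arguments mirror the values the crawler passes:

```python
# Sketch: open a per-customer DB session for a v3 job.
from mooncrawl.actions import request_connection_string
from mooncrawl.db import create_moonstream_engine, sessionmaker
from mooncrawl.settings import MOONSTREAM_ADMIN_ACCESS_TOKEN

job = {"customer_id": "...", "instance_id": "...", "blockchain": "ethereum", "v3": True}
custom_db_uri = None  # set to override the controller lookup (--custom-db-uri)

connection_string = custom_db_uri or request_connection_string(
    customer_id=job["customer_id"],
    instance_id=job["instance_id"],
    token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
)
engine = create_moonstream_engine(connection_string, 2, 100000)  # pool/timeout values used by the crawler
db_session = sessionmaker(bind=engine)()
```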
### 3. Job Configuration
Jobs can be configured in two ways:
- Through Spire journal entries with tags `#metadata-job #{blockchain}`
- Direct database queries (legacy mode) using TokenURI view method
Example Spire journal entry:
```json
{
"type": "metadata-job",
"query_api": {
"name": "new_tokens_to_crawl",
"params": {
"address": "0x...",
"blockchain": "ethereum"
}
},
"contract_address": "0x...",
"blockchain": "ethereum",
"update_existing": false,
"v3": true,
"customer_id": "...", // Optional, for custom database
"instance_id": "..." // Optional, for custom database
}
```
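The crawler discovers these jobs by tag search in the Spire journal; a condensed sketch of that lookup, with the journal id and access token taken from the environment settings:

```python
# Sketch: load metadata jobs for one blockchain from the Spire journal.
import json

from mooncrawl.actions import get_all_entries_from_search
from mooncrawl.settings import (
    MOONSTREAM_ADMIN_ACCESS_TOKEN,
    MOONSTREAM_METADATA_TASKS_JOURNAL,
)

entries = get_all_entries_from_search(
    journal_id=MOONSTREAM_METADATA_TASKS_JOURNAL,
    search_query="#metadata-job #ethereum",  # "#metadata-job #{blockchain}"
    token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
    content=True,
    limit=1000,
)
jobs = [json.loads(entry.content) for entry in entries if entry.content]
```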
### 4. Data Flow
1. **Token Discovery**
- Query API integration for dynamic token discovery
- Database queries for existing tokens
- Support for multiple addresses per job
2. **Metadata Fetching** (see the sketch after this list)
- Parallel processing with ThreadPoolExecutor
- IPFS gateway support
- Automatic retry mechanism
- Rate limiting and batch processing
3. **Storage**
- Supports both v2 and v3 database structures
- Batch upsert operations
- Efficient cleaning of old labels
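A compressed sketch of the fetch step, following `crawl_uri` and the per-address processing functions: `ipfs://` URIs are rewritten to a public gateway and the chunk is fetched in parallel. The request header and the placeholder URIs are illustrative; retries and error handling are omitted.

```python
# Sketch: fetch metadata for one chunk of token URIs in parallel.
import json
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_metadata(token_uri: str):
    if token_uri.startswith("ipfs://"):
        token_uri = token_uri.replace("ipfs://", "https://ipfs.io/ipfs/", 1)
    req = urllib.request.Request(token_uri, headers={"User-Agent": "metadata-crawler"})
    with urllib.request.urlopen(req, timeout=10) as response:
        return json.loads(response.read())

uris_chunk = ["ipfs://Qm.../1.json", "https://example.com/2.json"]  # placeholder URIs
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(fetch_metadata, uri): uri for uri in uris_chunk}
    metadata_batch = [(futures[f], f.result()) for f in as_completed(futures)]
```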
### 5. Database Structures
v2:
```python
{
    "label": METADATA_CRAWLER_LABEL,
    "label_data": {
        "type": "metadata",
        "token_id": "...",
        "metadata": {...}
    },
    "block_number": 1234567890,
    "block_timestamp": 456
}
```
v3:
```python
{
    "label": METADATA_CRAWLER_LABEL,
    "label_type": "metadata",
    "label_data": {
        "token_id": "...",
        "metadata": {...}
    },
    "address": "0x...",
    "block_number": 123,
    "block_timestamp": 456,
    "block_hash": "0x..."
}
```
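Writes go through `upsert_metadata_labels`, which batches rows and skips ones that already exist; a trimmed sketch of the v3 path, assuming `labels_data` is a list of dicts shaped like the structure above and `db_session` / `blockchain_type` are already in scope:

```python
# Sketch: batch-insert label rows, ignoring rows that already exist.
from moonstreamtypes.blockchain import get_label_model
from sqlalchemy.dialects.postgresql import insert

label_model = get_label_model(blockchain_type, version=3)
insert_stmt = insert(label_model).values(labels_data)
db_session.execute(insert_stmt.on_conflict_do_nothing())
db_session.commit()
```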
## Key Features
1. **Flexible Token Selection**
- Query API integration
- Support for multiple addresses
- Configurable update strategies
2. **Efficient Processing**
- Batch processing
- Parallel metadata fetching
- Optimized database operations
3. **Error Handling**
- Retry mechanism for failed requests
- Transaction management
- Detailed logging
4. **Database Management**
- Efficient upsert operations
- Label cleaning
- Version compatibility (v2/v3)
## Usage
### CLI Options
```bash
metadata-crawler crawl \
--blockchain ethereum \
--commit-batch-size 50 \
--max-recrawl 300 \
--threads 4 \
--spire true \
--custom-db-uri "postgresql://..." # Optional
```
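In production the same command is driven by the systemd timers added in this PR; for example, the Game7 service unit runs:

```
ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.metadata_crawler.cli crawl --blockchain game7
```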
### Environment Variables
- `MOONSTREAM_ADMIN_ACCESS_TOKEN`: Required for API access
- `METADATA_CRAWLER_LABEL`: Label for database entries
- `METADATA_TASKS_JOURNAL_ID`: Journal ID for metadata tasks
### Database Modes
1. **Legacy Mode (v2)**
- Uses leak-based update strategy
- Single database connection
- Simple metadata structure
2. **Modern Mode (v3)**
- SQL-based update tracking
- Support for multiple database instances
- Enhanced metadata structure
- Per-customer database isolation
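The mode is chosen per job or request: the presence of `customer_id` and `instance_id` selects v3, and the resulting version is passed to the label model lookup. A compact sketch of that decision, using the helpers this PR relies on:

```python
# Sketch: pick the database version and matching label model for a job.
from moonstreamtypes.blockchain import AvailableBlockchainType, get_label_model

def pick_label_model(blockchain: str, customer_id=None, instance_id=None):
    version = 3 if customer_id and instance_id else 2
    blockchain_type = AvailableBlockchainType(blockchain)
    return get_label_model(blockchain_type, version=version), version
```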
## Best Practices
1. **Job Configuration**
- Use descriptive job names
- Group related addresses
- Set appropriate update intervals
2. **Performance Optimization**
- Adjust batch sizes based on network conditions
- Monitor thread count vs. performance
- Use appropriate IPFS gateways
3. **Maintenance**
- Regular cleaning of old labels
- Monitor database size
- Check for failed metadata fetches


@ -3,21 +3,32 @@ import json
import logging
import random
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from sqlalchemy.orm import Session
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError
from bugout.exceptions import BugoutResponseException
from moonstreamtypes.blockchain import AvailableBlockchainType
from moonstreamdb.blockchain import AvailableBlockchainType as AvailableBlockchainTypeV2
from ..actions import get_all_entries_from_search, request_connection_string
from ..settings import MOONSTREAM_ADMIN_ACCESS_TOKEN, MOONSTREAM_METADATA_TASKS_JOURNAL, MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN
from ..db import yield_db_preping_session_ctx, yield_db_read_only_preping_session_ctx, create_moonstream_engine, sessionmaker
from ..data import TokenURIs
from .db import (
clean_labels_from_db,
get_current_metadata_for_address,
get_tokens_id_wich_may_updated,
get_uris_of_tokens,
metadata_to_label,
get_tokens_to_crawl,
upsert_metadata_labels,
)
from ..settings import moonstream_client as mc
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@ -50,7 +61,6 @@ def crawl_uri(metadata_uri: str) -> Any:
result = None
while retry < 3:
try:
if metadata_uri.startswith("ipfs://"):
metadata_uri = metadata_uri.replace(
"ipfs://", "https://ipfs.io/ipfs/", 1
@ -61,10 +71,7 @@ def crawl_uri(metadata_uri: str) -> Any:
response = urllib.request.urlopen(req, timeout=10)
if metadata_uri.startswith("data:application/json") or response.status == 200:
result = json.loads(response.read())
break
retry += 1
@ -72,6 +79,8 @@ def crawl_uri(metadata_uri: str) -> Any:
except HTTPError as error:
logger.error(f"request end with error statuscode: {error.code}")
retry += 1
if error.code == 404:
return None
continue
except Exception as err:
logger.error(err)
@ -81,167 +90,329 @@ def crawl_uri(metadata_uri: str) -> Any:
return result
def process_address_metadata_with_leak(
address: str,
blockchain_type: AvailableBlockchainType,
batch_size: int,
max_recrawl: int,
threads: int,
tokens: List[TokenURIs],
) -> None:
"""
Process metadata for a single address with v3 support
"""
with yield_db_read_only_preping_session_ctx() as db_session_read_only:
try:
already_parsed = get_current_metadata_for_address(
db_session=db_session_read_only,
blockchain_type=blockchain_type,
address=address,
)
maybe_updated = get_tokens_id_wich_may_updated(
db_session=db_session_read_only,
blockchain_type=blockchain_type,
address=address,
)
except Exception as err:
logger.warning(f"Error while getting metadata state for address {address}: {err}")
return
with yield_db_preping_session_ctx() as db_session:
try:
logger.info(f"Starting to crawl metadata for address: {address}")
logger.info(f"Maybe updated: {len(maybe_updated)}")
# Calculate how many tokens we can 'leak' so total recrawled (maybe_updated + leaked) <= max_recrawl
num_already_parsed = len(already_parsed)
num_maybe_updated = len(maybe_updated)
free_spots = max(0, max_recrawl - num_maybe_updated)
if num_already_parsed > 0 and free_spots > 0:
leak_rate = free_spots / num_already_parsed
else:
leak_rate = 0
logger.info(
f"Leak rate: {leak_rate} for {address} with maybe updated {len(maybe_updated)}"
)
# TODO: Fully random leak is not correct, we should leak based on created_at
parsed_with_leak = leak_of_crawled_uri(
already_parsed, leak_rate, maybe_updated
)
logger.info(f"Already parsed: {len(already_parsed)} for {address}")
logger.info(f"Amount of tokens to parse: {len(tokens)} for {address}")
# Remove already parsed tokens
new_tokens = [
token for token in tokens
if token.token_id not in parsed_with_leak
]
for requests_chunk in [
new_tokens[i : i + batch_size]
for i in range(0, len(new_tokens), batch_size)
]:
metadata_batch = []
try:
# Gather all metadata in parallel
with ThreadPoolExecutor(max_workers=threads) as executor:
future_to_token = {
executor.submit(crawl_uri, token.token_uri): token
for token in requests_chunk
}
for future in as_completed(future_to_token):
token = future_to_token[future]
try:
metadata = future.result(timeout=10)
if metadata:
metadata_batch.append((token, metadata))
except Exception as e:
logger.error(f"Error fetching metadata for token {token.token_id}: {e}")
continue
if metadata_batch:
# Batch upsert all metadata
upsert_metadata_labels(
db_session=db_session,
blockchain_type=blockchain_type,
metadata_batch=metadata_batch,
v3=False
)
clean_labels_from_db(
db_session=db_session,
blockchain_type=blockchain_type,
address=address,
)
logger.info(f"Write {len(metadata_batch)} labels for {address}")
except Exception as err:
logger.warning(f"Error while writing labels for address {address}: {err}")
db_session.rollback()
except Exception as err:
logger.warning(f"Error while crawling metadata for address {address}: {err}")
db_session.rollback()
def process_address_metadata(
address: str,
blockchain_type: AvailableBlockchainType,
db_session: Session,
batch_size: int,
max_recrawl: int,
threads: int,
tokens: List[TokenURIs],
) -> None:
"""
Process metadata for a single address with v3 support
Leak logic is implemented in sql statement
"""
logger.info(f"Processing address {address} with {len(tokens)} tokens")
total_tokens = len(tokens)
total_chunks = (total_tokens + batch_size - 1) // batch_size
for chunk_index, requests_chunk in enumerate([
tokens[i : i + batch_size]
for i in range(0, len(tokens), batch_size)
]):
logger.info(
f"Processing chunk {chunk_index + 1}/{total_chunks} "
f"({len(requests_chunk)} tokens) for address {address}"
)
metadata_batch = []
with ThreadPoolExecutor(max_workers=threads) as executor:
future_to_token = {
executor.submit(crawl_uri, token.token_uri): token
for token in requests_chunk
}
for future in as_completed(future_to_token):
token = future_to_token[future]
metadata = future.result(timeout=10)
metadata_batch.append((token, metadata))
upsert_metadata_labels(
db_session=db_session,
blockchain_type=blockchain_type,
metadata_batch=metadata_batch,
v3=True
)
logger.info(f"Wrote {len(metadata_batch)} labels for {address}")
db_session.commit()
clean_labels_from_db(
db_session=db_session,
blockchain_type=blockchain_type,
address=address,
version=3
)
db_session.commit()
def parse_metadata(
blockchain_type: AvailableBlockchainType,
batch_size: int,
max_recrawl: int,
threads: int,
custom_db_uri: Optional[str] = None,
):
"""
Parse all metadata of tokens.
"""
logger.info("Starting metadata crawler")
logger.info(f"Processing blockchain {blockchain_type.value}")
# Check if blockchain exists in v2 package
if blockchain_type.value in [chain.value for chain in AvailableBlockchainTypeV2]:
try:
logger.info(f"Processing v2 blockchain: {blockchain_type.value}")
# Get tokens to crawl v2 flow
with yield_db_read_only_preping_session_ctx() as db_session_read_only:
tokens_uri_by_address = get_tokens_to_crawl(
db_session_read_only,
blockchain_type,
{},
)
# Process each address
for address, tokens in tokens_uri_by_address.items():
process_address_metadata_with_leak(
address=address,
blockchain_type=blockchain_type,
batch_size=batch_size,
max_recrawl=max_recrawl,
threads=threads,
tokens=tokens,
)
except Exception as err:
logger.error(f"V2 flow failed: {err}, continuing with Spire flow")
# Continue with Spire flow regardless of v2 result
spire_jobs = []
# Get all jobs for this blockchain from Spire
search_query = f"#metadata-job #{blockchain_type.value}"
try:
entries = get_all_entries_from_search(
journal_id=MOONSTREAM_METADATA_TASKS_JOURNAL,
search_query=search_query,
token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
content=True,
limit=1000,
)
logger.info(f"Found {len(entries)} metadata jobs for blockchain {blockchain_type.value}")
for entry in entries:
try:
if not entry.content:
continue
job = json.loads(entry.content)
if job.get("blockchain") != blockchain_type.value:
logger.warning(f"Skipping job with mismatched blockchain: {job.get('blockchain')} != {blockchain_type.value}")
continue
spire_jobs.append(job)
except Exception as err:
id = entry.entry_url.split("/")[-1]
logger.error(f"Error parsing job from entry {id}: {err}")
continue
except BugoutResponseException as err:
logger.error(f"Bugout error fetching jobs from journal: {err.detail}")
except Exception as err:
logger.error(f"Error fetching jobs from journal: {err}")
return
# Process each job
# sessions list for each customer and instance
sessions_by_customer: Dict[Tuple[str, str], Session] = {}
# all sessions in one try block
try:
for job in spire_jobs:
try:
customer_id = job.get("customer_id")
instance_id = job.get("instance_id")
if (customer_id, instance_id) not in sessions_by_customer:
# Create session
# Assume fetch_connection_string fetches the connection string
if custom_db_uri:
connection_string = custom_db_uri
else:
connection_string = request_connection_string(
customer_id=customer_id,
instance_id=instance_id,
token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
)
engine = create_moonstream_engine(connection_string, 2, 100000)
session = sessionmaker(bind=engine)
try:
sessions_by_customer[(customer_id, instance_id)] = session()
except Exception as e:
logger.error(f"Connection to {engine} failed: {e}")
continue
# Get tokens to crawl
tokens_uri_by_address = get_tokens_to_crawl(
sessions_by_customer[(customer_id, instance_id)],
blockchain_type,
job,
)
for address, tokens in tokens_uri_by_address.items():
process_address_metadata(
address=address,
blockchain_type=blockchain_type,
db_session=sessions_by_customer[(customer_id, instance_id)],
batch_size=batch_size,
max_recrawl=max_recrawl,
threads=threads,
tokens=tokens,
)
except Exception as err:
logger.error(f"Error processing job: {err}")
continue
except Exception as err:
logger.error(f"Error processing jobs: {err}")
raise err
finally:
for session in sessions_by_customer.values():
try:
session.close()
except Exception as err:
logger.error(f"Error closing session: {err}")
def handle_crawl(args: argparse.Namespace) -> None:
"""
Parse all metadata of tokens.
"""
blockchain_type = AvailableBlockchainType(args.blockchain)
parse_metadata(
blockchain_type,
args.commit_batch_size,
args.max_recrawl,
args.threads,
args.custom_db_uri,
)
@ -259,7 +430,7 @@ def main() -> None:
"--blockchain", "--blockchain",
"-b", "-b",
type=str, type=str,
help="Type of blockchain wich writng in database", help="Type of blockchain which writing in database",
required=True, required=True,
) )
metadata_crawler_parser.add_argument( metadata_crawler_parser.add_argument(
@ -283,6 +454,11 @@ def main() -> None:
default=4,
help="Amount of threads for crawling",
)
metadata_crawler_parser.add_argument(
"--custom-db-uri",
type=str,
help="Custom db uri to use for crawling",
)
metadata_crawler_parser.set_defaults(func=handle_crawl)
args = parser.parse_args()


@ -1,13 +1,29 @@
import json
import logging
from hexbytes import HexBytes
from typing import Any, Dict, List, Optional, Tuple
###from sqlalchemy import
from sqlalchemy.dialects.postgresql import insert
from datetime import datetime
from moonstreamtypes.blockchain import AvailableBlockchainType, get_label_model
from sqlalchemy.orm import Session
from sqlalchemy.sql import text
from ..actions import recive_S3_data_from_query
from ..data import TokenURIs
from ..settings import (
CRAWLER_LABEL,
METADATA_CRAWLER_LABEL,
VIEW_STATE_CRAWLER_LABEL,
MOONSTREAM_ADMIN_ACCESS_TOKEN,
MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN,
bugout_client as bc,
moonstream_client as mc,
)
from moonstream.client import Moonstream # type: ignore
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@ -18,11 +34,13 @@ def metadata_to_label(
metadata: Optional[Dict[str, Any]],
token_uri_data: TokenURIs,
label_name=METADATA_CRAWLER_LABEL,
v3: bool = False,
):
"""
Creates a label model with support for v2 and v3 database structures.
"""
version = 3 if v3 else 2
label_model = get_label_model(blockchain_type, version=version)
sanityzed_label_data = json.loads(
json.dumps(
@ -34,14 +52,34 @@ def metadata_to_label(
).replace(r"\u0000", "") ).replace(r"\u0000", "")
) )
label = label_model( if v3:
label=label_name, # V3 structure similar to state crawler
label_data=sanityzed_label_data, label_data = {
address=token_uri_data.address, "token_id": token_uri_data.token_id,
block_number=token_uri_data.block_number, "metadata": metadata,
transaction_hash=None, }
block_timestamp=token_uri_data.block_timestamp,
) label = label_model(
label=label_name,
label_name="metadata", # Fixed name for metadata labels
label_type="metadata",
label_data=label_data,
address=HexBytes(token_uri_data.address),
block_number=token_uri_data.block_number,
# Use a fixed tx hash for metadata since it's not from a transaction
block_timestamp=token_uri_data.block_timestamp,
block_hash=token_uri_data.block_hash if hasattr(token_uri_data, 'block_hash') else None,
)
else:
# Original v2 structure
label = label_model(
label=label_name,
label_data=sanityzed_label_data,
address=token_uri_data.address,
block_number=token_uri_data.block_number,
transaction_hash=None,
block_timestamp=token_uri_data.block_timestamp,
)
return label return label
@ -60,13 +98,13 @@ def commit_session(db_session: Session) -> None:
def get_uris_of_tokens(
db_session: Session, blockchain_type: AvailableBlockchainType, version: int = 2
) -> List[TokenURIs]:
"""
Get meatadata URIs.
"""
label_model = get_label_model(blockchain_type, version=version)
table = label_model.__tablename__
@ -113,13 +151,13 @@ def get_uris_of_tokens(
def get_current_metadata_for_address(
db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
):
"""
Get existing metadata.
"""
label_model = get_label_model(blockchain_type, version=version)
table = label_model.__tablename__
@ -149,7 +187,7 @@ def get_current_metadata_for_address(
def get_tokens_id_wich_may_updated(
db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
):
"""
Returns a list of tokens which may have updated information.
@ -163,7 +201,7 @@ def get_tokens_id_wich_may_updated(
Required integration with entity API and opcodes crawler.
"""
label_model = get_label_model(blockchain_type, version=version)
table = label_model.__tablename__
@ -233,14 +271,14 @@ def get_tokens_id_wich_may_updated(
def clean_labels_from_db(
db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
):
"""
Remove existing labels.
But keep the latest one for each token.
"""
label_model = get_label_model(blockchain_type, version=version)
table = label_model.__tablename__
@ -273,3 +311,165 @@ def clean_labels_from_db(
),
{"address": address, "label": METADATA_CRAWLER_LABEL},
) )
def get_tokens_from_query_api(
client: Moonstream,
blockchain_type: AvailableBlockchainType,
query_name: str,
params: dict,
token: str,
customer_id: Optional[str] = None,
instance_id: Optional[str] = None,
) -> List[TokenURIs]:
"""
Get token URIs from Query API results
"""
query_params = {}
if customer_id and instance_id:
query_params["customer_id"] = customer_id
query_params["instance_id"] = instance_id
try:
data = recive_S3_data_from_query(
client=client,
token=token,
query_name=query_name,
params={},
query_params=query_params,
custom_body={
"blockchain": blockchain_type.value,
"params": params,
}
)
# Convert query results to TokenURIs format
results = []
for item in data.get("data", []):
results.append(
TokenURIs(
token_id=str(item.get("token_id")),
address=item.get("address"),
token_uri=item.get("token_uri"),
block_number=item.get("block_number"),
block_timestamp=item.get("block_timestamp"),
)
)
return results
except Exception as err:
logger.error(f"Error fetching data from Query API: {err}")
return []
def get_tokens_to_crawl(
db_session: Session,
blockchain_type: AvailableBlockchainType,
spire_job: Optional[dict] = None,
) -> Dict[str, List[TokenURIs]]:
"""`
Get tokens to crawl either from Query API (if specified in Spire job) or database
"""
tokens_uri_by_address = {}
if spire_job:
if "query_api" not in spire_job:
raise ValueError("Query API is not specified in Spire job")
# Get tokens from Query API
query_config = spire_job["query_api"]
client = Moonstream()
tokens = get_tokens_from_query_api(
client=client,
blockchain_type=blockchain_type,
query_name=query_config["name"],
params=query_config["params"],
token=MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN,
customer_id=spire_job["customer_id"],
instance_id=spire_job["instance_id"],
)
# Group by address
for token in tokens:
if token.address not in tokens_uri_by_address:
tokens_uri_by_address[token.address] = []
tokens_uri_by_address[token.address].append(token)
else:
# Get tokens from database (existing logic)
uris_of_tokens = get_uris_of_tokens(db_session, blockchain_type)
for token_uri_data in uris_of_tokens:
if token_uri_data.address not in tokens_uri_by_address:
tokens_uri_by_address[token_uri_data.address] = []
tokens_uri_by_address[token_uri_data.address].append(token_uri_data)
return tokens_uri_by_address
def upsert_metadata_labels(
db_session: Session,
blockchain_type: AvailableBlockchainType,
metadata_batch: List[Tuple[TokenURIs, Optional[Dict[str, Any]]]],
v3: bool = False,
db_batch_size: int = 100,
) -> None:
"""
Batch upsert metadata labels - update if exists, insert if not.
"""
try:
version = 3 if v3 else 2
label_model = get_label_model(blockchain_type, version=version)
# Prepare batch of labels
labels_data = []
for token_uri_data, metadata in metadata_batch:
if v3:
# V3 structure
label_data = {
"token_id": token_uri_data.token_id,
"metadata": metadata,
}
labels_data.append({
"label": METADATA_CRAWLER_LABEL,
"label_name": "metadata",
"label_type": "metadata",
"label_data": label_data,
"address": HexBytes(token_uri_data.address),
"block_number": token_uri_data.block_number,
"block_timestamp": token_uri_data.block_timestamp,
"block_hash": getattr(token_uri_data, 'block_hash', None),
})
else:
# V2 structure
label_data = {
"type": "metadata",
"token_id": token_uri_data.token_id,
"metadata": metadata,
}
labels_data.append({
"label": METADATA_CRAWLER_LABEL,
"label_data": label_data,
"address": token_uri_data.address,
"block_number": token_uri_data.block_number,
"transaction_hash": None,
"block_timestamp": token_uri_data.block_timestamp,
})
if not labels_data:
return
# Create insert statement
insert_stmt = insert(label_model).values(labels_data)
result_stmt = insert_stmt.on_conflict_do_nothing(
)
db_session.execute(result_stmt)
db_session.commit()
except Exception as err:
logger.error(f"Error batch upserting metadata labels: {err}")
raise


@ -3,21 +3,16 @@ from typing import Dict, Optional
from uuid import UUID
from bugout.app import Bugout
from moonstreamtypes.blockchain import AvailableBlockchainType # type: ignore
from moonstream.client import Moonstream # type: ignore
# APIs
## Bugout
BUGOUT_BROOD_URL = os.environ.get("BUGOUT_BROOD_URL", "https://auth.bugout.dev")
BUGOUT_SPIRE_URL = os.environ.get("BUGOUT_SPIRE_URL", "https://spire.bugout.dev")
bugout_client = Bugout(brood_api_url=BUGOUT_BROOD_URL, spire_api_url=BUGOUT_SPIRE_URL)
BUGOUT_REQUEST_TIMEOUT_SECONDS_RAW = os.environ.get(
"MOONSTREAM_BUGOUT_TIMEOUT_SECONDS", 30
)
@ -31,6 +26,24 @@ except:
HUMBUG_REPORTER_CRAWLERS_TOKEN = os.environ.get("HUMBUG_REPORTER_CRAWLERS_TOKEN")
## Moonstream
MOONSTREAM_API_URL = os.environ.get("MOONSTREAM_API_URL", "https://api.moonstream.to")
moonstream_client = Moonstream()
## Moonstream Engine
MOONSTREAM_ENGINE_URL = os.environ.get(
"MOONSTREAM_ENGINE_URL", "https://engineapi.moonstream.to"
)
## Moonstream DB
MOONSTREAM_DB_V3_CONTROLLER_API = os.environ.get(
"MOONSTREAM_DB_V3_CONTROLLER_API", "https://mdb-v3-api.moonstream.to"
)
# Origin
RAW_ORIGINS = os.environ.get("MOONSTREAM_CORS_ALLOWED_ORIGINS")
if RAW_ORIGINS is None:
@ -490,3 +503,19 @@ MOONSTREAM_DB_V3_CONTROLLER_API = os.environ.get(
MOONSTREAM_DB_V3_SCHEMA_NAME = os.environ.get(
"MOONSTREAM_DB_V3_SCHEMA_NAME", "blockchain"
)
MOONSTREAM_METADATA_TASKS_JOURNAL = os.environ.get(
"MOONSTREAM_METADATA_TASKS_JOURNAL", ""
)
### MOONSTREAM_PUBLIC_QUERIES_USER_TOKEN
MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN = os.environ.get(
"MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN", ""
)
if MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN == "":
raise ValueError(
"MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN environment variable must be set"
)


@ -2,4 +2,4 @@
Moonstream crawlers version.
"""
MOONCRAWL_VERSION = "0.5.3"


@ -38,8 +38,8 @@ setup(
"chardet", "chardet",
"fastapi", "fastapi",
"moonstreamdb>=0.4.6", "moonstreamdb>=0.4.6",
"moonstreamdb-v3>=0.1.3", "moonstreamdb-v3>=0.1.4",
"moonstream-types>=0.0.10", "moonstream-types>=0.0.11",
"moonstream>=0.1.2", "moonstream>=0.1.2",
"moonworm[moonstream]>=0.9.3", "moonworm[moonstream]>=0.9.3",
"humbug", "humbug",


@ -16,7 +16,7 @@ from bugout.data import (
)
from bugout.exceptions import BugoutResponseException
from fastapi import APIRouter, Body, Path, Query, Request
from moonstreamtypes.blockchain import AvailableBlockchainType
from sqlalchemy import text
from .. import data