Mirror of https://github.com/bugout-dev/moonstream

Merge pull request #1163 from moonstream-to/metadata-crawler-update

Metadata crawler update (pull/1164/head)

commit d8c20bd503
@@ -63,6 +63,15 @@ XAI_SEPOLIA_STATE_CLEAN_TIMER_FILE="xai-sepolia-state-clean.timer"
 XAI_SEPOLIA_METADATA_SERVICE_FILE="xai-sepolia-metadata.service"
 XAI_SEPOLIA_METADATA_TIMER_FILE="xai-sepolia-metadata.timer"
 
+# Game7
+GAME7_METADATA_SERVICE_FILE="game7-metadata.service"
+GAME7_METADATA_TIMER_FILE="game7-metadata.timer"
+
+# Game7 testnet
+GAME7_TESTNET_METADATA_SERVICE_FILE="game7-testnet-metadata.service"
+GAME7_TESTNET_METADATA_TIMER_FILE="game7-testnet-metadata.timer"
+
+
 set -eu
 
 echo

@@ -229,4 +238,22 @@ chmod 644 "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${
 cp "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${XAI_SEPOLIA_METADATA_SERVICE_FILE}"
 cp "${SCRIPT_DIR}/${XAI_SEPOLIA_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${XAI_SEPOLIA_METADATA_TIMER_FILE}"
 XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
 XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${XAI_SEPOLIA_METADATA_TIMER_FILE}"
+
+echo
+echo
+echo -e "${PREFIX_INFO} Replacing existing Game7 metadata service and timer with: ${GAME7_METADATA_SERVICE_FILE}, ${GAME7_METADATA_TIMER_FILE}"
+chmod 644 "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_SERVICE_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_TIMER_FILE}"
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_METADATA_TIMER_FILE}"
+
+echo
+echo
+echo -e "${PREFIX_INFO} Replacing existing Game7 testnet metadata service and timer with: ${GAME7_TESTNET_METADATA_SERVICE_FILE}, ${GAME7_TESTNET_METADATA_TIMER_FILE}"
+chmod 644 "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_SERVICE_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_TIMER_FILE}"
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_TESTNET_METADATA_TIMER_FILE}"
@@ -217,6 +217,14 @@ MANTLE_SEPOLIA_HISTORICAL_CRAWL_EVENTS_TIMER_FILE="mantle-sepolia-historical-cra
 MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_SERVICE_FILE="mantle-sepolia-historical-crawl-transactions.service"
 MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE="mantle-sepolia-historical-crawl-transactions.timer"
 
+# Game7
+GAME7_METADATA_SERVICE_FILE="game7-metadata.service"
+GAME7_METADATA_TIMER_FILE="game7-metadata.timer"
+
+# Game7 testnet
+GAME7_TESTNET_METADATA_SERVICE_FILE="game7-testnet-metadata.service"
+GAME7_TESTNET_METADATA_TIMER_FILE="game7-testnet-metadata.timer"
+
 set -eu
 
 echo

@@ -1109,3 +1117,24 @@ cp "${SCRIPT_DIR}/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_SERVICE_FILE}"
 cp "${SCRIPT_DIR}/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}"
 XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
 XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${MANTLE_SEPOLIA_HISTORICAL_CRAWL_TRANSACTIONS_TIMER_FILE}"
+
+
+# Game7
+echo
+echo
+echo -e "${PREFIX_INFO} Replacing existing Game7 metadata service and timer with: ${GAME7_METADATA_SERVICE_FILE}, ${GAME7_METADATA_TIMER_FILE}"
+chmod 644 "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_SERVICE_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_METADATA_TIMER_FILE}"
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_METADATA_TIMER_FILE}"
+
+# Game7 testnet
+echo
+echo
+echo -e "${PREFIX_INFO} Replacing existing Game7 testnet metadata service and timer with: ${GAME7_TESTNET_METADATA_SERVICE_FILE}, ${GAME7_TESTNET_METADATA_TIMER_FILE}"
+chmod 644 "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_SERVICE_FILE}"
+cp "${SCRIPT_DIR}/${GAME7_TESTNET_METADATA_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${GAME7_TESTNET_METADATA_TIMER_FILE}"
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload
+XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${GAME7_TESTNET_METADATA_TIMER_FILE}"
@@ -0,0 +1,11 @@ (new file: game7-metadata.service)
[Unit]
Description=Execute metadata crawler
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/home/ubuntu/moonstream/crawlers/mooncrawl
EnvironmentFile=/home/ubuntu/moonstream-secrets/app.env
ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.metadata_crawler.cli crawl --blockchain game7
CPUWeight=60
SyslogIdentifier=game7-metadata
@@ -0,0 +1,9 @@ (new file: game7-metadata.timer)
[Unit]
Description=Execute Game7 metadata crawler each 60m

[Timer]
OnBootSec=20s
OnUnitActiveSec=60m

[Install]
WantedBy=timers.target
@@ -0,0 +1,11 @@ (new file: game7-testnet-metadata.service)
[Unit]
Description=Execute metadata crawler
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/home/ubuntu/moonstream/crawlers/mooncrawl
EnvironmentFile=/home/ubuntu/moonstream-secrets/app.env
ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.metadata_crawler.cli crawl --blockchain game7_testnet
CPUWeight=60
SyslogIdentifier=game7-testnet-metadata
@@ -0,0 +1,9 @@ (new file: game7-testnet-metadata.timer)
[Unit]
Description=Execute Game7 testnet metadata crawler each 60m

[Timer]
OnBootSec=20s
OnUnitActiveSec=60m

[Install]
WantedBy=timers.target
@@ -116,6 +116,7 @@ def recive_S3_data_from_query(
     client: Moonstream,
     token: Union[str, uuid.UUID],
     query_name: str,
+    query_params: Dict[str, Any] = {},
     params: Dict[str, Any] = {},
     time_await: int = 2,
     max_retries: int = 30,

@@ -136,15 +137,18 @@ def recive_S3_data_from_query(
     if custom_body:
         headers = {
             "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
         }
         json = custom_body
 
         response = requests.post(
             url=f"{client.api.endpoints[ENDPOINT_QUERIES]}/{query_name}/update_data",
             headers=headers,
+            params=query_params,
             json=json,
             timeout=5,
         )
 
         data_url = MoonstreamQueryResultUrl(url=response.json()["url"])
     else:
         data_url = client.exec_query(
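For orientation, here is a hedged usage sketch of the updated function: `query_params` carries the customer/instance routing values appended to the request URL, while `custom_body` carries the query arguments, matching how `get_tokens_from_query_api` (added later in this PR) calls it. All literal values below are placeholders, not real identifiers.

```python
# Sketch: run a named Query API query with a custom body and per-customer routing.
# The call shape follows get_tokens_from_query_api added elsewhere in this PR.
from moonstream.client import Moonstream  # type: ignore

from mooncrawl.actions import recive_S3_data_from_query

client = Moonstream()
data = recive_S3_data_from_query(
    client=client,
    token="<moonstream access token>",  # placeholder
    query_name="new_tokens_to_crawl",
    params={},
    query_params={"customer_id": "<customer>", "instance_id": "<instance>"},  # placeholders
    custom_body={"blockchain": "ethereum", "params": {"address": "0x..."}},
)
for row in data.get("data", []):
    print(row.get("token_id"), row.get("token_uri"))
```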
@@ -226,3 +230,27 @@ def get_customer_db_uri(
     except Exception as e:
         logger.error(f"Error get customer db uri: {str(e)}")
         raise MoonstreamHTTPException(status_code=500, internal_error=e)
+
+
+## DB V3
+
+
+def request_connection_string(
+    customer_id: str,
+    instance_id: int,
+    token: str,
+    user: str = "seer",  # token with write access
+) -> str:
+    """
+    Request connection string from the Moonstream DB V3 Controller API.
+
+    Default user is seer with write access
+    """
+    response = requests.get(
+        f"{MOONSTREAM_DB_V3_CONTROLLER_API}/customers/{customer_id}/instances/{instance_id}/creds/{user}/url",
+        headers={"Authorization": f"Bearer {token}"},
+    )
+
+    response.raise_for_status()
+
+    return response.text.replace('"', "")
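A hedged sketch of how this helper is consumed: the metadata crawler CLI added in this PR does essentially the same thing, fetching the URI and then building an engine and session. The `create_moonstream_engine(connection_string, 2, 100000)` arguments follow that usage; all identifiers and literals below are placeholders.

```python
# Sketch: turn a customer/instance pair into a working SQLAlchemy session.
# Mirrors the session setup added to mooncrawl/metadata_crawler/cli.py in this PR.
from sqlalchemy.orm import sessionmaker

from mooncrawl.actions import request_connection_string
from mooncrawl.db import create_moonstream_engine
from mooncrawl.settings import MOONSTREAM_ADMIN_ACCESS_TOKEN

connection_string = request_connection_string(
    customer_id="<customer uuid>",  # placeholder
    instance_id=1,                  # placeholder
    token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
)
engine = create_moonstream_engine(connection_string, 2, 100000)
session = sessionmaker(bind=engine)()
try:
    pass  # run queries against the customer's instance
finally:
    session.close()
```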
@@ -13,12 +13,7 @@ import boto3  # type: ignore
 from bugout.data import BugoutJournalEntity, BugoutResource
 from fastapi import BackgroundTasks, FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from moonstreamdb.blockchain import (
-    AvailableBlockchainType,
-    get_block_model,
-    get_label_model,
-    get_transaction_model,
-)
+from moonstreamtypes.blockchain import AvailableBlockchainType, get_block_model, get_label_model, get_transaction_model
 from sqlalchemy import text
 
 from . import data

@@ -232,6 +227,11 @@ async def queries_data_update_handler(
 
     requested_query = request_data.query
 
+    version = 2
+
+    if request_data.customer_id and request_data.instance_id:
+        version = 3
+
     blockchain_table = "polygon_labels"
     if request_data.blockchain:
         if request_data.blockchain not in [i.value for i in AvailableBlockchainType]:

@@ -240,22 +240,23 @@ async def queries_data_update_handler(
 
         blockchain = AvailableBlockchainType(request_data.blockchain)
 
-        requested_query = (
-            requested_query.replace(
-                "__transactions_table__",
-                get_transaction_model(blockchain).__tablename__,
-            )
-            .replace(
-                "__blocks_table__",
-                get_block_model(blockchain).__tablename__,
-            )
-            .replace(
-                "__labels_table__",
-                get_label_model(blockchain).__tablename__,
-            )
-        )
-
-        blockchain_table = get_label_model(blockchain).__tablename__
+        blockchain_table = get_label_model(blockchain, version).__tablename__
+        requested_query = requested_query.replace(
+            "__labels_table__",
+            blockchain_table,
+        )
+        if version == 2:
+            (
+                requested_query.replace(
+                    "__transactions_table__",
+                    get_transaction_model(blockchain).__tablename__,
+                )
+                .replace(
+                    "__blocks_table__",
+                    get_block_model(blockchain).__tablename__,
+                )
+            )
 
     # Check if it can transform to TextClause
     try:
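For readers unfamiliar with the placeholder mechanism used above: stored queries reference table names such as `__labels_table__`, and the handler substitutes the per-blockchain, per-version table name before execution. A tiny illustration (the query text and table name below are illustrative, not taken from the codebase):

```python
# Illustration of the substitution performed in queries_data_update_handler:
# a stored query references __labels_table__, and the handler swaps in the
# resolved table name (e.g. get_label_model(blockchain, version).__tablename__).
requested_query = "SELECT label_data FROM __labels_table__ WHERE address = :address"
blockchain_table = "polygon_labels"  # resolved per blockchain and version
requested_query = requested_query.replace("__labels_table__", blockchain_table)
print(requested_query)
# SELECT label_data FROM polygon_labels WHERE address = :address
```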
@@ -60,6 +60,7 @@ class TokenURIs(BaseModel):
     block_number: str
     block_timestamp: str
     address: str
+    block_hash: Optional[str] = None  # for v3 only
 
 
 class ViewTasks(BaseModel):
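A hedged construction example of the updated model: `block_hash` stays `None` for v2 rows and is populated for v3 rows. All field values below are placeholders.

```python
# Example of building the updated TokenURIs model; values are placeholders.
from mooncrawl.data import TokenURIs

token = TokenURIs(
    token_id="42",
    token_uri="ipfs://Qm.../42.json",
    block_number="12345678",
    block_timestamp="1700000000",
    address="0x0000000000000000000000000000000000000000",
    block_hash=None,  # populated only in the v3 flow (see the README added in this PR)
)
```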
@@ -0,0 +1,180 @@ (new file: metadata crawler README)
# Metadata Crawler Architecture

## Overview

The metadata crawler fetches and stores metadata for NFTs (Non-Fungible Tokens) from various blockchains. It supports both traditional database TokenURI view-method queries and Spire journal-based job configurations, and it can write to both the v2 and v3 database structures.

## Core Components

### 1. Update Strategies

#### Leak-Based Strategy (Legacy v2)
- Uses a probabilistic approach to determine which tokens to update
- Controlled by the `max_recrawl` parameter (see the sketch after these strategy lists)
- Suitable for large collections with infrequent updates

#### SQL-Based Strategy (v3)
- Uses SQL queries to determine which tokens need updates
- More precise tracking of token updates
- Better suited for active collections
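The leak rate itself is not spelled out above, so here is a minimal sketch of how the v2 flow derives it, mirroring `process_address_metadata_with_leak` added in this PR. The helper `leak_of_crawled_uri` is not shown in this diff, so `select_tokens_to_skip` below is an illustrative stand-in for its behavior, not the actual implementation.

```python
import random
from typing import List, Set


def compute_leak_rate(already_parsed: Set[str], maybe_updated: List[str], max_recrawl: int) -> float:
    """Share of already-parsed tokens to 'leak' back into the crawl queue."""
    free_spots = max(0, max_recrawl - len(maybe_updated))
    if len(already_parsed) > 0 and free_spots > 0:
        return free_spots / len(already_parsed)
    return 0.0


def select_tokens_to_skip(already_parsed: Set[str], leak_rate: float, maybe_updated: List[str]) -> Set[str]:
    """Illustrative stand-in for leak_of_crawled_uri: skip an already-parsed token
    unless it was leaked for recrawl or explicitly flagged as maybe updated."""
    skip = set()
    for token_id in already_parsed:
        if token_id in maybe_updated:
            continue  # always recrawl tokens that may have changed
        if random.random() >= leak_rate:
            skip.add(token_id)  # not leaked: skip this token on this run
    return skip


already_parsed = {str(i) for i in range(1000)}
maybe_updated = [str(i) for i in range(50)]
rate = compute_leak_rate(already_parsed, maybe_updated, max_recrawl=300)
skip = select_tokens_to_skip(already_parsed, rate, maybe_updated)
print(f"leak_rate={rate:.2f}, skipping {len(skip)} of {len(already_parsed)} tokens")
```

In short: tokens flagged as possibly updated are always recrawled, and the remaining budget up to `max_recrawl` is spent on a random sample of already-parsed tokens.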
### 2. Database Connections

The crawler supports multiple database connection strategies:
- Default Moonstream database connection
- Custom database URI via `--custom-db-uri`
- Per-customer instance connections (v3)

```json
{
    "customer_id": "...",
    "instance_id": "...",
    "blockchain": "ethereum",
    "v3": true
}
```

### 3. Job Configuration

Jobs can be configured in two ways:
- Through Spire journal entries with tags `#metadata-job #{blockchain}`
- Direct database queries (legacy mode) using the TokenURI view method

Example Spire journal entry:
```json
{
    "type": "metadata-job",
    "query_api": {
        "name": "new_tokens_to_crawl",
        "params": {
            "address": "0x...",
            "blockchain": "ethereum"
        }
    },
    "contract_address": "0x...",
    "blockchain": "ethereum",
    "update_existing": false,
    "v3": true,
    "customer_id": "...",  // Optional, for custom database
    "instance_id": "..."   // Optional, for custom database
}
```
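To make the journal-based configuration concrete, the following is a sketch of the job-discovery step as implemented in `mooncrawl/metadata_crawler/cli.py` in this PR. It assumes the `mooncrawl` package is installed and that `MOONSTREAM_METADATA_TASKS_JOURNAL` and `MOONSTREAM_ADMIN_ACCESS_TOKEN` are configured.

```python
# Sketch of Spire job discovery, following the flow added in this PR.
import json
import logging

from mooncrawl.actions import get_all_entries_from_search
from mooncrawl.settings import (
    MOONSTREAM_ADMIN_ACCESS_TOKEN,
    MOONSTREAM_METADATA_TASKS_JOURNAL,
)

logger = logging.getLogger(__name__)


def load_metadata_jobs(blockchain: str) -> list:
    """Fetch and parse all #metadata-job entries for a blockchain from the Spire journal."""
    entries = get_all_entries_from_search(
        journal_id=MOONSTREAM_METADATA_TASKS_JOURNAL,
        search_query=f"#metadata-job #{blockchain}",
        token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
        content=True,
        limit=1000,
    )
    jobs = []
    for entry in entries:
        if not entry.content:
            continue
        job = json.loads(entry.content)
        if job.get("blockchain") != blockchain:
            logger.warning("Skipping job with mismatched blockchain: %s", job.get("blockchain"))
            continue
        jobs.append(job)
    return jobs
```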
### 4. Data Flow

1. **Token Discovery**
   - Query API integration for dynamic token discovery
   - Database queries for existing tokens
   - Support for multiple addresses per job

2. **Metadata Fetching**
   - Parallel processing with ThreadPoolExecutor (see the sketch after this list)
   - IPFS gateway support
   - Automatic retry mechanism
   - Rate limiting and batch processing

3. **Storage**
   - Supports both v2 and v3 database structures
   - Batch upsert operations
   - Efficient cleaning of old labels
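The fetch stage can be summarized with a small, self-contained sketch (standard library only). It mirrors `crawl_uri()` and the `ThreadPoolExecutor` batching added in this PR, but is simplified: no `data:application/json` handling and no custom request headers.

```python
# Rewrite ipfs:// URIs to an HTTP gateway, retry a few times, and gather a batch in parallel.
import json
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional
from urllib.error import HTTPError

IPFS_GATEWAY = "https://ipfs.io/ipfs/"


def fetch_metadata(uri: str, retries: int = 3, timeout: int = 10) -> Optional[Any]:
    if uri.startswith("ipfs://"):
        uri = uri.replace("ipfs://", IPFS_GATEWAY, 1)
    for _ in range(retries):
        try:
            with urllib.request.urlopen(uri, timeout=timeout) as response:
                if response.status == 200:
                    return json.loads(response.read())
        except HTTPError as err:
            if err.code == 404:
                return None  # permanent failure, do not retry
        except Exception:
            pass  # transient error, retry
    return None


def fetch_batch(uris: List[str], threads: int = 4) -> Dict[str, Optional[Any]]:
    results: Dict[str, Optional[Any]] = {}
    with ThreadPoolExecutor(max_workers=threads) as executor:
        futures = {executor.submit(fetch_metadata, uri): uri for uri in uris}
        for future in as_completed(futures):
            results[futures[future]] = future.result()
    return results
```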
### 5. Database Structures

v2:
```python
{
    "label": METADATA_CRAWLER_LABEL,
    "label_data": {
        "type": "metadata",
        "token_id": "...",
        "metadata": {...}
    },
    "block_number": 1234567890,
    "block_timestamp": 456
}
```

v3:
```python
{
    "label": METADATA_CRAWLER_LABEL,
    "label_type": "metadata",
    "label_data": {
        "token_id": "...",
        "metadata": {...}
    },
    "address": "0x...",
    "block_number": 123,
    "block_timestamp": 456,
    "block_hash": "0x..."
}
```
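The write path for these rows is a batch insert with `ON CONFLICT DO NOTHING`, as implemented by `upsert_metadata_labels` in this PR. A condensed sketch, assuming a SQLAlchemy session and rows shaped like the structures above:

```python
# Sketch of the batch write path: existing labels are left untouched on conflict.
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import Session

from moonstreamtypes.blockchain import AvailableBlockchainType, get_label_model


def write_labels(db_session: Session, blockchain_type: AvailableBlockchainType, labels_data: list) -> None:
    if not labels_data:
        return
    label_model = get_label_model(blockchain_type, version=3)
    insert_stmt = insert(label_model).values(labels_data).on_conflict_do_nothing()
    db_session.execute(insert_stmt)
    db_session.commit()
```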
## Key Features

1. **Flexible Token Selection**
   - Query API integration
   - Support for multiple addresses
   - Configurable update strategies

2. **Efficient Processing**
   - Batch processing
   - Parallel metadata fetching
   - Optimized database operations

3. **Error Handling**
   - Retry mechanism for failed requests
   - Transaction management
   - Detailed logging

4. **Database Management**
   - Efficient upsert operations
   - Label cleaning
   - Version compatibility (v2/v3)

## Usage

### CLI Options

```bash
metadata-crawler crawl \
    --blockchain ethereum \
    --commit-batch-size 50 \
    --max-recrawl 300 \
    --threads 4 \
    --spire true \
    --custom-db-uri "postgresql://..."  # Optional
```

### Environment Variables
- `MOONSTREAM_ADMIN_ACCESS_TOKEN`: Required for API access
- `METADATA_CRAWLER_LABEL`: Label for database entries
- `MOONSTREAM_METADATA_TASKS_JOURNAL`: Journal ID for metadata tasks
- `MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN`: Token used for Query API access

### Database Modes

1. **Legacy Mode (v2)**
   - Uses leak-based update strategy
   - Single database connection
   - Simple metadata structure

2. **Modern Mode (v3)**
   - SQL-based update tracking
   - Support for multiple database instances
   - Enhanced metadata structure
   - Per-customer database isolation (see the version-switch sketch after this list)
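A minimal sketch of the version switch used by the database helpers in this PR (`metadata_to_label`, `get_uris_of_tokens`, `clean_labels_from_db`, and friends): the same blockchain resolves to a different label model depending on the target version.

```python
# Resolve the label model for v2 or v3, as the helpers in mooncrawl do.
from moonstreamtypes.blockchain import AvailableBlockchainType, get_label_model


def resolve_label_model(blockchain_type: AvailableBlockchainType, v3: bool):
    version = 3 if v3 else 2
    return get_label_model(blockchain_type, version=version)
```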
## Best Practices

1. **Job Configuration**
   - Use descriptive job names
   - Group related addresses
   - Set appropriate update intervals

2. **Performance Optimization**
   - Adjust batch sizes based on network conditions
   - Monitor thread count vs. performance
   - Use appropriate IPFS gateways

3. **Maintenance**
   - Regular cleaning of old labels
   - Monitor database size
   - Check for failed metadata fetches
@@ -3,21 +3,32 @@ import json
 import logging
 import random
 import urllib.request
-from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, List, Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from sqlalchemy.orm import Session
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.error import HTTPError
 
-from moonstreamdb.blockchain import AvailableBlockchainType
-
-from ..db import yield_db_preping_session_ctx, yield_db_read_only_preping_session_ctx
+from bugout.exceptions import BugoutResponseException
+from moonstreamtypes.blockchain import AvailableBlockchainType
+from moonstreamdb.blockchain import AvailableBlockchainType as AvailableBlockchainTypeV2
+
+from ..actions import get_all_entries_from_search, request_connection_string
+from ..settings import MOONSTREAM_ADMIN_ACCESS_TOKEN, MOONSTREAM_METADATA_TASKS_JOURNAL, MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN
+from ..db import yield_db_preping_session_ctx, yield_db_read_only_preping_session_ctx, create_moonstream_engine, sessionmaker
+from ..data import TokenURIs
 from .db import (
     clean_labels_from_db,
     get_current_metadata_for_address,
     get_tokens_id_wich_may_updated,
     get_uris_of_tokens,
     metadata_to_label,
+    get_tokens_to_crawl,
+    upsert_metadata_labels,
 )
+
+from ..settings import moonstream_client as mc
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

@@ -50,7 +61,6 @@ def crawl_uri(metadata_uri: str) -> Any:
     result = None
     while retry < 3:
         try:
-
             if metadata_uri.startswith("ipfs://"):
                 metadata_uri = metadata_uri.replace(
                     "ipfs://", "https://ipfs.io/ipfs/", 1

@@ -61,10 +71,7 @@ def crawl_uri(metadata_uri: str) -> Any:
 
             response = urllib.request.urlopen(req, timeout=10)
 
-            if (
-                metadata_uri.startswith("data:application/json")
-                or response.status == 200
-            ):
+            if metadata_uri.startswith("data:application/json") or response.status == 200:
                 result = json.loads(response.read())
                 break
             retry += 1

@@ -72,6 +79,8 @@ def crawl_uri(metadata_uri: str) -> Any:
         except HTTPError as error:
             logger.error(f"request end with error statuscode: {error.code}")
             retry += 1
+            if error.code == 404:
+                return None
             continue
         except Exception as err:
             logger.error(err)
@@ -81,167 +90,329 @@ def crawl_uri(metadata_uri: str) -> Any:
     return result
 
 
-def parse_metadata(
-    blockchain_type: AvailableBlockchainType,
-    batch_size: int,
-    max_recrawl: int,
-    threads: int,
-):
-    """
-    Parse all metadata of tokens.
-    """
-
-    logger.info("Starting metadata crawler")
-    logger.info(f"Processing blockchain {blockchain_type.value}")
-
-    # run crawling of levels
-    with yield_db_read_only_preping_session_ctx() as db_session_read_only:
-        try:
-            # get all tokens with uri
-            logger.info("Requesting all tokens with uri from database")
-            uris_of_tokens = get_uris_of_tokens(db_session_read_only, blockchain_type)
-
-            tokens_uri_by_address: Dict[str, Any] = {}
-
-            for token_uri_data in uris_of_tokens:
-                if token_uri_data.address not in tokens_uri_by_address:
-                    tokens_uri_by_address[token_uri_data.address] = []
-                tokens_uri_by_address[token_uri_data.address].append(token_uri_data)
-        except Exception as err:
-            logger.error(f"Error while requesting tokens with uri from database: {err}")
-            return
-
-    for address in tokens_uri_by_address:
-        with yield_db_read_only_preping_session_ctx() as db_session_read_only:
-            try:
-                already_parsed = get_current_metadata_for_address(
-                    db_session=db_session_read_only,
-                    blockchain_type=blockchain_type,
-                    address=address,
-                )
-
-                maybe_updated = get_tokens_id_wich_may_updated(
-                    db_session=db_session_read_only,
-                    blockchain_type=blockchain_type,
-                    address=address,
-                )
-            except Exception as err:
-                logger.warning(err)
-                logger.warning(
-                    f"Error while requesting metadata for address: {address}"
-                )
-                continue
-
-        with yield_db_preping_session_ctx() as db_session:
-            try:
-                logger.info(f"Starting to crawl metadata for address: {address}")
-
-                leak_rate = 0.0
-
-                if len(maybe_updated) > 0:
-                    free_spots = len(maybe_updated) / max_recrawl
-
-                    if free_spots > 1:
-                        leak_rate = 0
-                    else:
-                        leak_rate = 1 - (
-                            len(already_parsed) - max_recrawl + len(maybe_updated)
-                        ) / len(already_parsed)
-
-                parsed_with_leak = leak_of_crawled_uri(
-                    already_parsed, leak_rate, maybe_updated
-                )
-
-                logger.info(
-                    f"Leak rate: {leak_rate} for {address} with maybe updated {len(maybe_updated)}"
-                )
-
-                logger.info(f"Already parsed: {len(already_parsed)} for {address}")
-
-                logger.info(
-                    f"Amount of state in database: {len(tokens_uri_by_address[address])} for {address}"
-                )
-
-                logger.info(
-                    f"Amount of tokens parsed with leak: {len(parsed_with_leak)} for {address}"
-                )
-
-                # Remove already parsed tokens
-                new_tokens_uri_by_address = [
-                    token_uri_data
-                    for token_uri_data in tokens_uri_by_address[address]
-                    if token_uri_data.token_id not in parsed_with_leak
-                ]
-
-                logger.info(
-                    f"Amount of tokens to parse: {len(new_tokens_uri_by_address)} for {address}"
-                )
-
-                for requests_chunk in [
-                    new_tokens_uri_by_address[i : i + batch_size]
-                    for i in range(0, len(new_tokens_uri_by_address), batch_size)
-                ]:
-                    writed_labels = 0
-                    db_session.commit()
-
-                    try:
-                        with db_session.begin():
-                            for token_uri_data in requests_chunk:
-                                with ThreadPoolExecutor(
-                                    max_workers=threads
-                                ) as executor:
-                                    future = executor.submit(
-                                        crawl_uri, token_uri_data.token_uri
-                                    )
-                                    metadata = future.result(timeout=10)
-                                db_session.add(
-                                    metadata_to_label(
-                                        blockchain_type=blockchain_type,
-                                        metadata=metadata,
-                                        token_uri_data=token_uri_data,
-                                    )
-                                )
-                                writed_labels += 1
-
-                            if writed_labels > 0:
-                                clean_labels_from_db(
-                                    db_session=db_session,
-                                    blockchain_type=blockchain_type,
-                                    address=address,
-                                )
-                                logger.info(
-                                    f"Write {writed_labels} labels for {address}"
-                                )
-                        # trasaction is commited here
-                    except Exception as err:
-                        logger.warning(err)
-                        logger.warning(
-                            f"Error while writing labels for address: {address}"
-                        )
-                        db_session.rollback()
-
-                clean_labels_from_db(
-                    db_session=db_session,
-                    blockchain_type=blockchain_type,
-                    address=address,
-                )
-            except Exception as err:
-                logger.warning(err)
-                logger.warning(f"Error while crawling metadata for address: {address}")
-                db_session.rollback()
-                continue
+def process_address_metadata_with_leak(
+    address: str,
+    blockchain_type: AvailableBlockchainType,
+    batch_size: int,
+    max_recrawl: int,
+    threads: int,
+    tokens: List[TokenURIs],
+) -> None:
+    """
+    Process metadata for a single address with v3 support
+    """
+    with yield_db_read_only_preping_session_ctx() as db_session_read_only:
+        try:
+            already_parsed = get_current_metadata_for_address(
+                db_session=db_session_read_only,
+                blockchain_type=blockchain_type,
+                address=address,
+            )
+
+            maybe_updated = get_tokens_id_wich_may_updated(
+                db_session=db_session_read_only,
+                blockchain_type=blockchain_type,
+                address=address,
+            )
+        except Exception as err:
+            logger.warning(f"Error while getting metadata state for address {address}: {err}")
+            return
+
+    with yield_db_preping_session_ctx() as db_session:
+        try:
+            logger.info(f"Starting to crawl metadata for address: {address}")
+            logger.info(f"Maybe updated: {len(maybe_updated)}")
+
+            # Calculate how many tokens we can 'leak' so total recrawled (maybe_updated + leaked) <= max_recrawl
+            num_already_parsed = len(already_parsed)
+            num_maybe_updated = len(maybe_updated)
+            free_spots = max(0, max_recrawl - num_maybe_updated)
+
+            if num_already_parsed > 0 and free_spots > 0:
+                leak_rate = free_spots / num_already_parsed
+            else:
+                leak_rate = 0
+
+            logger.info(
+                f"Leak rate: {leak_rate} for {address} with maybe updated {len(maybe_updated)}"
+            )
+
+            # TODO: Fully random leak is not correct, we should leak based on created_at
+            parsed_with_leak = leak_of_crawled_uri(
+                already_parsed, leak_rate, maybe_updated
+            )
+
+            logger.info(f"Already parsed: {len(already_parsed)} for {address}")
+            logger.info(f"Amount of tokens to parse: {len(tokens)} for {address}")
+
+            # Remove already parsed tokens
+            new_tokens = [
+                token for token in tokens
+                if token.token_id not in parsed_with_leak
+            ]
+
+            for requests_chunk in [
+                new_tokens[i : i + batch_size]
+                for i in range(0, len(new_tokens), batch_size)
+            ]:
+                metadata_batch = []
+                try:
+                    # Gather all metadata in parallel
+                    with ThreadPoolExecutor(max_workers=threads) as executor:
+                        future_to_token = {
+                            executor.submit(crawl_uri, token.token_uri): token
+                            for token in requests_chunk
+                        }
+                        for future in as_completed(future_to_token):
+                            token = future_to_token[future]
+                            try:
+                                metadata = future.result(timeout=10)
+                                if metadata:
+                                    metadata_batch.append((token, metadata))
+                            except Exception as e:
+                                logger.error(f"Error fetching metadata for token {token.token_id}: {e}")
+                                continue
+
+                    if metadata_batch:
+                        # Batch upsert all metadata
+                        upsert_metadata_labels(
+                            db_session=db_session,
+                            blockchain_type=blockchain_type,
+                            metadata_batch=metadata_batch,
+                            v3=False
+                        )
+
+                        clean_labels_from_db(
+                            db_session=db_session,
+                            blockchain_type=blockchain_type,
+                            address=address,
+                        )
+                        logger.info(f"Write {len(metadata_batch)} labels for {address}")
+
+                except Exception as err:
+                    logger.warning(f"Error while writing labels for address {address}: {err}")
+                    db_session.rollback()
+
+        except Exception as err:
+            logger.warning(f"Error while crawling metadata for address {address}: {err}")
+            db_session.rollback()
+
+
+def process_address_metadata(
+    address: str,
+    blockchain_type: AvailableBlockchainType,
+    db_session: Session,
+    batch_size: int,
+    max_recrawl: int,
+    threads: int,
+    tokens: List[TokenURIs],
+) -> None:
+    """
+    Process metadata for a single address with v3 support
+    Leak logic is implemented in sql statement
+    """
+
+    logger.info(f"Processing address {address} with {len(tokens)} tokens")
+
+    total_tokens = len(tokens)
+    total_chunks = (total_tokens + batch_size - 1) // batch_size
+
+    for chunk_index, requests_chunk in enumerate([
+        tokens[i : i + batch_size]
+        for i in range(0, len(tokens), batch_size)
+    ]):
+        logger.info(
+            f"Processing chunk {chunk_index + 1}/{total_chunks} "
+            f"({len(requests_chunk)} tokens) for address {address}"
+        )
+
+        metadata_batch = []
+        with ThreadPoolExecutor(max_workers=threads) as executor:
+            future_to_token = {
+                executor.submit(crawl_uri, token.token_uri): token
+                for token in requests_chunk
+            }
+            for future in as_completed(future_to_token):
+                token = future_to_token[future]
+                metadata = future.result(timeout=10)
+                metadata_batch.append((token, metadata))
+
+        upsert_metadata_labels(
+            db_session=db_session,
+            blockchain_type=blockchain_type,
+            metadata_batch=metadata_batch,
+            v3=True
+        )
+
+        logger.info(f"Wrote {len(metadata_batch)} labels for {address}")
+
+        db_session.commit()
+
+    clean_labels_from_db(
+        db_session=db_session,
+        blockchain_type=blockchain_type,
+        address=address,
+        version=3
+    )
+
+    db_session.commit()
+
+
+def parse_metadata(
+    blockchain_type: AvailableBlockchainType,
+    batch_size: int,
+    max_recrawl: int,
+    threads: int,
+    custom_db_uri: Optional[str] = None,
+):
+    """
+    Parse all metadata of tokens.
+    """
+
+    logger.info("Starting metadata crawler")
+    logger.info(f"Processing blockchain {blockchain_type.value}")
+
+    # Check if blockchain exists in v2 package
+    if blockchain_type.value in [chain.value for chain in AvailableBlockchainTypeV2]:
+        try:
+            logger.info(f"Processing v2 blockchain: {blockchain_type.value}")
+            # Get tokens to crawl v2 flow
+            with yield_db_read_only_preping_session_ctx() as db_session_read_only:
+                tokens_uri_by_address = get_tokens_to_crawl(
+                    db_session_read_only,
+                    blockchain_type,
+                    {},
+                )
+
+            # Process each address
+            for address, tokens in tokens_uri_by_address.items():
+                process_address_metadata_with_leak(
+                    address=address,
+                    blockchain_type=blockchain_type,
+                    batch_size=batch_size,
+                    max_recrawl=max_recrawl,
+                    threads=threads,
+                    tokens=tokens,
+                )
+        except Exception as err:
+            logger.error(f"V2 flow failed: {err}, continuing with Spire flow")
+
+    # Continue with Spire flow regardless of v2 result
+    spire_jobs = []
+
+    # Get all jobs for this blockchain from Spire
+    search_query = f"#metadata-job #{blockchain_type.value}"
+    try:
+        entries = get_all_entries_from_search(
+            journal_id=MOONSTREAM_METADATA_TASKS_JOURNAL,
+            search_query=search_query,
+            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
+            content=True,
+            limit=1000,
+        )
+
+        logger.info(f"Found {len(entries)} metadata jobs for blockchain {blockchain_type.value}")
+
+        for entry in entries:
+            try:
+                if not entry.content:
+                    continue
+
+                job = json.loads(entry.content)
+                if job.get("blockchain") != blockchain_type.value:
+                    logger.warning(f"Skipping job with mismatched blockchain: {job.get('blockchain')} != {blockchain_type.value}")
+                    continue
+                spire_jobs.append(job)
+            except Exception as err:
+                id = entry.entry_url.split("/")[-1]
+                logger.error(f"Error parsing job from entry {id}: {err}")
+                continue
+    except BugoutResponseException as err:
+        logger.error(f"Bugout error fetching jobs from journal: {err.detail}")
+    except Exception as err:
+        logger.error(f"Error fetching jobs from journal: {err}")
+        return
+
+    # Process each job
+
+    # sessions list for each customer and instance
+    sessions_by_customer: Dict[Tuple[str, str], Session] = {}
+
+    # all sessions in one try block
+    try:
+        for job in spire_jobs:
+            try:
+                customer_id = job.get("customer_id")
+                instance_id = job.get("instance_id")
+
+                if (customer_id, instance_id) not in sessions_by_customer:
+                    # Create session
+                    # Assume fetch_connection_string fetches the connection string
+                    if custom_db_uri:
+                        connection_string = custom_db_uri
+                    else:
+                        connection_string = request_connection_string(
+                            customer_id=customer_id,
+                            instance_id=instance_id,
+                            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
+                        )
+                    engine = create_moonstream_engine(connection_string, 2, 100000)
+                    session = sessionmaker(bind=engine)
+                    try:
+                        sessions_by_customer[(customer_id, instance_id)] = session()
+                    except Exception as e:
+                        logger.error(f"Connection to {engine} failed: {e}")
+                        continue
+
+                # Get tokens to crawl
+                tokens_uri_by_address = get_tokens_to_crawl(
+                    sessions_by_customer[(customer_id, instance_id)],
+                    blockchain_type,
+                    job,
+                )
+
+                for address, tokens in tokens_uri_by_address.items():
+                    process_address_metadata(
+                        address=address,
+                        blockchain_type=blockchain_type,
+                        db_session=sessions_by_customer[(customer_id, instance_id)],
+                        batch_size=batch_size,
+                        max_recrawl=max_recrawl,
+                        threads=threads,
+                        tokens=tokens,
+                    )
+            except Exception as err:
+                logger.error(f"Error processing job: {err}")
+                continue
+    except Exception as err:
+        logger.error(f"Error processing jobs: {err}")
+        raise err
+
+    finally:
+        for session in sessions_by_customer.values():
+            try:
+                session.close()
+            except Exception as err:
+                logger.error(f"Error closing session: {err}")
 
 
 def handle_crawl(args: argparse.Namespace) -> None:
     """
     Parse all metadata of tokens.
     """
 
     blockchain_type = AvailableBlockchainType(args.blockchain)
 
     parse_metadata(
-        blockchain_type, args.commit_batch_size, args.max_recrawl, args.threads
+        blockchain_type,
+        args.commit_batch_size,
+        args.max_recrawl,
+        args.threads,
+        args.custom_db_uri,
     )
@@ -259,7 +430,7 @@ def main() -> None:
         "--blockchain",
         "-b",
         type=str,
-        help="Type of blockchain wich writng in database",
+        help="Type of blockchain which writing in database",
         required=True,
     )
     metadata_crawler_parser.add_argument(
@@ -283,6 +454,11 @@ def main() -> None:
         default=4,
         help="Amount of threads for crawling",
     )
+    metadata_crawler_parser.add_argument(
+        "--custom-db-uri",
+        type=str,
+        help="Custom db uri to use for crawling",
+    )
     metadata_crawler_parser.set_defaults(func=handle_crawl)
 
     args = parser.parse_args()
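A hedged example of driving the updated entrypoint programmatically, equivalent to the `metadata-crawler crawl --blockchain game7 --custom-db-uri ...` invocation handled by `handle_crawl()` above. The connection string is a placeholder, and `game7` is assumed to be a valid `AvailableBlockchainType` value (the systemd units added in this PR pass it on the command line).

```python
# Programmatic equivalent of the CLI call; all literal values are placeholders.
from moonstreamtypes.blockchain import AvailableBlockchainType

from mooncrawl.metadata_crawler.cli import parse_metadata

parse_metadata(
    blockchain_type=AvailableBlockchainType("game7"),
    batch_size=50,
    max_recrawl=300,
    threads=4,
    custom_db_uri="postgresql://user:password@localhost:5432/moonstream",  # placeholder
)
```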
@@ -1,13 +1,29 @@
 import json
 import logging
-from typing import Any, Dict, List, Optional
+from hexbytes import HexBytes
+from typing import Any, Dict, List, Optional, Tuple
+###from sqlalchemy import
+from sqlalchemy.dialects.postgresql import insert
 
-from moonstreamdb.blockchain import AvailableBlockchainType, get_label_model
+from datetime import datetime
+
+from moonstreamtypes.blockchain import AvailableBlockchainType, get_label_model
 from sqlalchemy.orm import Session
 from sqlalchemy.sql import text
+
+from ..actions import recive_S3_data_from_query
 from ..data import TokenURIs
-from ..settings import CRAWLER_LABEL, METADATA_CRAWLER_LABEL, VIEW_STATE_CRAWLER_LABEL
+from ..settings import (
+    CRAWLER_LABEL,
+    METADATA_CRAWLER_LABEL,
+    VIEW_STATE_CRAWLER_LABEL,
+    MOONSTREAM_ADMIN_ACCESS_TOKEN,
+    MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN,
+    bugout_client as bc,
+    moonstream_client as mc,
+)
+from moonstream.client import Moonstream  # type: ignore
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -18,11 +34,13 @@ def metadata_to_label(
     metadata: Optional[Dict[str, Any]],
     token_uri_data: TokenURIs,
     label_name=METADATA_CRAWLER_LABEL,
+    v3: bool = False,
 ):
     """
-    Creates a label model.
+    Creates a label model with support for v2 and v3 database structures.
     """
-    label_model = get_label_model(blockchain_type)
+    version = 3 if v3 else 2
+    label_model = get_label_model(blockchain_type, version=version)
 
     sanityzed_label_data = json.loads(
         json.dumps(
@@ -34,14 +52,34 @@ def metadata_to_label(
         ).replace(r"\u0000", "")
     )
 
-    label = label_model(
-        label=label_name,
-        label_data=sanityzed_label_data,
-        address=token_uri_data.address,
-        block_number=token_uri_data.block_number,
-        transaction_hash=None,
-        block_timestamp=token_uri_data.block_timestamp,
-    )
+    if v3:
+        # V3 structure similar to state crawler
+        label_data = {
+            "token_id": token_uri_data.token_id,
+            "metadata": metadata,
+        }
+
+        label = label_model(
+            label=label_name,
+            label_name="metadata",  # Fixed name for metadata labels
+            label_type="metadata",
+            label_data=label_data,
+            address=HexBytes(token_uri_data.address),
+            block_number=token_uri_data.block_number,
+            # Use a fixed tx hash for metadata since it's not from a transaction
+            block_timestamp=token_uri_data.block_timestamp,
+            block_hash=token_uri_data.block_hash if hasattr(token_uri_data, 'block_hash') else None,
+        )
+    else:
+        # Original v2 structure
+        label = label_model(
+            label=label_name,
+            label_data=sanityzed_label_data,
+            address=token_uri_data.address,
+            block_number=token_uri_data.block_number,
+            transaction_hash=None,
+            block_timestamp=token_uri_data.block_timestamp,
+        )
 
     return label
@@ -60,13 +98,13 @@ def commit_session(db_session: Session) -> None:
 
 
 def get_uris_of_tokens(
-    db_session: Session, blockchain_type: AvailableBlockchainType
+    db_session: Session, blockchain_type: AvailableBlockchainType, version: int = 2
 ) -> List[TokenURIs]:
     """
     Get meatadata URIs.
     """
 
-    label_model = get_label_model(blockchain_type)
+    label_model = get_label_model(blockchain_type, version=version)
 
     table = label_model.__tablename__
 

@@ -113,13 +151,13 @@ def get_uris_of_tokens(
 
 
 def get_current_metadata_for_address(
-    db_session: Session, blockchain_type: AvailableBlockchainType, address: str
+    db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
 ):
     """
     Get existing metadata.
     """
 
-    label_model = get_label_model(blockchain_type)
+    label_model = get_label_model(blockchain_type, version=version)
 
     table = label_model.__tablename__
 

@@ -149,7 +187,7 @@ def get_current_metadata_for_address(
 
 
 def get_tokens_id_wich_may_updated(
-    db_session: Session, blockchain_type: AvailableBlockchainType, address: str
+    db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
 ):
     """
     Returns a list of tokens which may have updated information.

@@ -163,7 +201,7 @@ def get_tokens_id_wich_may_updated(
     Required integration with entity API and opcodes crawler.
     """
 
-    label_model = get_label_model(blockchain_type)
+    label_model = get_label_model(blockchain_type, version=version)
 
     table = label_model.__tablename__
 

@@ -233,14 +271,14 @@ def get_tokens_id_wich_may_updated(
 
 
 def clean_labels_from_db(
-    db_session: Session, blockchain_type: AvailableBlockchainType, address: str
+    db_session: Session, blockchain_type: AvailableBlockchainType, address: str, version: int = 2
 ):
     """
     Remove existing labels.
     But keep the latest one for each token.
     """
 
-    label_model = get_label_model(blockchain_type)
+    label_model = get_label_model(blockchain_type, version=version)
 
     table = label_model.__tablename__
 
@@ -273,3 +311,165 @@ def clean_labels_from_db(
         ),
         {"address": address, "label": METADATA_CRAWLER_LABEL},
     )
+
+
+def get_tokens_from_query_api(
+    client: Moonstream,
+    blockchain_type: AvailableBlockchainType,
+    query_name: str,
+    params: dict,
+    token: str,
+    customer_id: Optional[str] = None,
+    instance_id: Optional[str] = None,
+) -> List[TokenURIs]:
+    """
+    Get token URIs from Query API results
+    """
+
+    query_params = {}
+
+    if customer_id and instance_id:
+        query_params["customer_id"] = customer_id
+        query_params["instance_id"] = instance_id
+
+    try:
+        data = recive_S3_data_from_query(
+            client=client,
+            token=token,
+            query_name=query_name,
+            params={},
+            query_params=query_params,
+            custom_body={
+                "blockchain": blockchain_type.value,
+                "params": params,
+            }
+        )
+
+        # Convert query results to TokenURIs format
+        results = []
+        for item in data.get("data", []):
+            results.append(
+                TokenURIs(
+                    token_id=str(item.get("token_id")),
+                    address=item.get("address"),
+                    token_uri=item.get("token_uri"),
+                    block_number=item.get("block_number"),
+                    block_timestamp=item.get("block_timestamp"),
+                )
+            )
+        return results
+    except Exception as err:
+        logger.error(f"Error fetching data from Query API: {err}")
+        return []
+
+
+def get_tokens_to_crawl(
+    db_session: Session,
+    blockchain_type: AvailableBlockchainType,
+    spire_job: Optional[dict] = None,
+) -> Dict[str, List[TokenURIs]]:
+    """
+    Get tokens to crawl either from Query API (if specified in Spire job) or database
+    """
+    tokens_uri_by_address = {}
+
+    if spire_job:
+        if "query_api" not in spire_job:
+            raise ValueError("Query API is not specified in Spire job")
+
+        # Get tokens from Query API
+        query_config = spire_job["query_api"]
+        client = Moonstream()
+
+        tokens = get_tokens_from_query_api(
+            client=client,
+            blockchain_type=blockchain_type,
+            query_name=query_config["name"],
+            params=query_config["params"],
+            token=MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN,
+            customer_id=spire_job["customer_id"],
+            instance_id=spire_job["instance_id"],
+        )
+
+        # Group by address
+        for token in tokens:
+            if token.address not in tokens_uri_by_address:
+                tokens_uri_by_address[token.address] = []
+            tokens_uri_by_address[token.address].append(token)
+    else:
+        # Get tokens from database (existing logic)
+        uris_of_tokens = get_uris_of_tokens(db_session, blockchain_type)
+        for token_uri_data in uris_of_tokens:
+            if token_uri_data.address not in tokens_uri_by_address:
+                tokens_uri_by_address[token_uri_data.address] = []
+            tokens_uri_by_address[token_uri_data.address].append(token_uri_data)
+
+    return tokens_uri_by_address
+
+
+def upsert_metadata_labels(
+    db_session: Session,
+    blockchain_type: AvailableBlockchainType,
+    metadata_batch: List[Tuple[TokenURIs, Optional[Dict[str, Any]]]],
+    v3: bool = False,
+    db_batch_size: int = 100,
+) -> None:
+    """
+    Batch upsert metadata labels - update if exists, insert if not.
+    """
+    try:
+        version = 3 if v3 else 2
+        label_model = get_label_model(blockchain_type, version=version)
+
+        # Prepare batch of labels
+        labels_data = []
+        for token_uri_data, metadata in metadata_batch:
+
+            if v3:
+                # V3 structure
+                label_data = {
+                    "token_id": token_uri_data.token_id,
+                    "metadata": metadata,
+                }
+
+                labels_data.append({
+                    "label": METADATA_CRAWLER_LABEL,
+                    "label_name": "metadata",
+                    "label_type": "metadata",
+                    "label_data": label_data,
+                    "address": HexBytes(token_uri_data.address),
+                    "block_number": token_uri_data.block_number,
+                    "block_timestamp": token_uri_data.block_timestamp,
+                    "block_hash": getattr(token_uri_data, 'block_hash', None),
+                })
+            else:
+                # V2 structure
+                label_data = {
+                    "type": "metadata",
+                    "token_id": token_uri_data.token_id,
+                    "metadata": metadata,
+                }
+
+                labels_data.append({
+                    "label": METADATA_CRAWLER_LABEL,
+                    "label_data": label_data,
+                    "address": token_uri_data.address,
+                    "block_number": token_uri_data.block_number,
+                    "transaction_hash": None,
+                    "block_timestamp": token_uri_data.block_timestamp,
+                })
+
+        if not labels_data:
+            return
+
+        # Create insert statement
+        insert_stmt = insert(label_model).values(labels_data)
+        result_stmt = insert_stmt.on_conflict_do_nothing()
+
+        db_session.execute(result_stmt)
+
+        db_session.commit()
+
+    except Exception as err:
+        logger.error(f"Error batch upserting metadata labels: {err}")
+        raise
@@ -3,21 +3,16 @@ from typing import Dict, Optional
 from uuid import UUID
 
 from bugout.app import Bugout
-from moonstreamtypes.blockchain import AvailableBlockchainType
+from moonstreamtypes.blockchain import AvailableBlockchainType  # type: ignore
+from moonstream.client import Moonstream  # type: ignore
 
-# Bugout
+# APIs
+## Bugout
 BUGOUT_BROOD_URL = os.environ.get("BUGOUT_BROOD_URL", "https://auth.bugout.dev")
 BUGOUT_SPIRE_URL = os.environ.get("BUGOUT_SPIRE_URL", "https://spire.bugout.dev")
 
 bugout_client = Bugout(brood_api_url=BUGOUT_BROOD_URL, spire_api_url=BUGOUT_SPIRE_URL)
 
-
-MOONSTREAM_API_URL = os.environ.get("MOONSTREAM_API_URL", "https://api.moonstream.to")
-MOONSTREAM_ENGINE_URL = os.environ.get(
-    "MOONSTREAM_ENGINE_URL", "https://engineapi.moonstream.to"
-)
-
 
 BUGOUT_REQUEST_TIMEOUT_SECONDS_RAW = os.environ.get(
     "MOONSTREAM_BUGOUT_TIMEOUT_SECONDS", 30
 )
@@ -31,6 +26,24 @@ except:
 
 HUMBUG_REPORTER_CRAWLERS_TOKEN = os.environ.get("HUMBUG_REPORTER_CRAWLERS_TOKEN")
 
+
+## Moonstream
+MOONSTREAM_API_URL = os.environ.get("MOONSTREAM_API_URL", "https://api.moonstream.to")
+
+moonstream_client = Moonstream()
+
+
+## Moonstream Engine
+MOONSTREAM_ENGINE_URL = os.environ.get(
+    "MOONSTREAM_ENGINE_URL", "https://engineapi.moonstream.to"
+)
+
+## Moonstream DB
+MOONSTREAM_DB_V3_CONTROLLER_API = os.environ.get(
+    "MOONSTREAM_DB_V3_CONTROLLER_API", "https://mdb-v3-api.moonstream.to"
+)
+
+
 # Origin
 RAW_ORIGINS = os.environ.get("MOONSTREAM_CORS_ALLOWED_ORIGINS")
 if RAW_ORIGINS is None:
@@ -490,3 +503,19 @@ MOONSTREAM_DB_V3_CONTROLLER_API = os.environ.get(
 MOONSTREAM_DB_V3_SCHEMA_NAME = os.environ.get(
     "MOONSTREAM_DB_V3_SCHEMA_NAME", "blockchain"
 )
+
+MOONSTREAM_METADATA_TASKS_JOURNAL = os.environ.get(
+    "MOONSTREAM_METADATA_TASKS_JOURNAL", ""
+)
+
+
+### MOONSTREAM_PUBLIC_QUERIES_USER_TOKEN
+
+MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN = os.environ.get(
+    "MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN", ""
+)
+if MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN == "":
+    raise ValueError(
+        "MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN environment variable must be set"
+    )
@@ -824,4 +824,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
     main()
@@ -2,4 +2,4 @@
 Moonstream crawlers version.
 """
 
-MOONCRAWL_VERSION = "0.5.1"
+MOONCRAWL_VERSION = "0.5.3"
@@ -38,8 +38,8 @@ setup(
         "chardet",
         "fastapi",
         "moonstreamdb>=0.4.6",
-        "moonstreamdb-v3>=0.1.3",
-        "moonstream-types>=0.0.10",
+        "moonstreamdb-v3>=0.1.4",
+        "moonstream-types>=0.0.11",
         "moonstream>=0.1.2",
         "moonworm[moonstream]>=0.9.3",
         "humbug",
@@ -16,7 +16,7 @@ from bugout.data import (
 )
 from bugout.exceptions import BugoutResponseException
 from fastapi import APIRouter, Body, Path, Query, Request
-from moonstreamdb.blockchain import AvailableBlockchainType
+from moonstreamtypes.blockchain import AvailableBlockchainType
 from sqlalchemy import text
 
 from .. import data