moonstream/crawlers/mooncrawl/mooncrawl/api.py

314 wiersze
9.8 KiB
Python
Czysty Zwykły widok Historia

2021-11-13 15:51:32 +00:00
"""
The Mooncrawl HTTP API
"""
2022-05-26 12:41:58 +00:00
import logging
import time
2022-03-08 13:18:27 +00:00
from cgi import test
2022-11-24 13:02:32 +00:00
from datetime import timedelta
2022-05-26 12:41:58 +00:00
from typing import Any, Dict, List
2021-12-21 15:23:26 +00:00
from uuid import UUID
2021-11-13 15:51:32 +00:00
2022-01-13 18:01:05 +00:00
import boto3 # type: ignore
2023-08-01 08:58:36 +00:00
from bugout.data import BugoutJournalEntity, BugoutResource
2022-05-26 12:41:58 +00:00
from fastapi import BackgroundTasks, FastAPI
2021-11-13 15:51:32 +00:00
from fastapi.middleware.cors import CORSMiddleware
2023-06-16 15:08:56 +00:00
from moonstreamdb.blockchain import (
AvailableBlockchainType,
get_block_model,
2023-08-01 08:58:36 +00:00
get_label_model,
2023-06-16 15:08:56 +00:00
get_transaction_model,
)
2022-02-17 10:22:20 +00:00
from sqlalchemy import text
2021-11-13 15:51:32 +00:00
2023-08-01 08:58:36 +00:00
from . import data
from .actions import (
2023-08-01 08:58:36 +00:00
EntityCollectionNotFoundException,
generate_s3_access_links,
get_entity_subscription_collection_id,
2023-08-01 08:58:36 +00:00
query_parameter_hash,
)
2021-11-13 15:51:32 +00:00
from .middleware import MoonstreamHTTPException
2021-12-21 15:23:26 +00:00
from .settings import (
BUGOUT_RESOURCE_TYPE_ENTITY_SUBSCRIPTION,
2022-05-26 12:41:58 +00:00
DOCS_TARGET_PATH,
2023-08-01 08:58:36 +00:00
LINKS_EXPIRATION_TIME,
MOONSTREAM_ADMIN_ACCESS_TOKEN,
MOONSTREAM_S3_QUERIES_BUCKET,
MOONSTREAM_S3_QUERIES_BUCKET_PREFIX,
2023-08-01 08:58:36 +00:00
MOONSTREAM_S3_SMARTCONTRACTS_ABI_BUCKET,
2021-12-21 15:23:26 +00:00
MOONSTREAM_S3_SMARTCONTRACTS_ABI_PREFIX,
2022-05-26 12:41:58 +00:00
ORIGINS,
2021-12-21 15:23:26 +00:00
)
2023-08-01 08:58:36 +00:00
from .settings import bugout_client as bc
2022-02-16 00:29:12 +00:00
from .stats_worker import dashboard, queries
2022-05-26 12:41:58 +00:00
from .version import MOONCRAWL_VERSION
2021-11-13 15:51:32 +00:00
# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Tag descriptions passed to FastAPI as `openapi_tags`.
tags_metadata = [
    {"name": "jobs", "description": "Trigger crawler jobs."},
    {"name": "time", "description": "Server timestamp endpoints."},
]

# The application object. `docs_url=None` is passed alongside a ReDoc page
# mounted at DOCS_TARGET_PATH.
app = FastAPI(
    title="Mooncrawl HTTP API",
    description="Mooncrawl API endpoints.",
    version=MOONCRAWL_VERSION,
    openapi_tags=tags_metadata,
    openapi_url="/openapi.json",
    docs_url=None,
    redoc_url=f"/{DOCS_TARGET_PATH}",
)

# CORS: only the configured ORIGINS are allowed; any method and header is
# accepted from them, with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/ping", response_model=data.PingResponse)
async def ping_handler() -> data.PingResponse:
    """
    Report that the server is up by returning a PingResponse with status "ok".
    """
    response = data.PingResponse(status="ok")
    return response
@app.get("/version", response_model=data.VersionResponse)
async def version_handler() -> data.VersionResponse:
    """
    Return the running server version (MOONCRAWL_VERSION).
    """
    response = data.VersionResponse(version=MOONCRAWL_VERSION)
    return response
@app.get("/now", tags=["time"])
async def now_handler() -> data.NowResponse:
    """
    Return the server's current time as an epoch timestamp.
    """
    current_epoch = time.time()
    return data.NowResponse(epoch_time=current_epoch)
2021-12-21 15:23:26 +00:00
@app.post("/jobs/stats_update", tags=["jobs"])
async def status_handler(
    stats_update: data.StatsUpdateRequest,
    background_tasks: BackgroundTasks,
):
    """
    Queue a dashboard statistics update and return links to the stats files.

    Steps:
    1. Fetch the dashboard resource using the caller's token.
    2. Resolve the journal (collection) id holding the user's subscriptions.
    3. Load every subscription entity listed in the dashboard's
       "subscription_settings".
    4. Queue `dashboard.stats_generate_api_task` as a background task.
    5. For each subscription and requested timescale, build a presigned S3
       GET URL for the stats object.

    Returns:
        Mapping of subscription entity id -> timescale -> dict with "url"
        and "headers" (an "If-Modified-Since" header derived from the S3
        object's LastModified). Timescales whose S3 lookup fails are left
        out and a warning is logged.

    Raises:
        MoonstreamHTTPException: 404 when the user's subscription collection
            is not found; 500 when resolving the collection fails for any
            other reason or the background task cannot be queued.
    """
    dashboard_resource: BugoutResource = bc.get_resource(
        token=stats_update.token,
        resource_id=stats_update.dashboard_id,
        timeout=10,
    )

    try:
        journal_id = get_entity_subscription_collection_id(
            resource_type=BUGOUT_RESOURCE_TYPE_ENTITY_SUBSCRIPTION,
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            user_id=UUID(stats_update.user_id),
        )
    except EntityCollectionNotFoundException as e:
        raise MoonstreamHTTPException(
            status_code=404,
            detail="User subscriptions collection not found",
            internal_error=e,
        )
    except Exception as e:
        # The access token is deliberately not logged. Without a journal id
        # nothing below can work, so fail the request instead of continuing
        # with an unbound name.
        logger.error(
            f"Error listing subscriptions for user ({stats_update.user_id}), error: {str(e)}"
        )
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    s3_client = boto3.client("s3")

    subscription_settings = dashboard_resource.resource_data["subscription_settings"]

    # Load the subscription entities referenced by the dashboard.
    subscription_by_id: Dict[str, BugoutJournalEntity] = {}
    for dashboard_subscription_filters in subscription_settings:
        subscription: BugoutJournalEntity = bc.get_entity(
            token=stats_update.token,
            journal_id=journal_id,
            entity_id=dashboard_subscription_filters["subscription_id"],
        )
        subscription_by_id[str(subscription.id)] = subscription

    try:
        background_tasks.add_task(
            dashboard.stats_generate_api_task,
            timescales=stats_update.timescales,
            dashboard=dashboard_resource,
            subscription_by_id=subscription_by_id,
        )
    except Exception as e:
        logger.error(
            f"Unhandled /jobs/stats_update start background task exception, error: {e}"
        )
        raise MoonstreamHTTPException(status_code=500)

    presigned_urls_response: Dict[UUID, Any] = {}
    for dashboard_subscription_filters in subscription_settings:
        # The lookup dict is keyed by str(id), so normalise the key here too.
        subscription_entity = subscription_by_id[
            str(dashboard_subscription_filters["subscription_id"])
        ]

        # Read the subscription type from the entity being processed, not
        # from whichever entity the loading loop happened to end on.
        subscription_type = None
        for required_field in subscription_entity.required_fields:  # type: ignore
            if "subscription_type_id" in required_field:
                subscription_type = required_field["subscription_type_id"]

        for timescale in stats_update.timescales:
            # Create the per-subscription dict once so that earlier
            # timescales are not wiped by later ones.
            urls_by_timescale = presigned_urls_response.setdefault(
                subscription_entity.id, {}
            )
            # Bound before the try so the warning below can always format it.
            result_key = None
            try:
                result_key = f"{MOONSTREAM_S3_SMARTCONTRACTS_ABI_PREFIX}/{dashboard.blockchain_by_subscription_id[subscription_type]}/contracts_data/{subscription_entity.address}/{stats_update.dashboard_id}/v1/{timescale}.json"

                # head_object is called first: its LastModified feeds the
                # header below, and a failure here skips this timescale.
                stats_object = s3_client.head_object(
                    Bucket=MOONSTREAM_S3_SMARTCONTRACTS_ABI_BUCKET, Key=result_key
                )

                stats_presigned_url = s3_client.generate_presigned_url(
                    "get_object",
                    Params={
                        "Bucket": MOONSTREAM_S3_SMARTCONTRACTS_ABI_BUCKET,
                        "Key": result_key,
                    },
                    ExpiresIn=300,
                    HttpMethod="GET",
                )

                # NOTE(review): "%c" is locale-dependent; confirm consumers
                # of this header accept that date format.
                urls_by_timescale[timescale] = {
                    "url": stats_presigned_url,
                    "headers": {
                        "If-Modified-Since": (
                            stats_object["LastModified"] + timedelta(seconds=1)
                        ).strftime("%c")
                    },
                }
            except Exception as err:
                # Best effort: a missing or unreadable stats file must not
                # fail the whole response.
                logger.warning(
                    f"Can't generate S3 presigned url in stats endpoint for Bucket:{MOONSTREAM_S3_SMARTCONTRACTS_ABI_BUCKET}, Key:{result_key} get error:{err}"
                )

    return presigned_urls_response
2022-02-16 00:29:12 +00:00
@app.post("/jobs/{query_id}/query_update", tags=["jobs"])
async def queries_data_update_handler(
    query_id: str,
    request_data: data.QueryDataUpdate,
    background_tasks: BackgroundTasks,
) -> Dict[str, Any]:
    """
    Queue generation of a query's result file and return a link to it.

    The submitted query is validated, table placeholders are substituted
    when a blockchain is given, and the text is converted to a SQLAlchemy
    TextClause. Request parameters are filtered down to the query's bind
    parameters, and `queries.data_generate` is queued as a background task
    writing to a key derived from the query id and the parameters hash.

    Returns:
        {"url": <presigned GET link to the result object>}.

    Raises:
        MoonstreamHTTPException: 401 when the query fails validation, 403
            for an unknown blockchain, 500 for any other failure (parse
            error, bind-parameter mismatch, task scheduling error).
    """
    # Validate the raw query before doing anything else with it.
    try:
        queries.query_validation(request_data.query)
    except queries.QueryNotValid:
        logger.error(f"Query not pass validation check query id: {query_id}")
        raise MoonstreamHTTPException(
            status_code=401,
            detail="Incorrect query is not valid with current restrictions",
        )
    except Exception as e:
        logger.error(f"Unhandled query execute exception, error: {e}")
        raise MoonstreamHTTPException(status_code=500)

    requested_query = request_data.query

    if request_data.blockchain:
        known_blockchains = [item.value for item in AvailableBlockchainType]
        if request_data.blockchain not in known_blockchains:
            logger.error(f"Unknown blockchain {request_data.blockchain}")
            raise MoonstreamHTTPException(status_code=403, detail="Unknown blockchain")

        blockchain = AvailableBlockchainType(request_data.blockchain)

        # Placeholder -> model getter, applied in this order.
        table_placeholders = (
            ("__transactions_table__", get_transaction_model),
            ("__blocks_table__", get_block_model),
            ("__labels_table__", get_label_model),
        )
        for placeholder, model_getter in table_placeholders:
            table_name = model_getter(blockchain).__tablename__
            requested_query = requested_query.replace(placeholder, table_name)

    # Make sure the query can be turned into a TextClause.
    try:
        query = text(requested_query)
    except Exception as e:
        logger.error(
            f"Can't parse query {query_id} to TextClause in drones /query_update endpoint, error: {e}"
        )
        raise MoonstreamHTTPException(status_code=500, detail="Can't parse query")

    # Names of the bind parameters the query expects.
    expected_query_parameters = query._bindparams.keys()

    # Keep only the request parameters the query actually binds.
    passed_params = {}
    for param_name, raw_value in request_data.params.items():
        if param_name not in expected_query_parameters:
            continue
        passed_params[param_name] = queries.from_json_types(raw_value)

    if len(passed_params) != len(expected_query_parameters):
        logger.error(
            f"Unmatched amount of applying query parameters: {passed_params}, query_id:{query_id}."
        )
        raise MoonstreamHTTPException(
            status_code=500, detail="Unmatched amount of applying query parameters"
        )

    params_hash = query_parameter_hash(passed_params)

    target_bucket = MOONSTREAM_S3_QUERIES_BUCKET
    target_key = f"{MOONSTREAM_S3_QUERIES_BUCKET_PREFIX}/queries/{query_id}/{params_hash}/data.{request_data.file_type}"

    task_arguments = {
        "query_id": f"{query_id}",
        "file_type": request_data.file_type,
        "bucket": target_bucket,
        "key": target_key,
        "query": query,
        "params": passed_params,
        "params_hash": params_hash,
    }
    try:
        background_tasks.add_task(queries.data_generate, **task_arguments)
    except Exception as e:
        logger.error(f"Unhandled query execute exception, error: {e}")
        raise MoonstreamHTTPException(status_code=500)

    stats_presigned_url = generate_s3_access_links(
        method_name="get_object",
        bucket=target_bucket,
        key=target_key,
        expiration=LINKS_EXPIRATION_TIME,
        http_method="GET",
    )

    return {"url": stats_presigned_url}