moonstream/moonstreamapi/moonstreamapi/routes/queries.py

453 wiersze
15 KiB
Python
Czysty Zwykły widok Historia

"""
The Moonstream queries HTTP API
"""
import logging
from typing import Any, Dict, List, Optional, Tuple, Union
from uuid import UUID
2022-02-15 22:55:20 +00:00
2022-02-17 10:32:23 +00:00
from bugout.data import BugoutResources, BugoutJournalEntryContent, BugoutJournalEntry
2022-02-17 10:22:20 +00:00
from bugout.exceptions import BugoutResponseException
from fastapi import APIRouter, Body, Request
import requests # type: ignore
2023-04-27 14:21:48 +00:00
from sqlalchemy import text
2022-02-15 22:55:20 +00:00
2022-02-16 16:12:42 +00:00
from .. import data
from ..actions import (
get_query_by_name,
name_normalization,
NameNormalizationException,
query_parameter_hash,
generate_s3_access_links,
)
2022-02-15 22:55:20 +00:00
from ..middleware import MoonstreamHTTPException
from ..settings import (
MOONSTREAM_ADMIN_ACCESS_TOKEN,
2022-02-17 10:22:20 +00:00
MOONSTREAM_APPLICATION_ID,
2022-02-15 22:55:20 +00:00
MOONSTREAM_CRAWLERS_SERVER_URL,
MOONSTREAM_CRAWLERS_SERVER_PORT,
MOONSTREAM_S3_QUERIES_BUCKET,
MOONSTREAM_S3_QUERIES_BUCKET_PREFIX,
2022-02-17 10:22:20 +00:00
MOONSTREAM_QUERIES_JOURNAL_ID,
2022-02-15 22:55:20 +00:00
)
from ..settings import bugout_client as bc
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# All routes declared in this module are registered under the "/queries" prefix.
router = APIRouter(prefix="/queries")
2022-02-15 22:55:20 +00:00
2022-03-09 13:05:31 +00:00
@router.get("/list", tags=["queries"])
async def get_list_of_queries_handler(request: Request) -> List[Dict[str, Any]]:
    """
    List the query-resolver resources visible to the caller's token.

    Returns the `resource_data` dictionary of every resource of type
    `data.BUGOUT_RESOURCE_QUERY_RESOLVER` returned by Bugout.

    Raises MoonstreamHTTPException with the Bugout status code when Bugout
    rejects the request, or with status 500 on any other failure.
    """
    access_token = request.state.token

    # Only resources that map a query name to a journal entry are of interest.
    resolver_filter = {"type": data.BUGOUT_RESOURCE_QUERY_RESOLVER}

    try:
        listing: BugoutResources = bc.list_resources(
            token=access_token, params=resolver_filter
        )
    except BugoutResponseException as err:
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    return [item.resource_data for item in listing.resources]
@router.post("/", tags=["queries"])
async def create_query_handler(
    request: Request, query_applied: data.PreapprovedQuery = Body(...)
) -> BugoutJournalEntry:
    """
    Create query in bugout journal

    Steps, in order:
    1. List the caller's query-resolver resources and collect their names.
    2. Normalize the requested name (403 when normalization fails).
    3. Reject the request with 404 when the normalized name is already used.
    4. Store the query text as a journal entry using the admin token.
    5. Create a resolver resource (caller's token) linking name -> entry id.
    6. Call `bc.update_tags` on the entry with `query_id:<id>` and `preapprove`.

    Returns the journal entry object obtained in step 4.

    Raises MoonstreamHTTPException: Bugout errors keep their status code, any
    other failure is reported as 500.
    """
    caller_token = request.state.token
    caller = request.state.user

    # Check already existed queries
    resolver_filter = {"type": data.BUGOUT_RESOURCE_QUERY_RESOLVER}
    try:
        existing: BugoutResources = bc.list_resources(
            token=caller_token, params=resolver_filter
        )
    except BugoutResponseException as err:
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    taken_names: List[str] = [
        item.resource_data["name"] for item in existing.resources
    ]

    try:
        query_name = name_normalization(query_applied.name)
    except NameNormalizationException:
        raise MoonstreamHTTPException(
            status_code=403,
            detail="Provided query name can't be normalize please select different.",
        )

    if query_name in taken_names:
        raise MoonstreamHTTPException(
            status_code=404,
            detail=f"Provided query name already use. Please remove it or use PUT /{query_name} for update query",
        )

    # Put query to journal
    try:
        entry = bc.create_entry(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            title=f"Query:{query_name}",
            tags=["type:query"],
            content=query_applied.query,
        )
    except BugoutResponseException as err:
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    # create resource query_name_resolver
    try:
        resolver_payload = {
            "type": data.BUGOUT_RESOURCE_QUERY_RESOLVER,
            "user_id": str(caller.id),
            "user": str(caller.username),
            "name": query_name,
            "entry_id": str(entry.id),
        }
        bc.create_resource(
            token=caller_token,
            application_id=MOONSTREAM_APPLICATION_ID,
            resource_data=resolver_payload,
        )
    except BugoutResponseException as err:
        logger.error(f"Error creating name resolving resource: {err!s}")
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    try:
        bc.update_tags(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            entry_id=entry.id,
            tags=[f"query_id:{entry.id}", "preapprove"],
        )
    except BugoutResponseException as err:
        logger.error(f"Error in applind tags to query entry: {err!s}")
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    return entry
2022-02-17 10:22:20 +00:00
@router.get("/{query_name}/query", tags=["queries"])
async def get_query_handler(
    request: Request, query_name: str
) -> data.QueryInfoResponse:
    """
    Return the stored text, approval state and parameters of a named query.

    The query name is resolved to a journal entry id with `get_query_by_name`,
    the entry is fetched with the admin token, and its content is parsed with
    SQLAlchemy `text()` to discover the bind-parameter names.  For each
    parameter the response carries the value of the entry tag with the same
    key (`key:value`), or None when no such tag exists.

    Raises MoonstreamHTTPException:
    - 403 when the name cannot be normalized or the entry has no content;
    - the Bugout status code when fetching the entry fails;
    - 500 on any other failure, including a query that cannot be parsed.
    """
    token = request.state.token

    try:
        query_id = get_query_by_name(query_name, token)
    except NameNormalizationException:
        raise MoonstreamHTTPException(
            status_code=403,
            detail="Provided query name can't be normalize please select different.",
        )
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    try:
        entry = bc.get_entry(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            entry_id=query_id,
        )
    except BugoutResponseException as e:
        logger.error(f"Error in get query: {str(e)}")
        raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    # This check must stay outside the parsing `try` below: inside it, the 403
    # was caught by `except Exception` and turned into a misleading 500.
    if entry.content is None:
        raise MoonstreamHTTPException(
            status_code=403, detail="Query is empty. Please update it."
        )

    try:
        query = text(entry.content)
    except Exception as e:
        raise MoonstreamHTTPException(
            status_code=500, internal_error=e, detail="Error in query parsing"
        )

    # NOTE(review): `_bindparams` is a private SQLAlchemy attribute; kept
    # because the original relies on it to list the `:name` placeholders.
    query_parameters_names = list(query._bindparams.keys())

    # Tags are either bare flags ("approved") or "key:value" pairs.  Split on
    # the first colon only so values that contain colons are not truncated.
    tags_dict: Dict[str, Any] = {}
    for tag in entry.tags:
        key, separator, value = tag.partition(":")
        tags_dict[key] = value if separator else True

    query_parameters: Dict[str, Any] = {
        param: tags_dict.get(param) for param in query_parameters_names
    }

    return data.QueryInfoResponse(
        query=entry.content,
        query_id=str(entry.id),
        preapprove="preapprove" in tags_dict,
        approved="approved" in tags_dict,
        parameters=query_parameters,
        created_at=entry.created_at,
        updated_at=entry.updated_at,
    )
2022-02-17 10:22:20 +00:00
@router.put("/{query_name}", tags=["queries"])
async def update_query_handler(
    request: Request,
    query_name: str,
    request_update: data.UpdateQueryRequest = Body(...),
) -> BugoutJournalEntryContent:
    """
    Replace the text of an existing named query.

    Resolves `query_name` to a journal entry id with `get_query_by_name`, then
    calls `bc.update_entry_content` with the admin token, passing the new query
    text, the path's `query_name` as title and the single tag "preapprove".

    Returns the object returned by `bc.update_entry_content`.

    Raises MoonstreamHTTPException: 403 when the name cannot be normalized,
    the Bugout status code when the update is rejected, 500 otherwise.
    """
    caller_token = request.state.token

    try:
        entry_id = get_query_by_name(query_name, caller_token)
    except NameNormalizationException:
        raise MoonstreamHTTPException(
            status_code=403,
            detail="Provided query name can't be normalize please select different.",
        )
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    try:
        updated_entry = bc.update_entry_content(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            entry_id=entry_id,
            title=query_name,
            content=request_update.query,
            tags=["preapprove"],
        )
    except BugoutResponseException as err:
        logger.error(f"Error in updating query: {err!s}")
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    return updated_entry
2022-03-07 17:14:56 +00:00
@router.post(
    "/{query_name}/update_data",
    tags=["queries"],
)
async def update_query_data_handler(
    request: Request,
    query_name: str,
    request_update: data.UpdateDataRequest = Body(...),
) -> Optional[data.QueryPresignUrl]:
    """
    Request update data on S3 bucket

    Resolves `query_name` to a query id, searches the queries journal for an
    entry tagged `approved` and `query_id:<id>` but not `preapprove`, and, when
    that entry has content, POSTs the query text, the caller's params and the
    file type ("csv" when the entry is tagged `ext:csv`, otherwise "json") to
    the crawlers server `/jobs/<id>/query_update` endpoint.

    Returns a `data.QueryPresignUrl` built from the crawlers server JSON
    response, or None when the approved entry has empty content.

    Raises MoonstreamHTTPException:
    - 403 when the name cannot be normalized or no approved entry is found;
    - the crawlers server status code (with its body as detail) when it does
      not answer 200;
    - the Bugout status code when the journal search is rejected;
    - 500 on any other failure.
    """
    token = request.state.token

    try:
        query_id = get_query_by_name(query_name, token)
    except NameNormalizationException:
        raise MoonstreamHTTPException(
            status_code=403,
            detail="Provided query name can't be normalize please select different.",
        )
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    try:
        entries = bc.search(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            query=f"tag:approved tag:query_id:{query_id} !tag:preapprove",
            limit=1,
            timeout=5,
        )

        if not entries.results:
            raise MoonstreamHTTPException(
                status_code=403, detail="Query not approved yet."
            )

        s3_response = None
        approved_entry = entries.results[0]

        if approved_entry.content:
            file_type = "csv" if "ext:csv" in approved_entry.tags else "json"

            # NOTE(review): `requests.post` is a blocking call inside an async
            # handler; left unchanged here, bounded by the 5 second timeout.
            response = requests.post(
                f"{MOONSTREAM_CRAWLERS_SERVER_URL}:{MOONSTREAM_CRAWLERS_SERVER_PORT}/jobs/{query_id}/query_update",
                json={
                    "query": approved_entry.content,
                    "params": request_update.params,
                    "file_type": file_type,
                },
                timeout=5,
            )

            if response.status_code != 200:
                raise MoonstreamHTTPException(
                    status_code=response.status_code,
                    detail=response.text,
                )

            s3_response = data.QueryPresignUrl(**response.json())
    except MoonstreamHTTPException:
        # Deliberate HTTP errors raised above (403 / upstream status) must
        # reach the client as-is instead of being rewrapped as a 500 by the
        # generic handler below.
        raise
    except BugoutResponseException as e:
        logger.error(f"Error in updating query: {str(e)}")
        raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    return s3_response
2022-02-17 10:22:20 +00:00
@router.post("/{query_name}", tags=["queries"])
async def get_access_link_handler(
    request: Request,
    query_name: str,
    request_update: data.UpdateDataRequest = Body(...),
) -> Optional[data.QueryPresignUrl]:
    """
    Request S3 presign url

    Resolves `query_name` to a query id and searches the queries journal for an
    entry tagged `approved` and `query_id:<id>` but not `preapprove`.  When such
    an entry exists and has content, a presigned GET link is generated for the
    object `<prefix>/queries/<query_id>/<params_hash>/data.<csv|json>` in the
    queries bucket, where `params_hash` is `query_parameter_hash` of the
    caller's params.

    Returns the link wrapped in `data.QueryPresignUrl`, or None when no
    approved entry with content was found.

    Raises MoonstreamHTTPException: 403 when the name cannot be normalized,
    the Bugout status code when the search is rejected, 500 otherwise.
    """
    # get real connect to query_id
    caller_token = request.state.token

    try:
        query_id = get_query_by_name(query_name, caller_token)
    except NameNormalizationException:
        raise MoonstreamHTTPException(
            status_code=403,
            detail="Provided query name can't be normalize please select different.",
        )
    except Exception as err:
        logger.error(f"Error in get query: {err!s}")
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    try:
        found = bc.search(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            query=f"tag:approved tag:query_id:{query_id} !tag:preapprove",
            limit=1,
            timeout=5,
        )

        presigned = None

        if found.results and found.results[0].content:
            caller_params = dict(request_update.params)
            entry_tags = found.results[0].tags

            # Results are stored as JSON unless the entry is tagged for CSV.
            extension = "csv" if "ext:csv" in entry_tags else "json"

            digest = query_parameter_hash(caller_params)
            object_key = f"{MOONSTREAM_S3_QUERIES_BUCKET_PREFIX}/queries/{query_id}/{digest}/data.{extension}"

            link = generate_s3_access_links(
                method_name="get_object",
                bucket=MOONSTREAM_S3_QUERIES_BUCKET,
                key=object_key,
                expiration=300000,
                http_method="GET",
            )
            presigned = data.QueryPresignUrl(url=link)
    except BugoutResponseException as err:
        logger.error(f"Error in get access link: {err!s}")
        raise MoonstreamHTTPException(status_code=err.status_code, detail=err.detail)
    except Exception as err:
        logger.error(f"Error in get access link: {err!s}")
        raise MoonstreamHTTPException(status_code=500, internal_error=err)

    return presigned
@router.delete("/{query_name}", tags=["queries"])
async def remove_query_handler(
    request: Request,
    query_name: str,
) -> BugoutJournalEntry:
    """
    Request delete query from journal

    Lists the caller's query-resolver resources filtered by `query_name`,
    deletes the matching resolver resource with the caller's token, then
    deletes the journal entry it points to with the admin token.

    Returns the object returned by `bc.delete_entry`.

    Raises MoonstreamHTTPException:
    - 404 when no resolver resource with exactly this name is found;
    - the Bugout status code when Bugout rejects a request;
    - 500 on any other failure.
    """
    token = request.state.token

    # NOTE(review): unlike the other handlers, the name is used as given and
    # is not passed through name normalization - confirm this is intended.
    params = {"type": data.BUGOUT_RESOURCE_QUERY_RESOLVER, "name": query_name}
    try:
        resources: BugoutResources = bc.list_resources(token=token, params=params)
    except BugoutResponseException as e:
        raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    # name -> (resolver resource id, journal entry id)
    query_ids: Dict[str, Tuple[UUID, Union[UUID, str]]] = {
        resource.resource_data["name"]: (
            resource.id,
            resource.resource_data["entry_id"],
        )
        for resource in resources.resources
    }

    # Check for the exact name rather than for "any result": a non-empty
    # listing without this name used to raise KeyError below and was reported
    # as a 500 instead of "not found".
    if query_name not in query_ids:
        raise MoonstreamHTTPException(status_code=404, detail="Query does not exists")

    resource_id, entry_id = query_ids[query_name]

    try:
        bc.delete_resource(token=token, resource_id=resource_id)
    except BugoutResponseException as e:
        raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    try:
        entry = bc.delete_entry(
            token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
            journal_id=MOONSTREAM_QUERIES_JOURNAL_ID,
            entry_id=entry_id,
        )
    except BugoutResponseException as e:
        raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
    except Exception as e:
        raise MoonstreamHTTPException(status_code=500, internal_error=e)

    return entry