kopia lustrzana https://github.com/bugout-dev/moonstream
173 wiersze
4.3 KiB
Python
173 wiersze
4.3 KiB
Python
import hashlib
|
|
import json
|
|
import logging
|
|
import time
|
|
import uuid
|
|
from collections import OrderedDict
|
|
from datetime import datetime
|
|
from typing import Any, Dict, Optional, Union
|
|
|
|
import boto3 # type: ignore
|
|
import requests # type: ignore
|
|
from bugout.data import BugoutResources
|
|
from bugout.exceptions import BugoutResponseException
|
|
from moonstream.client import ( # type: ignore
|
|
ENDPOINT_QUERIES,
|
|
Moonstream,
|
|
MoonstreamQueryResultUrl,
|
|
)
|
|
|
|
from .middleware import MoonstreamHTTPException
|
|
from .settings import bugout_client as bc
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class EntityCollectionNotFoundException(Exception):
|
|
"""
|
|
Raised when entity collection is not found
|
|
"""
|
|
|
|
|
|
def push_data_to_bucket(
|
|
data: Any, key: str, bucket: str, metadata: Dict[str, Any] = {}
|
|
) -> None:
|
|
s3 = boto3.client("s3")
|
|
s3.put_object(
|
|
Body=data,
|
|
Bucket=bucket,
|
|
Key=key,
|
|
ContentType="application/json",
|
|
Metadata=metadata,
|
|
)
|
|
|
|
logger.info(f"Data pushed to bucket: s3://{bucket}/{key}")
|
|
|
|
|
|
def generate_s3_access_links(
|
|
method_name: str,
|
|
bucket: str,
|
|
key: str,
|
|
http_method: str,
|
|
expiration: int = 300,
|
|
) -> str:
|
|
s3 = boto3.client("s3")
|
|
stats_presigned_url = s3.generate_presigned_url(
|
|
method_name,
|
|
Params={
|
|
"Bucket": bucket,
|
|
"Key": key,
|
|
},
|
|
ExpiresIn=expiration,
|
|
HttpMethod=http_method,
|
|
)
|
|
|
|
return stats_presigned_url
|
|
|
|
|
|
def query_parameter_hash(params: Dict[str, Any]) -> str:
|
|
"""
|
|
Generate a hash of the query parameters
|
|
"""
|
|
|
|
hash = hashlib.md5(
|
|
json.dumps(OrderedDict(params), sort_keys=True).encode("utf-8")
|
|
).hexdigest()
|
|
|
|
return hash
|
|
|
|
|
|
def get_entity_subscription_collection_id(
|
|
resource_type: str,
|
|
token: Union[uuid.UUID, str],
|
|
user_id: uuid.UUID,
|
|
) -> str:
|
|
"""
|
|
Get collection_id from brood resources. If collection not exist and create_if_not_exist is True
|
|
"""
|
|
|
|
params = {
|
|
"type": resource_type,
|
|
"user_id": str(user_id),
|
|
}
|
|
try:
|
|
resources: BugoutResources = bc.list_resources(token=token, params=params)
|
|
except BugoutResponseException as e:
|
|
raise MoonstreamHTTPException(status_code=e.status_code, detail=e.detail)
|
|
except Exception as e:
|
|
logger.error(
|
|
f"Error listing subscriptions for user ({user_id}) with token ({token}), error: {str(e)}"
|
|
)
|
|
raise MoonstreamHTTPException(status_code=500, internal_error=e)
|
|
|
|
if len(resources.resources) == 0:
|
|
raise EntityCollectionNotFoundException("Subscription collection not found.")
|
|
else:
|
|
resource = resources.resources[0]
|
|
return resource.resource_data["collection_id"]
|
|
|
|
|
|
def recive_S3_data_from_query(
|
|
client: Moonstream,
|
|
token: Union[str, uuid.UUID],
|
|
query_name: str,
|
|
params: Dict[str, Any] = {},
|
|
time_await: int = 2,
|
|
max_retries: int = 30,
|
|
custom_body: Optional[Dict[str, Any]] = None,
|
|
) -> Any:
|
|
"""
|
|
Await the query to be update data on S3 with if_modified_since and return new the data.
|
|
"""
|
|
|
|
keep_going = True
|
|
|
|
repeat = 0
|
|
|
|
if_modified_since_datetime = datetime.utcnow()
|
|
if_modified_since = if_modified_since_datetime.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
|
|
|
time.sleep(2)
|
|
if custom_body:
|
|
headers = {
|
|
"Authorization": f"Bearer {token}",
|
|
}
|
|
json = custom_body
|
|
|
|
response = requests.post(
|
|
url=f"{client.api.endpoints[ENDPOINT_QUERIES]}/{query_name}/update_data",
|
|
headers=headers,
|
|
json=json,
|
|
timeout=5,
|
|
)
|
|
data_url = MoonstreamQueryResultUrl(url=response.json()["url"])
|
|
else:
|
|
data_url = client.exec_query(
|
|
token=token,
|
|
name=query_name,
|
|
params=params,
|
|
) # S3 presign_url
|
|
|
|
while keep_going:
|
|
time.sleep(time_await)
|
|
try:
|
|
data_response = requests.get(
|
|
data_url.url,
|
|
headers={"If-Modified-Since": if_modified_since},
|
|
timeout=5,
|
|
)
|
|
except Exception as e:
|
|
logger.error(e)
|
|
continue
|
|
|
|
if data_response.status_code == 200:
|
|
break
|
|
|
|
repeat += 1
|
|
|
|
if repeat > max_retries:
|
|
logger.info("Too many retries")
|
|
break
|
|
return data_response.json()
|