moonstream/crawlers/mooncrawl/mooncrawl/actions.py

173 wiersze
4.3 KiB
Python
Czysty Zwykły widok Historia

2022-11-30 15:31:19 +00:00
import hashlib
import json
2022-11-24 12:42:52 +00:00
import logging
2023-07-01 11:28:35 +00:00
import time
import uuid
from collections import OrderedDict
from datetime import datetime
from typing import Any, Dict, Optional, Union
import boto3 # type: ignore
2023-07-01 11:28:35 +00:00
import requests # type: ignore
from bugout.data import BugoutResources
from bugout.exceptions import BugoutResponseException
from moonstream.client import ( # type: ignore
ENDPOINT_QUERIES,
Moonstream,
MoonstreamQueryResultUrl,
)
from .middleware import MoonstreamHTTPException
from .settings import bugout_client as bc
2022-11-24 12:42:52 +00:00
# Configure the root logger at INFO level (an import-time side effect of this
# module), then create the module-level logger used by the functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class EntityCollectionNotFoundException(Exception):
    """Signal that the requested entity collection could not be found."""
2022-11-24 12:42:52 +00:00
def push_data_to_bucket(
    data: Any, key: str, bucket: str, metadata: Optional[Dict[str, Any]] = None
) -> None:
    """
    Upload data to S3 as a JSON object.

    Args:
        data: Payload passed as the ``Body`` of the S3 ``put_object`` call.
        key: Object key inside the bucket.
        bucket: Name of the destination bucket.
        metadata: Optional metadata attached to the object. Defaults to no
            metadata.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    object_metadata: Dict[str, Any] = {} if metadata is None else metadata

    s3 = boto3.client("s3")
    s3.put_object(
        Body=data,
        Bucket=bucket,
        Key=key,
        ContentType="application/json",
        Metadata=object_metadata,
    )

    logger.info(f"Data pushed to bucket: s3://{bucket}/{key}")
2022-11-24 13:40:26 +00:00
def generate_s3_access_links(
    method_name: str,
    bucket: str,
    key: str,
    http_method: str,
    expiration: int = 300,
) -> str:
    """
    Build a presigned URL for an S3 object.

    Args:
        method_name: Name of the S3 client method the URL is signed for.
        bucket: Bucket that holds the object.
        key: Key of the object inside the bucket.
        http_method: HTTP method forwarded as ``HttpMethod``.
        expiration: Value forwarded as ``ExpiresIn`` (lifetime of the URL;
            seconds per the boto3 documentation).

    Returns:
        The presigned URL produced by the S3 client.
    """
    object_location = {
        "Bucket": bucket,
        "Key": key,
    }
    return boto3.client("s3").generate_presigned_url(
        method_name,
        Params=object_location,
        ExpiresIn=expiration,
        HttpMethod=http_method,
    )
2022-11-30 15:31:19 +00:00
def query_parameter_hash(params: Dict[str, Any]) -> str:
    """
    Generate a hash of the query parameters.

    The parameters are serialized to JSON with sorted keys, so two mappings
    with the same content produce the same hash regardless of insertion
    order.

    Args:
        params: Query parameters; must be JSON-serializable.

    Returns:
        Hex-encoded MD5 digest of the serialized parameters. MD5 is used
        here only as a fingerprint, not for security.
    """
    # ``sort_keys=True`` already makes the serialization order-independent,
    # so a plain dict copy is enough (no OrderedDict needed).
    serialized = json.dumps(dict(params), sort_keys=True)
    return hashlib.md5(serialized.encode("utf-8")).hexdigest()
def get_entity_subscription_collection_id(
    resource_type: str,
    token: Union[uuid.UUID, str],
    user_id: uuid.UUID,
) -> str:
    """
    Get collection_id from brood resources.

    Lists the resources matching ``resource_type`` and ``user_id`` and returns
    the ``collection_id`` stored in the first resource's ``resource_data``.
    This function never creates a collection.

    Args:
        resource_type: Value sent as the ``type`` filter.
        token: Access token used for the resource listing request.
        user_id: User whose resources are listed.

    Returns:
        The ``collection_id`` of the first matching resource.

    Raises:
        MoonstreamHTTPException: If listing resources fails.
        EntityCollectionNotFoundException: If no matching resource exists.
    """
    params = {
        "type": resource_type,
        "user_id": str(user_id),
    }
    try:
        resources: BugoutResources = bc.list_resources(token=token, params=params)
    except BugoutResponseException as e:
        raise MoonstreamHTTPException(
            status_code=e.status_code, detail=e.detail
        ) from e
    except Exception as e:
        # The access token is a credential and is deliberately kept out of
        # the log message.
        logger.error(
            f"Error listing subscriptions for user ({user_id}), error: {str(e)}"
        )
        raise MoonstreamHTTPException(status_code=500, internal_error=e) from e

    if not resources.resources:
        raise EntityCollectionNotFoundException("Subscription collection not found.")

    return resources.resources[0].resource_data["collection_id"]
2023-07-01 11:28:35 +00:00
def recive_S3_data_from_query(
    client: Moonstream,
    token: Union[str, uuid.UUID],
    query_name: str,
    params: Optional[Dict[str, Any]] = None,
    time_await: int = 2,
    max_retries: int = 30,
    custom_body: Optional[Dict[str, Any]] = None,
) -> Any:
    """
    Trigger a query update and wait for the refreshed data to appear on S3.

    The result URL is polled with an ``If-Modified-Since`` header set to the
    time this function was called, until it answers with HTTP 200 or the
    retry budget is used up.

    Args:
        client: Moonstream client used to execute the query and to resolve
            the queries endpoint.
        token: Access token for the Moonstream API.
        query_name: Name of the query to execute.
        params: Query parameters, used when ``custom_body`` is not given.
            Defaults to no parameters.
        time_await: Seconds to sleep before every polling attempt.
        max_retries: Number of failed attempts tolerated after the first one;
            at most ``max_retries + 1`` requests are made.
        custom_body: If given, POSTed as JSON to the query's ``update_data``
            endpoint instead of calling ``client.exec_query``.

    Returns:
        The decoded JSON body of the last response received. When the retry
        budget is exhausted this is the body of a non-200 response, and
        decoding it may raise.

    Raises:
        Exception: The last request error, if every polling attempt raised
            and no response was ever received.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if params is None:
        params = {}

    if_modified_since = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S GMT")

    time.sleep(2)

    if custom_body:
        response = requests.post(
            url=f"{client.api.endpoints[ENDPOINT_QUERIES]}/{query_name}/update_data",
            headers={"Authorization": f"Bearer {token}"},
            json=custom_body,
            timeout=5,
        )
        data_url = MoonstreamQueryResultUrl(url=response.json()["url"])
    else:
        data_url = client.exec_query(
            token=token,
            name=query_name,
            params=params,
        )  # S3 presign_url

    data_response = None
    last_error: Optional[Exception] = None
    failed_attempts = 0

    while True:
        time.sleep(time_await)
        try:
            data_response = requests.get(
                data_url.url,
                headers={"If-Modified-Since": if_modified_since},
                timeout=5,
            )
        except Exception as e:
            # A failed request counts as a failed attempt. Without this, a
            # persistent network error would make the loop run forever.
            logger.error(e)
            last_error = e
        else:
            if data_response.status_code == 200:
                break

        failed_attempts += 1
        if failed_attempts > max_retries:
            logger.info("Too many retries")
            break

    if data_response is None:
        # Every attempt raised, so there is no response to decode.
        raise last_error  # type: ignore[misc]

    return data_response.json()