diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/__init__.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py
new file mode 100644
index 00000000..7f2e57ec
--- /dev/null
+++ b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py
@@ -0,0 +1,219 @@
+import argparse
+import json
+import logging
+import os
+from typing import Any, Dict
+import uuid
+
+import requests  # type: ignore
+
+
+from .utils import get_results_for_moonstream_query
+from ..settings import (
+    MOONSTREAM_ADMIN_ACCESS_TOKEN,
+    MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID,
+    BUGOUT_REQUEST_TIMEOUT_SECONDS,
+)
+
+from ..settings import bugout_client as bc
+
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+
+def handle_leaderboards(args: argparse.Namespace) -> None:
+    """
+    Run the leaderboard generator.
+
+    Searches the leaderboard generator journal for "#leaderboard" entries
+    (filtered by args.leaderboard_id when it is set), runs the Moonstream
+    query named in each entry, PUTs the results to the Engine API
+    leaderboard and then fetches the leaderboard info to confirm the push.
+
+    Entries without content are skipped silently. Entries with unparsable
+    content, unexpected fields, no query results, or an HTTP error response
+    from the Engine API are logged and skipped.
+
+    Raises:
+        ValueError: if the journal search returns no entries.
+    """
+
+    ### get leaderboard journal
+
+    query = "#leaderboard"
+
+    if args.leaderboard_id:
+        query += f" leaderboard_id:{args.leaderboard_id}"
+
+    leaderboards = bc.search(
+        token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
+        journal_id=MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID,
+        query=query,
+        limit=1,
+        timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS,
+    )
+
+    if len(leaderboards.results) == 0:
+        raise ValueError("No leaderboard found")
+
+    for leaderboard in leaderboards.results:
+        if leaderboard.content is None:
+            continue
+
+        try:
+            leaderboard_data = json.loads(leaderboard.content)
+        except json.JSONDecodeError:
+            logger.error(
+                f"Could not parse leaderboard content: {[tag for tag in leaderboard.tags if tag.startswith('leaderboard_id')]}"
+            )
+            continue
+
+        ### get query name and params from the journal entry
+
+        try:
+            leaderboard_id = leaderboard_data["leaderboard_id"]
+            query_name = leaderboard_data["query_name"]
+            if args.params:
+                # argparse already decodes --params (type=json.loads); only
+                # decode again when a caller hands over a raw JSON string.
+                params = args.params
+                if isinstance(params, str):
+                    params = json.loads(params)
+            else:
+                params = leaderboard_data["params"]
+        except (KeyError, TypeError) as field_error:
+            logger.error(
+                f"Leaderboard entry does not have the expected fields: {field_error!r}"
+            )
+            continue
+
+        ### execute query
+
+        # The query name is read from the journal entry: the CLI defines no
+        # --query-name option, so args.query_name would not exist.
+        query_results = get_results_for_moonstream_query(
+            args.query_api_access_token,
+            query_name,
+            params,
+            args.query_api,
+            args.max_retries,
+            args.interval,
+        )
+
+        ### push results to leaderboard API
+
+        if query_results is None:
+            logger.error(f"Could not get results for query {query_name}")
+            continue
+
+        leaderboard_push_api_url = f"{args.engine_api}/leaderboards/{leaderboard_id}"
+
+        leaderboard_api_headers = {
+            "Authorization": f"Bearer {args.query_api_access_token}",
+            "Content-Type": "application/json",
+        }
+
+        leaderboard_api_response = requests.put(
+            leaderboard_push_api_url,
+            json=query_results,
+            headers=leaderboard_api_headers,
+            timeout=10,
+        )
+
+        try:
+            leaderboard_api_response.raise_for_status()
+        except requests.exceptions.HTTPError as http_error:
+            logger.error(
+                f"Could not push results to leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}"
+            )
+            continue
+
+        ### get leaderboard from leaderboard API
+
+        leaderboard_api_info_url = (
+            f"{args.engine_api}/leaderboards/info?leaderboard_id={leaderboard_id}"
+        )
+
+        leaderboard_api_response = requests.get(
+            leaderboard_api_info_url, headers=leaderboard_api_headers, timeout=10
+        )
+
+        try:
+            leaderboard_api_response.raise_for_status()
+        except requests.exceptions.HTTPError as http_error:
+            logger.error(
+                f"Could not get leaderboard info from leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}"
+            )
+            continue
+
+        info = leaderboard_api_response.json()
+
+        logger.info(
+            f"Successfully pushed results to leaderboard {info['id']}: {info['name']}"
+        )
+
+
+def generate_cli() -> argparse.ArgumentParser:
+    """
+    Generates an argument parser for the leaderboard generator.
+
+    The parser has a single "throwing-shade" subcommand that dispatches to
+    handle_leaderboards; without a subcommand it prints the help message.
+    """
+    parser = argparse.ArgumentParser(description="The Judge: Generate leaderboards")
+    parser.set_defaults(func=lambda _: parser.print_help())
+    subparsers = parser.add_subparsers()
+
+    shadowcorns_throwing_shade_parser = subparsers.add_parser(
+        "throwing-shade", description="Shadowcorns: Throwing Shade Leaderboard"
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--query-api",
+        default="https://api.moonstream.to",
+        help="Moonstream API URL. Access token is passed with --query-api-access-token.",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--engine-api",
+        default="https://engineapi.moonstream.to",
+        help="Moonstream Engine API URL. Access token is passed with --query-api-access-token.",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--leaderboard-id",
+        type=uuid.UUID,
+        required=True,
+        help="Leaderboard ID on Engine API",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--max-retries",
+        type=int,
+        default=100,
+        help="Number of times to retry requests for Moonstream Query results",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--interval",
+        type=float,
+        default=30.0,
+        help="Number of seconds to wait between attempts to get results from Moonstream Query API",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--params",
+        type=json.loads,
+        help="Parameters to pass to Moonstream Query API",
+    )
+    shadowcorns_throwing_shade_parser.add_argument(
+        "--query-api-access-token",
+        type=str,
+        required=True,
+        help="Moonstream Access Token to use for Moonstream Query API and Engine API requests",
+    )
+
+    shadowcorns_throwing_shade_parser.set_defaults(func=handle_leaderboards)
+
+    return parser
+
+
+if __name__ == "__main__":
+    parser = generate_cli()
+    args = parser.parse_args()
+    args.func(args)
diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py
new file mode 100644
index 00000000..8ec3013b
--- /dev/null
+++ b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py
@@ -0,0 +1,103 @@
+import datetime
+import json
+import logging
+import os
+import time
+from typing import Any, Dict, Optional
+
+
+import requests  # type: ignore
+
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+
+def get_results_for_moonstream_query(
+    moonstream_access_token: str,
+    query_name: str,
+    params: Dict[str, Any],
+    api_url: str = "https://api.moonstream.to",
+    max_retries: int = 100,
+    interval: float = 30.0,
+) -> Optional[Dict[str, Any]]:
+    """
+    Request a refresh of a Moonstream query and poll for the refreshed results.
+
+    POSTs params to {api_url}/queries/{query_name}/update_data, then polls the
+    URL returned in the response until it answers with status 200. Every poll
+    is preceded by a sleep of interval seconds and sends an If-Modified-Since
+    header holding the time at which this function was called.
+
+    Each refresh request is polled at most max_retries + 1 times (failed polls
+    included), and the refresh is requested at most max_retries times.
+
+    Args:
+        moonstream_access_token: Bearer token sent to the Moonstream API.
+        query_name: Name of the query to refresh.
+        params: Sent as the "params" field of the refresh request body.
+        api_url: Base URL of the Moonstream API; trailing slashes are stripped.
+        max_retries: Retry budget for refresh requests and for polls.
+        interval: Seconds to sleep before each poll.
+
+    Returns:
+        The decoded JSON body of the first 200 response, or None if no poll
+        returned 200.
+
+    Raises:
+        requests.exceptions.HTTPError: if a refresh request gets an error status.
+    """
+    result: Optional[Dict[str, Any]] = None
+
+    api_url = api_url.rstrip("/")
+    request_url = f"{api_url}/queries/{query_name}/update_data"
+    headers = {
+        "Authorization": f"Bearer {moonstream_access_token}",
+        "Content-Type": "application/json",
+    }
+    # Assume our clock is not drifting too much from AWS clocks.
+    if_modified_since_datetime = datetime.datetime.now(datetime.timezone.utc)
+    if_modified_since = if_modified_since_datetime.strftime("%a, %d %b %Y %H:%M:%S GMT")
+
+    request_body = {"params": params}
+
+    success = False
+    attempts = 0
+
+    while not success and attempts < max_retries:
+        attempts += 1
+        response = requests.post(
+            request_url, json=request_body, headers=headers, timeout=10
+        )
+        response.raise_for_status()
+        response_body = response.json()
+        data_url = response_body["url"]
+
+        logger.debug(f"If-Modified-Since: {if_modified_since}")
+
+        num_retries = 0
+        # The bound is checked before every poll, so a poll that raises still
+        # uses up the retry budget instead of repeating forever.
+        while num_retries <= max_retries:
+            time.sleep(interval)
+            num_retries += 1
+            try:
+                data_response = requests.get(
+                    data_url,
+                    headers={"If-Modified-Since": if_modified_since},
+                    timeout=10,
+                )
+            except requests.exceptions.RequestException as request_error:
+                logger.error(f"Failed to get data from {data_url}: {request_error}")
+                continue
+            logger.debug(f"Status code: {data_response.status_code}")
+            # Last-Modified may be absent from the response; .get avoids a KeyError.
+            logger.debug(
+                f"Last-Modified: {data_response.headers.get('Last-Modified')}"
+            )
+            if data_response.status_code == 200:
+                result = data_response.json()
+                success = True
+                break
+
+    return result
diff --git a/crawlers/mooncrawl/mooncrawl/settings.py b/crawlers/mooncrawl/mooncrawl/settings.py
index b0d36b89..f6d43f39 100644
--- a/crawlers/mooncrawl/mooncrawl/settings.py
+++ b/crawlers/mooncrawl/mooncrawl/settings.py
@@ -310,3 +310,14 @@ HISTORICAL_CRAWLER_STATUS_TAG_PREFIXES = {
     "historical_crawl_status": "historical_crawl_status",
     "progress_status": "progress",
 }
+
+
+# Leaderboard generator
+
+MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID = os.environ.get(
+    "MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID", ""
+)
+if MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID == "":
+    raise ValueError(
+        "MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID environment variable must be set"
+    )