From 4d1d242c813eda2ba1886d5e0ce3ec28d535c137 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 26 Jul 2023 23:51:15 +0300 Subject: [PATCH] Add loggers and fixes. --- crawlers/deploy/deploy.bash | 14 ++ crawlers/deploy/leaderboards-worker.service | 11 + crawlers/deploy/leaderboards-worker.timer | 9 + .../leaderboard_generator/__init__.py | 0 .../mooncrawl/leaderboard_generator/cli.py | 199 ------------------ .../mooncrawl/leaderboard_generator/utils.py | 75 ------- crawlers/mooncrawl/mooncrawl/version.py | 2 +- crawlers/mooncrawl/setup.py | 2 +- 8 files changed, 36 insertions(+), 276 deletions(-) create mode 100644 crawlers/deploy/leaderboards-worker.service create mode 100644 crawlers/deploy/leaderboards-worker.timer delete mode 100644 crawlers/mooncrawl/mooncrawl/leaderboard_generator/__init__.py delete mode 100644 crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py delete mode 100644 crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py diff --git a/crawlers/deploy/deploy.bash b/crawlers/deploy/deploy.bash index 6df24df1..c0f3545f 100755 --- a/crawlers/deploy/deploy.bash +++ b/crawlers/deploy/deploy.bash @@ -26,6 +26,9 @@ SCRIPT_DIR="$(realpath $(dirname $0))" # Service files MOONCRAWL_SERVICE_FILE="mooncrawl.service" +LEADERBOARDS_WORKER_SERVICE_FILE="leaderboards-worker.service" +LEADERBOARDS_WORKER_TIMER_FILE="leaderboards-worker.timer" + # Ethereum service files ETHEREUM_SYNCHRONIZE_SERVICE_FILE="ethereum-synchronize.service" @@ -553,3 +556,14 @@ cp "${SCRIPT_DIR}/${ZKSYNC_ERA_TESTNET_HISTORICAL_CRAWL_EVENTS_SERVICE_FILE}" "/ cp "${SCRIPT_DIR}/${ZKSYNC_ERA_TESTNET_HISTORICAL_CRAWL_EVENTS_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${ZKSYNC_ERA_TESTNET_HISTORICAL_CRAWL_EVENTS_TIMER_FILE}" XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${ZKSYNC_ERA_TESTNET_HISTORICAL_CRAWL_EVENTS_TIMER_FILE}" + + + +echo +echo +echo -e "${PREFIX_INFO} Replacing existing Leaderboards worker service and timer with: ${LEADERBOARDS_WORKER_SERVICE_FILE}, ${LEADERBOARDS_WORKER_TIMER_FILE}" +chmod 644 "${SCRIPT_DIR}/${LEADERBOARDS_WORKER_SERVICE_FILE}" "${SCRIPT_DIR}/${LEADERBOARDS_WORKER_TIMER_FILE}" +cp "${SCRIPT_DIR}/${LEADERBOARDS_WORKER_SERVICE_FILE}" "/home/ubuntu/.config/systemd/user/${LEADERBOARDS_WORKER_SERVICE_FILE}" +cp "${SCRIPT_DIR}/${LEADERBOARDS_WORKER_TIMER_FILE}" "/home/ubuntu/.config/systemd/user/${LEADERBOARDS_WORKER_TIMER_FILE}" +XDG_RUNTIME_DIR="/run/user/1000" systemctl --user daemon-reload +XDG_RUNTIME_DIR="/run/user/1000" systemctl --user restart --no-block "${LEADERBOARDS_WORKER_TIMER_FILE}" \ No newline at end of file diff --git a/crawlers/deploy/leaderboards-worker.service b/crawlers/deploy/leaderboards-worker.service new file mode 100644 index 00000000..0f70e80e --- /dev/null +++ b/crawlers/deploy/leaderboards-worker.service @@ -0,0 +1,11 @@ +[Unit] +Description=Runs leaderboards generator worker +After=network.target + +[Service] +Type=oneshot +WorkingDirectory=/home/ubuntu/moonstream/crawlers/mooncrawl +EnvironmentFile=/home/ubuntu/moonstream-secrets/app.env +ExecStart=/home/ubuntu/moonstream-env/bin/python -m mooncrawl.leaderboards_generator.cli leaderboards-generate --query-api-access-token "${MOONSTREAM_PUBLIC_QUERIES_DATA_ACCESS_TOKEN}" +CPUWeight=60 +SyslogIdentifier=leaderboards-generator \ No newline at end of file diff --git a/crawlers/deploy/leaderboards-worker.timer b/crawlers/deploy/leaderboards-worker.timer new file mode 100644 index 00000000..108f5175 --- /dev/null +++ b/crawlers/deploy/leaderboards-worker.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Runs leaderboard update script every 10 minutes + +[Timer] +OnBootSec=60s +OnUnitActiveSec=10m + +[Install] +WantedBy=timers.target diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/__init__.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py deleted file mode 100644 index 9aaad6d2..00000000 --- a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/cli.py +++ /dev/null @@ -1,199 +0,0 @@ -import argparse -import json -import logging -import os -from typing import Any, Dict -import uuid - -import requests # type: ignore - - -from .utils import get_results_for_moonstream_query -from ..settings import ( - MOONSTREAM_ADMIN_ACCESS_TOKEN, - MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID, - BUGOUT_REQUEST_TIMEOUT_SECONDS, -) - -from ..settings import bugout_client as bc - - -logging.basicConfig() -logger = logging.getLogger(__name__) - - -def handle_leaderboards(args: argparse.Namespace) -> None: - """ - Run the leaderboard generator. - - Get query from journal and push results to leaderboard API. - """ - - ### get leaderboard journal - - query = "#leaderboard" - - if args.leaderboard_id: - query += f" #cleaderboard_id:{args.leaderboard_id}" - - leaderboards = bc.search( - token=MOONSTREAM_ADMIN_ACCESS_TOKEN, - journal_id=MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID, - query=query, - limit=1, - timeout=BUGOUT_REQUEST_TIMEOUT_SECONDS, - ) - - if len(leaderboards.results) == 0: - raise ValueError("No leaderboard found") - - for leaderboard in leaderboards.results: - if leaderboard.content is None: - continue - - try: - leaderboard_data = json.loads(leaderboard.content) - except json.JSONDecodeError: - logger.error( - f"Could not parse leaderboard content: {[tag for tag in leaderboard.tags if tag.startswith('leaderboard_id')]}" - ) - continue - - ### get results from query API - - leaderboard_id = leaderboard_data["leaderboard_id"] - - query_name = leaderboard_data["query_name"] - - if args.params: - params = json.loads(args.params) - else: - params = leaderboard_data["params"] - - ### execute query - - query_results = get_results_for_moonstream_query( - args.query_api_access_token, - query_name, - params, - args.query_api, - args.max_retries, - args.interval, - ) - - ### push results to leaderboard API - - if query_results is None: - logger.error(f"Could not get results for query {query_name}") - continue - - leaderboard_push_api_url = ( - f"{args.engine_api}/leaderboard/{leaderboard_id}/scores" - ) - - leaderboard_api_headers = { - "Authorization": f"Bearer {args.query_api_access_token}", - "Content-Type": "application/json", - } - - leaderboard_api_response = requests.put( - leaderboard_push_api_url, - json=query_results["data"], - headers=leaderboard_api_headers, - timeout=10, - ) - - try: - leaderboard_api_response.raise_for_status() - except requests.exceptions.HTTPError as http_error: - logger.error( - f"Could not push results to leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}" - ) - continue - - ### get leaderboard from leaderboard API - - leaderboard_api_info_url = ( - f"{args.engine_api}/leaderboard/info?leaderboard_id={leaderboard_id}" - ) - - leaderboard_api_response = requests.get( - leaderboard_api_info_url, headers=leaderboard_api_headers, timeout=10 - ) - - try: - leaderboard_api_response.raise_for_status() - except requests.exceptions.HTTPError as http_error: - logger.error( - f"Could not get leaderboard info from leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}" - ) - continue - - info = leaderboard_api_response.json() - - logger.info( - f"Successfully pushed results to leaderboard {info['id']}: {info['title']}" - ) - - -def main(): - """ - Generates an argument parser for the "autocorns judge" command. - """ - - parser = argparse.ArgumentParser(description="The Judge: Generate leaderboards") - parser.set_defaults(func=lambda _: parser.print_help()) - subparsers = parser.add_subparsers() - - shadowcorns_throwing_shade_parser = subparsers.add_parser( - "leaderboards-generate", description="Generate Leaderboard" - ) - shadowcorns_throwing_shade_parser.add_argument( - "--query-api", - default="https://api.moonstream.to", - help="Moonstream API URL. Access token expected to be set as MOONSTREAM_ACCESS_TOKEN environment variable.", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--engine-api", - default="https://engineapi.moonstream.to", - help="Moonstream Engine API URL. Access token expected to be set as MOONSTREAM_ACCESS_TOKEN environment variable.", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--leaderboard-id", - type=uuid.UUID, - required=False, - help="Leaderboard ID on Engine API", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--max-retries", - type=int, - default=100, - help="Number of times to retry requests for Moonstream Query results", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--interval", - type=float, - default=30.0, - help="Number of seconds to wait between attempts to get results from Moonstream Query API", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--params", - type=json.loads, - required=False, - help="Parameters to pass to Moonstream Query API", - ) - shadowcorns_throwing_shade_parser.add_argument( - "--query-api-access-token", - type=str, - required=True, - help="Moonstream Access Token to use for Moonstream Query API requests", - ) - - shadowcorns_throwing_shade_parser.set_defaults(func=handle_leaderboards) - - args = parser.parse_args() - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py b/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py deleted file mode 100644 index 8ec3013b..00000000 --- a/crawlers/mooncrawl/mooncrawl/leaderboard_generator/utils.py +++ /dev/null @@ -1,75 +0,0 @@ -import datetime -import json -import logging -import os -import time -from typing import Any, Dict, Optional - - -import requests # type: ignore - - -logging.basicConfig() -logger = logging.getLogger(__name__) - - -def get_results_for_moonstream_query( - moonstream_access_token: str, - query_name: str, - params: Dict[str, Any], - api_url: str = "https://api.moonstream.to", - max_retries: int = 100, - interval: float = 30.0, -) -> Optional[Dict[str, Any]]: - result: Optional[Dict[str, Any]] = None - - api_url = api_url.rstrip("/") - request_url = f"{api_url}/queries/{query_name}/update_data" - headers = { - "Authorization": f"Bearer {moonstream_access_token}", - "Content-Type": "application/json", - } - # Assume our clock is not drifting too much from AWS clocks. - if_modified_since_datetime = datetime.datetime.utcnow() - if_modified_since = if_modified_since_datetime.strftime("%a, %d %b %Y %H:%M:%S GMT") - - request_body = {"params": params} - - success = False - attempts = 0 - - while not success and attempts < max_retries: - attempts += 1 - response = requests.post( - request_url, json=request_body, headers=headers, timeout=10 - ) - response.raise_for_status() - response_body = response.json() - data_url = response_body["url"] - - keep_going = True - num_retries = 0 - - logging.debug(f"If-Modified-Since: {if_modified_since}") - while keep_going: - time.sleep(interval) - num_retries += 1 - try: - data_response = requests.get( - data_url, - headers={"If-Modified-Since": if_modified_since}, - timeout=10, - ) - except: - logger.error(f"Failed to get data from {data_url}") - continue - logger.debug(f"Status code: {data_response.status_code}") - logger.debug(f"Last-Modified: {data_response.headers['Last-Modified']}") - if data_response.status_code == 200: - result = data_response.json() - keep_going = False - success = True - if keep_going and max_retries > 0: - keep_going = num_retries <= max_retries - - return result diff --git a/crawlers/mooncrawl/mooncrawl/version.py b/crawlers/mooncrawl/mooncrawl/version.py index f04f8d7c..2d9a5a21 100644 --- a/crawlers/mooncrawl/mooncrawl/version.py +++ b/crawlers/mooncrawl/mooncrawl/version.py @@ -2,4 +2,4 @@ Moonstream crawlers version. """ -MOONCRAWL_VERSION = "0.3.2" +MOONCRAWL_VERSION = "0.3.3" diff --git a/crawlers/mooncrawl/setup.py b/crawlers/mooncrawl/setup.py index b7e42dfc..95751fe2 100644 --- a/crawlers/mooncrawl/setup.py +++ b/crawlers/mooncrawl/setup.py @@ -67,7 +67,7 @@ setup( "state-crawler=mooncrawl.state_crawler.cli:main", "metadata-crawler=mooncrawl.metadata_crawler.cli:main", "custom-crawler=mooncrawl.reports_crawler.cli:main", - "leaderboard-generator=mooncrawl.leaderboard_generator.cli:main", + "leaderboards-generator=mooncrawl.leaderboards_generator.cli:main", ] }, )