From 8a2e624e097a002a9b0b1b92de50a9032ddcb546 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 13 Dec 2023 21:39:52 +0200 Subject: [PATCH] Re-raise leaderboard push errors, inline chunking, adjust timeouts and batch size. --- .../mooncrawl/leaderboards_generator/utils.py | 15 +++++---------- crawlers/mooncrawl/mooncrawl/settings.py | 2 +- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py index 1e08f668..98135370 100644 --- a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py +++ b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py @@ -116,11 +116,6 @@ def get_data_from_url(url): raise Exception(f"Failed to get data: HTTP {response.status_code}") -def chunk_data(data, chunk_size=100000): - for i in range(0, len(data), chunk_size): - yield data[i : i + chunk_size] - - def send_data_to_endpoint(chunks, endpoint_url, headers, timeout=10): for index, chunk in enumerate(chunks): try: @@ -134,7 +129,7 @@ def send_data_to_endpoint(chunks, endpoint_url, headers, timeout=10): logger.error( f"Could not push results to leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}" ) - continue + raise http_error def leaderboard_push_batch( @@ -160,7 +155,7 @@ def leaderboard_push_batch( } leaderboard_api_response = requests.post( - leaderboard_version_api_url, json=json_data, headers=headers, timeout=5 + leaderboard_version_api_url, json=json_data, headers=headers, timeout=10 ) try: @@ -177,7 +172,7 @@ def leaderboard_push_batch( leaderboard_version_push_api_url = f"{MOONSTREAM_ENGINE_URL}/leaderboard/{leaderboard_id}/versions/{leaderboard_version_id}/scores?normalize_addresses={leaderboard_config['normalize_addresses']}&overwrite=false" - chunks = chunk_data(data, chunk_size=batch_size) + chunks = [data[x : x + batch_size] for x in range(0, len(data), batch_size)] send_data_to_endpoint( chunks, leaderboard_version_push_api_url, headers, timeout=timeout @@ 
-196,7 +191,7 @@ def leaderboard_push_batch( leaderboard_version_publish_api_url, json=json_data, headers=headers, - timeout=5, + timeout=10, ) leaderboard_api_response.raise_for_status() @@ -214,7 +209,7 @@ def leaderboard_push_batch( leaderboard_api_response = requests.delete( leaderboard_version_delete_api_url, headers=headers, - timeout=5, + timeout=timeout, ) leaderboard_api_response.raise_for_status() diff --git a/crawlers/mooncrawl/mooncrawl/settings.py b/crawlers/mooncrawl/mooncrawl/settings.py index 2faafec2..ff843373 100644 --- a/crawlers/mooncrawl/mooncrawl/settings.py +++ b/crawlers/mooncrawl/mooncrawl/settings.py @@ -323,5 +323,5 @@ if MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID == "": ) -MOONSTREAM_LEADERBOARD_GENERATOR_BATCH_SIZE = 20000 +MOONSTREAM_LEADERBOARD_GENERATOR_BATCH_SIZE = 12000 MOONSTREAM_LEADERBOARD_GENERATOR_PUSH_TIMEOUT_SECONDS = 60