enables api_db cache queries if configured with new option (#113)

pull/115/head v0.7.2
Miguel Sozinho Ramalho 2023-12-12 19:20:26 +00:00 committed by GitHub
parent 3e56ef137d
commit 6f36e92e02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 40 additions and 12 deletions

View file

@@ -105,7 +105,8 @@ class Metadata:
     def get_timestamp(self, utc=True, iso=True) -> datetime.datetime:
         ts = self.get("timestamp")
-        if not ts: return ts
+        if not ts: return
         if type(ts) == float: ts = datetime.datetime.fromtimestamp(ts)
         if utc: ts = ts.replace(tzinfo=datetime.timezone.utc)
         if iso: return ts.isoformat()
         return ts
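As a quick standalone illustration of the changed behavior (the values below are made up, not part of the commit): a float timestamp is converted, tagged as UTC, and returned as an ISO string, while a missing timestamp now yields None via the bare return instead of echoing the falsy value.

import datetime

ts = 1702408826.0  # illustrative epoch timestamp
dt = datetime.datetime.fromtimestamp(ts).replace(tzinfo=datetime.timezone.utc)
print(dt.isoformat())  # e.g. "2023-12-12T19:20:26+00:00"; the exact value depends on the local
                       # timezone, since fromtimestamp() converts to local time before tzinfo is overwritten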

View file

@@ -77,7 +77,7 @@ class ArchivingOrchestrator:
         if cached_result:
             logger.debug("Found previously archived entry")
             for d in self.databases:
-                d.done(cached_result)
+                d.done(cached_result, cached=True)
             return cached_result
 
         # 3 - call archivers until one succeeds
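For orientation, a paraphrase of the cache-lookup step that precedes the archiver loop (simplified, not a quote of the source): each database may return a previously archived result via fetch(), and on a hit every database is notified with the new cached flag instead of re-saving.

# paraphrased sketch; `result` is the Metadata being processed
cached_result = None
for d in self.databases:
    cached_result = d.fetch(result) or cached_result  # any DB may return a prior archive
if cached_result:
    logger.debug("Found previously archived entry")
    for d in self.databases:
        d.done(cached_result, cached=True)  # notify, but skip re-saving (see api_db below)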

View file

@@ -1,3 +1,4 @@
+from typing import Union
 import requests, os
 from loguru import logger
@@ -14,6 +15,7 @@ class AAApiDb(Database):
     def __init__(self, config: dict) -> None:
         # without this STEP.__init__ is not called
         super().__init__(config)
+        self.allow_rearchive = bool(self.allow_rearchive)
         self.assert_valid_string("api_endpoint")
         self.assert_valid_string("api_secret")
@@ -21,16 +23,37 @@ class AAApiDb(Database):
     def configs() -> dict:
         return {
             "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
-            "api_secret": {"default": None, "help": "API authentication secret"},
+            "api_secret": {"default": None, "help": "API Basic authentication secret [deprecating soon]"},
+            "api_token": {"default": None, "help": "API Bearer token, to be preferred over secret (Basic auth) going forward"},
             "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
             "author_id": {"default": None, "help": "which email to assign as author"},
             "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
+            "allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
             "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
         }
 
+    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
+        """ query the database for the existence of this item"""
+        if self.allow_rearchive: return
+
+        params = {"url": item.get_url(), "limit": 1}
+        headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
+        response = requests.get(os.path.join(self.api_endpoint, "tasks/search-url"), params=params, headers=headers)
+
-    def done(self, item: Metadata) -> None:
+        if response.status_code == 200:
+            logger.success(f"API returned a previously archived instance: {response.json()}")
+            # TODO: can we do better than just returning the first result?
+            return Metadata.from_dict(response.json()[0]["result"])
+
+        logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
+        return False
+
+    def done(self, item: Metadata, cached: bool=False) -> None:
         """archival result ready - should be saved to DB"""
-        logger.info(f"saving archive of {item.get_url()} to the AA API.")
+        if cached:
+            logger.debug(f"skipping saving archive of {item.get_url()} to the AA API because it was cached")
+            return
+
+        logger.debug(f"saving archive of {item.get_url()} to the AA API.")
         payload = {'result': item.to_json(), 'public': self.public, 'author_id': self.author_id, 'group_id': self.group_id, 'tags': list(self.tags)}
         response = requests.post(os.path.join(self.api_endpoint, "submit-archive"), json=payload, auth=("abc", self.api_secret))
@@ -39,3 +62,5 @@ class AAApiDb(Database):
             logger.success(f"AA API: {response.json()}")
         else:
             logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")

View file

@@ -27,6 +27,6 @@ class ConsoleDb(Database):
     def aborted(self, item: Metadata) -> None:
         logger.warning(f"ABORTED {item}")
 
-    def done(self, item: Metadata) -> None:
+    def done(self, item: Metadata, cached: bool=False) -> None:
         """archival result ready - should be saved to DB"""
         logger.success(f"DONE {item}")

View file

@@ -24,7 +24,7 @@ class CSVDb(Database):
         "csv_file": {"default": "db.csv", "help": "CSV file name"}
     }
 
-    def done(self, item: Metadata) -> None:
+    def done(self, item: Metadata, cached: bool=False) -> None:
         """archival result ready - should be saved to DB"""
         logger.success(f"DONE {item}")
         is_empty = not os.path.isfile(self.csv_file) or os.path.getsize(self.csv_file) == 0

View file

@@ -36,6 +36,6 @@ class Database(Step, ABC):
         return False
 
     @abstractmethod
-    def done(self, item: Metadata) -> None:
+    def done(self, item: Metadata, cached: bool=False) -> None:
         """archival result ready - should be saved to DB"""
         pass
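Since the abstract signature changed, every Database implementation has to accept the new parameter. A hypothetical minimal subclass (not part of the codebase; the Step config machinery and imports are omitted) showing the contract: fetch() may return a previously archived Metadata (falsy means no cache hit), and done() must now accept cached.

class InMemoryDb(Database):
    _store = {}  # url -> Metadata, purely illustrative

    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
        return self._store.get(item.get_url(), False)

    def done(self, item: Metadata, cached: bool = False) -> None:
        if not cached:  # don't re-save results that came from a cache hit
            self._store[item.get_url()] = item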

View file

@@ -41,7 +41,7 @@ class GsheetsDb(Database):
         """check if the given item has been archived already"""
         return False
 
-    def done(self, item: Metadata) -> None:
+    def done(self, item: Metadata, cached: bool=False) -> None:
         """archival result ready - should be saved to DB"""
         logger.success(f"DONE {item.get_url()}")
         gw, row = self._retrieve_gsheet(item)
@@ -57,8 +57,10 @@ class GsheetsDb(Database):
                 cell_updates.append((row, col, final_value))
             except Exception as e:
                 logger.error(f"Unable to batch {col}={final_value} due to {e}")
 
-        cell_updates.append((row, 'status', item.status))
+        status_message = item.status
+        if cached:
+            status_message = f"[cached] {status_message}"
+        cell_updates.append((row, 'status', status_message))
 
         media: Media = item.get_final_media()
         if hasattr(media, "urls"):
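The effect on the sheet's status column, shown with a hypothetical status value:

status_message = "wayback: success"  # hypothetical item.status
cached = True
if cached:
    status_message = f"[cached] {status_message}"
print(status_message)  # -> "[cached] wayback: success"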

View file

@ -3,7 +3,7 @@ _MAJOR = "0"
_MINOR = "7"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "1"
_PATCH = "2"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""