kopia lustrzana https://github.com/bellingcat/auto-archiver
rodzic
128d4136e3
commit
a786d4bb0e
|
@ -165,3 +165,16 @@ class Metadata:
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return self.__repr__()
|
return self.__repr__()
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
def choose_most_complete(results: "List[Metadata]") -> "Metadata | None":
    """Return the most complete result from a list of results.

    Results with more media are preferred; when the media counts are equal,
    the result with more metadata entries is preferred. If several results
    are equally complete, the one that appears first in ``results`` is kept.

    Args:
        results: candidate results, each exposing sized ``media`` and
            ``metadata`` attributes.

    Returns:
        The most complete result, or ``None`` when ``results`` is empty.
    """
    # max() returns the first of several equally-ranked items, which matches
    # replacing the current best only when a later one ranks strictly higher.
    return max(
        results,
        key=lambda r: (len(r.media), len(r.metadata)),
        default=None,
    )
|
|
@ -35,15 +35,15 @@ class AAApiDb(Database):
|
||||||
""" query the database for the existence of this item"""
|
""" query the database for the existence of this item"""
|
||||||
if not self.allow_rearchive: return
|
if not self.allow_rearchive: return
|
||||||
|
|
||||||
params = {"url": item.get_url(), "limit": 1}
|
params = {"url": item.get_url(), "limit": 15}
|
||||||
headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
|
headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
|
||||||
response = requests.get(os.path.join(self.api_endpoint, "tasks/search-url"), params=params, headers=headers)
|
response = requests.get(os.path.join(self.api_endpoint, "tasks/search-url"), params=params, headers=headers)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
if len(response.json()):
|
if len(response.json()):
|
||||||
logger.success(f"API returned a previously archived instance: {response.json()}")
|
logger.success(f"API returned {len(response.json())} previously archived instance(s)")
|
||||||
# TODO: can we do better than just returning the most recent result?
|
fetched_metadata = [Metadata.from_dict(r["result"]) for r in response.json()]
|
||||||
return Metadata.from_dict(response.json()[0]["result"])
|
return Metadata.choose_most_complete(fetched_metadata)
|
||||||
else:
|
else:
|
||||||
logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
|
logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -3,7 +3,7 @@ _MAJOR = "0"
|
||||||
_MINOR = "7"
|
_MINOR = "7"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "4"
|
_PATCH = "5"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|
Ładowanie…
Reference in New Issue