mirror of https://github.com/bellingcat/auto-archiver
adds better debug for wayback failures (#161)
parent
928518cda7
commit
ef471f41e1
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
import time, requests
|
import time, requests
|
||||||
|
|
||||||
|
@ -70,11 +71,16 @@ class WaybackArchiverEnricher(Enricher, Archiver):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# check job status
|
# check job status
|
||||||
job_id = r.json().get('job_id')
|
try:
|
||||||
if not job_id:
|
job_id = r.json().get('job_id')
|
||||||
logger.error(f"Wayback failed with {r.json()}")
|
if not job_id:
|
||||||
|
logger.error(f"Wayback failed with {r.json()}")
|
||||||
|
return False
|
||||||
|
except json.decoder.JSONDecodeError as e:
|
||||||
|
logger.error(f"Expected a JSON with job_id from Wayback and got {r.text}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
# waits at most timeout seconds until job is completed, otherwise only enriches the job_id information
|
# waits at most timeout seconds until job is completed, otherwise only enriches the job_id information
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
wayback_url = False
|
wayback_url = False
|
||||||
|
@ -92,6 +98,9 @@ class WaybackArchiverEnricher(Enricher, Archiver):
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
|
logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
|
||||||
break
|
break
|
||||||
|
except json.decoder.JSONDecodeError as e:
|
||||||
|
logger.error(f"Expected a JSON from Wayback and got {r.text} for {url=}")
|
||||||
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"error fetching status for {url=} due to: {e}")
|
logger.warning(f"error fetching status for {url=} due to: {e}")
|
||||||
if not wayback_url:
|
if not wayback_url:
|
||||||
|
|
Loading…
Reference in New Issue