kopia lustrzana https://github.com/bellingcat/auto-archiver
detecting errors at a higher level to avoid false "in progress" messages
rodzic
06e8781f0f
commit
eca10023b0
|
@ -80,54 +80,59 @@ def process_sheet(c: Config):
|
||||||
if not is_retry: continue
|
if not is_retry: continue
|
||||||
|
|
||||||
# All checks done - archival process starts here
|
# All checks done - archival process starts here
|
||||||
gw.set_cell(row, 'status', 'Archive in progress')
|
try:
|
||||||
url = expand_url(url)
|
gw.set_cell(row, 'status', 'Archive in progress')
|
||||||
c.set_folder(gw.get_cell_or_default(row, 'folder', default_folder, when_empty_use_default=True))
|
url = expand_url(url)
|
||||||
|
c.set_folder(gw.get_cell_or_default(row, 'folder', default_folder, when_empty_use_default=True))
|
||||||
|
|
||||||
# make a new driver so each spreadsheet row is idempotent
|
# make a new driver so each spreadsheet row is idempotent
|
||||||
c.recreate_webdriver()
|
c.recreate_webdriver()
|
||||||
|
|
||||||
# order matters, first to succeed excludes remaining
|
# order matters, first to succeed excludes remaining
|
||||||
active_archivers = [
|
active_archivers = [
|
||||||
TelethonArchiver(storage, c.webdriver, c.telegram_config),
|
TelethonArchiver(storage, c.webdriver, c.telegram_config),
|
||||||
TiktokArchiver(storage, c.webdriver),
|
TiktokArchiver(storage, c.webdriver),
|
||||||
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
|
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
|
||||||
TelegramArchiver(storage, c.webdriver),
|
TelegramArchiver(storage, c.webdriver),
|
||||||
TwitterArchiver(storage, c.webdriver),
|
TwitterArchiver(storage, c.webdriver),
|
||||||
WaybackArchiver(storage, c.webdriver, c.wayback_config)
|
WaybackArchiver(storage, c.webdriver, c.wayback_config)
|
||||||
]
|
]
|
||||||
|
|
||||||
for archiver in active_archivers:
|
for archiver in active_archivers:
|
||||||
logger.debug(f'Trying {archiver} on {row=}')
|
logger.debug(f'Trying {archiver} on {row=}')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = archiver.download(url, check_if_exists=True)
|
result = archiver.download(url, check_if_exists=True)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt as e: raise e # so the higher level catch can catch it
|
||||||
# catches keyboard interruptions to do a clean exit
|
except Exception as e:
|
||||||
logger.warning(f"caught interrupt for {archiver} on {row=}")
|
result = False
|
||||||
gw.set_cell(row, 'status', '')
|
logger.error(f'Got unexpected error in row {row} with {archiver.name} for {url=}: {e}\n{traceback.format_exc()}')
|
||||||
c.destroy_webdriver()
|
|
||||||
exit()
|
if result:
|
||||||
except Exception as e:
|
success = result.status in ['success', 'already archived']
|
||||||
result = False
|
result.status = f"{archiver.name}: {result.status}"
|
||||||
logger.error(f'Got unexpected error in row {row} with {archiver.name} for {url=}: {e}\n{traceback.format_exc()}')
|
if success:
|
||||||
|
logger.success(f'{archiver.name} succeeded on {row=}, {url=}')
|
||||||
|
break
|
||||||
|
# only 1 retry possible for now
|
||||||
|
if is_retry and Archiver.is_retry(result.status):
|
||||||
|
result.status = Archiver.remove_retry(result.status)
|
||||||
|
logger.warning(f'{archiver.name} did not succeed on {row=}, final status: {result.status}')
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
success = result.status in ['success', 'already archived']
|
update_sheet(gw, row, result)
|
||||||
result.status = f"{archiver.name}: {result.status}"
|
else:
|
||||||
if success:
|
gw.set_cell(row, 'status', 'failed: no archiver')
|
||||||
logger.success(f'{archiver.name} succeeded on {row=}, {url=}')
|
except KeyboardInterrupt:
|
||||||
break
|
# catches keyboard interruptions to do a clean exit
|
||||||
# only 1 retry possible for now
|
logger.warning(f"caught interrupt on {row=}, {url=}")
|
||||||
if is_retry and Archiver.is_retry(result.status):
|
gw.set_cell(row, 'status', '')
|
||||||
result.status = Archiver.remove_retry(result.status)
|
c.destroy_webdriver()
|
||||||
logger.warning(f'{archiver.name} did not succeed on {row=}, final status: {result.status}')
|
exit()
|
||||||
|
except Exception as e:
|
||||||
if result:
|
logger.error(f'Got unexpected error in row {row} for {url=}: {e}\n{traceback.format_exc()}')
|
||||||
update_sheet(gw, row, result)
|
gw.set_cell(row, 'status', 'failed: unexpected error (see logs)')
|
||||||
else:
|
logger.success(f'Finished worksheet {wks.title}')
|
||||||
gw.set_cell(row, 'status', 'failed: no archiver')
|
|
||||||
logger.success(f'Finshed worksheet {wks.title}')
|
|
||||||
|
|
||||||
|
|
||||||
@logger.catch
|
@logger.catch
|
||||||
|
|
Ładowanie…
Reference in New Issue