detecting errors at a higher level to avoid false "in progress" messages

pull/33/head
msramalho 2022-06-14 19:28:34 +02:00
rodzic 06e8781f0f
commit eca10023b0
1 zmienionych plików z 47 dodań i 42 usunięć

Wyświetl plik

@@ -80,54 +80,59 @@ def process_sheet(c: Config):
if not is_retry: continue if not is_retry: continue
# All checks done - archival process starts here # All checks done - archival process starts here
gw.set_cell(row, 'status', 'Archive in progress') try:
url = expand_url(url) gw.set_cell(row, 'status', 'Archive in progress')
c.set_folder(gw.get_cell_or_default(row, 'folder', default_folder, when_empty_use_default=True)) url = expand_url(url)
c.set_folder(gw.get_cell_or_default(row, 'folder', default_folder, when_empty_use_default=True))
# make a new driver so each spreadsheet row is idempotent # make a new driver so each spreadsheet row is idempotent
c.recreate_webdriver() c.recreate_webdriver()
# order matters, first to succeed excludes remaining # order matters, first to succeed excludes remaining
active_archivers = [ active_archivers = [
TelethonArchiver(storage, c.webdriver, c.telegram_config), TelethonArchiver(storage, c.webdriver, c.telegram_config),
TiktokArchiver(storage, c.webdriver), TiktokArchiver(storage, c.webdriver),
YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie), YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie),
TelegramArchiver(storage, c.webdriver), TelegramArchiver(storage, c.webdriver),
TwitterArchiver(storage, c.webdriver), TwitterArchiver(storage, c.webdriver),
WaybackArchiver(storage, c.webdriver, c.wayback_config) WaybackArchiver(storage, c.webdriver, c.wayback_config)
] ]
for archiver in active_archivers: for archiver in active_archivers:
logger.debug(f'Trying {archiver} on {row=}') logger.debug(f'Trying {archiver} on {row=}')
try: try:
result = archiver.download(url, check_if_exists=True) result = archiver.download(url, check_if_exists=True)
except KeyboardInterrupt: except KeyboardInterrupt as e: raise e # so the higher level catch can catch it
# catches keyboard interruptions to do a clean exit except Exception as e:
logger.warning(f"caught interrupt for {archiver} on {row=}") result = False
gw.set_cell(row, 'status', '') logger.error(f'Got unexpected error in row {row} with {archiver.name} for {url=}: {e}\n{traceback.format_exc()}')
c.destroy_webdriver()
exit() if result:
except Exception as e: success = result.status in ['success', 'already archived']
result = False result.status = f"{archiver.name}: {result.status}"
logger.error(f'Got unexpected error in row {row} with {archiver.name} for {url=}: {e}\n{traceback.format_exc()}') if success:
logger.success(f'{archiver.name} succeeded on {row=}, {url=}')
break
# only 1 retry possible for now
if is_retry and Archiver.is_retry(result.status):
result.status = Archiver.remove_retry(result.status)
logger.warning(f'{archiver.name} did not succeed on {row=}, final status: {result.status}')
if result: if result:
success = result.status in ['success', 'already archived'] update_sheet(gw, row, result)
result.status = f"{archiver.name}: {result.status}" else:
if success: gw.set_cell(row, 'status', 'failed: no archiver')
logger.success(f'{archiver.name} succeeded on {row=}, {url=}') except KeyboardInterrupt:
break # catches keyboard interruptions to do a clean exit
# only 1 retry possible for now logger.warning(f"caught interrupt on {row=}, {url=}")
if is_retry and Archiver.is_retry(result.status): gw.set_cell(row, 'status', '')
result.status = Archiver.remove_retry(result.status) c.destroy_webdriver()
logger.warning(f'{archiver.name} did not succeed on {row=}, final status: {result.status}') exit()
except Exception as e:
if result: logger.error(f'Got unexpected error in row {row} for {url=}: {e}\n{traceback.format_exc()}')
update_sheet(gw, row, result) gw.set_cell(row, 'status', 'failed: unexpected error (see logs)')
else: logger.success(f'Finished worksheet {wks.title}')
gw.set_cell(row, 'status', 'failed: no archiver')
logger.success(f'Finshed worksheet {wks.title}')
@logger.catch @logger.catch