From b19bd9a81a4d67920cc67236edb9755e6cdaed25 Mon Sep 17 00:00:00 2001
From: Dave Mateer
Date: Mon, 25 Apr 2022 16:14:59 +0100
Subject: [PATCH] auto

---
 .vscode/launch.json        |  9 +++++++++
 archivers/base_archiver.py |  5 -----
 auto_archive.py            | 20 ++++++++++++++------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index fe359b1..a1bd561 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -19,6 +19,15 @@
             "justMyCode": true,
             "args": ["--sheet","Test Hashing CIR"]
         },
+        {
+            "name": "Archive - Dearbhla",
+            "type": "python",
+            "request": "launch",
+            "program": "auto_archive.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "args": ["--sheet","Archive - Dearbhla"]
+        },
         {
             "name": "Kayleigh - test - DM",
             "type": "python",
diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py
index af1fca6..d30f832 100644
--- a/archivers/base_archiver.py
+++ b/archivers/base_archiver.py
@@ -127,12 +127,10 @@ class Archiver(ABC):
         return hash.hexdigest()
 
     def get_screenshot(self, url):
-        logger.debug(f'In get_screenshot for {url}')
         key = self.get_key(urlparse(url).path.replace(
             "/", "_") + datetime.datetime.utcnow().isoformat().replace(" ", "_") + ".png")
         filename = 'tmp/' + key
 
-
         # DM - Accept cookies popup dismiss for ytdlp video
         if 'facebook.com' in url:
             try:
@@ -141,11 +139,8 @@
             except:
                 logger.error('Failed on fb accept cookies')
 
-        logger.debug(f'get_screenshot: Requesting url')
-
         self.driver.get(url)
-        logger.debug(f'get_screenshot: Back from request')
 
         time.sleep(6)
         self.driver.save_screenshot(filename)
 
diff --git a/auto_archive.py b/auto_archive.py
index 87a964a..31fedf3 100644
--- a/auto_archive.py
+++ b/auto_archive.py
@@ -15,8 +15,10 @@
 from utils import GWorksheet, mkdir_if_not_exists
 import sys
 
-load_dotenv()
+logger.add("trace.log", level="TRACE")
+logger.add("warnings.log", level="WARNING")
 
+load_dotenv()
 
 def update_sheet(gw, row, result: archivers.ArchiveResult):
     cell_updates = []
@@ -67,6 +69,9 @@ def process_sheet(sheet, header=1, columns=GWorksheet.COLUMN_NAMES):
     gc = gspread.service_account(filename='service_account.json')
     sh = gc.open(sheet)
 
+    # DM test raise error for decorator to catch
+    # raise ValueError('A very specific bad thing happened.')
+
     s3_config = S3Config(
         bucket=os.getenv('DO_BUCKET'),
         region=os.getenv('DO_SPACES_REGION'),
@@ -88,7 +93,9 @@
 
     # loop through worksheets to check
     for ii, wks in enumerate(sh.worksheets()):
-        logger.info(f'Opening worksheet {ii}: "{wks.title}" header={header}')
+        # logger.info(f'Opening worksheet {ii}: "{wks.title}" header={header}')
+        # DM take " out of log message and clarify ii
+        logger.info(f'Opening worksheet ii={ii}: {wks.title} header={header}')
         gw = GWorksheet(wks, header_row=header, columns=columns)
 
         if not gw.col_exists('url'):
@@ -134,6 +141,7 @@
                 result = archiver.download(url, check_if_exists=True)
             except Exception as e:
                 result = False
+                # DM loguru writes traceback to files so this traceback may be superfluous
                 logger.error(f'Got unexpected error in row {row} with archiver {archiver} for url {url}: {e}\n{traceback.format_exc()}')
 
             if result:
@@ -166,10 +174,11 @@
         logger.success(f'Finshed worksheet {wks.title}')
 
     driver.quit()
-
+@logger.catch
 def main():
-
-    print(sys.argv[1:])
+    # DM don't want to use print anymore
+    # print(sys.argv[1:])
+    logger.info(f'Passed args:{sys.argv}')
 
     parser = argparse.ArgumentParser(
         description='Automatically archive social media videos from a Google Sheets document')
@@ -192,4 +201,3 @@
 
 if __name__ == '__main__':
     main()
-    logger.success("finished")
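
Note on the loguru changes (not part of the patch): the file sinks registered at import time and the @logger.catch decorator on main() work together, so any uncaught exception is written with its full traceback to both log files. Below is a minimal standalone sketch of that pattern, reusing the same sink names and levels as the patch; the failing function body is illustrative only, mirroring the commented-out test line in process_sheet.

    from loguru import logger

    # Same sinks as the patch: TRACE and up go to trace.log,
    # WARNING and up go to warnings.log.
    logger.add("trace.log", level="TRACE")
    logger.add("warnings.log", level="WARNING")

    @logger.catch  # logs uncaught exceptions at ERROR level, with traceback,
    def main():    # and by default does not re-raise
        logger.info("about to fail")
        # Hypothetical failure for illustration only:
        raise ValueError("A very specific bad thing happened.")

    if __name__ == "__main__":
        main()
        # ERROR >= WARNING, so the traceback lands in both
        # trace.log and warnings.log.

This is also why the comment added in the except block notes that traceback.format_exc() may be superfluous: once the sinks and decorator are in place, loguru already records the traceback on its own.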