check if exists to configuration, save_logs to command line

pull/33/head
msramalho 2022-06-14 21:37:02 +02:00
rodzic a7a555ea85
commit 6872d8e103
3 zmienionych plików z 10 dodań i 2 usunięć

Wyświetl plik

@ -102,7 +102,7 @@ def process_sheet(c: Config):
logger.debug(f'Trying {archiver} on {row=}')
try:
result = archiver.download(url, check_if_exists=True)
result = archiver.download(url, check_if_exists=c.check_if_exists)
except KeyboardInterrupt as e: raise e # so the higher level catch can catch it
except Exception as e:
result = False

Wyświetl plik

@ -50,8 +50,10 @@ class Config:
self.header = int(getattr_or(self.args, "header", execution.get("header", 1)))
Storage.TMP_FOLDER = execution.get("tmp_folder", Storage.TMP_FOLDER)
self.storage = getattr_or(self.args, "storage", execution.get("storage", "s3"))
if getattr_or(self.args, "save_logs", False):
self.save_logs = getattr(self.args, "save_logs") or execution.get("save_logs", False)
if self.save_logs:
self.set_log_files()
self.check_if_exists = getattr(self.args, "check_if_exists") or execution.get("check_if_exists", False)
# Column names come from config and can be overwritten by CMD
# in the end all are considered as lower case
@ -150,6 +152,8 @@ class Config:
parser.add_argument('--storage', action='store', dest='storage', help='which storage to use [execution.storage in config.yaml]', choices=Config.AVAILABLE_STORAGES)
parser.add_argument('--sheet', action='store', dest='sheet', help='the name of the google sheets document [execution.sheet in config.yaml]')
parser.add_argument('--header', action='store', dest='header', help='1-based index for the header row [execution.header in config.yaml]')
parser.add_argument('--check-if-exists', action='store_true', dest='check_if_exists', help='when possible checks if the URL has been archived before and does not archive the same URL twice [exceution.check_if_exists]')
parser.add_argument('--save-logs', action='store_true', dest='save_logs', help='creates or appends execution logs to files logs/LEVEL.log [exceution.save_logs]')
parser.add_argument('--s3-private', action='store_true', help='Store content without public access permission (only for storage=s3) [secrets.s3.private in config.yaml]')
for k, v in GWorksheet.COLUMN_NAMES.items():
@ -210,6 +214,8 @@ class Config:
"sheet": self.sheet,
"storage": self.storage,
"header": self.header,
"check_if_exists": self.check_if_exists,
"save_logs": self.save_logs,
"tmp_folder": Storage.TMP_FOLDER,
"selenium_config": asdict(self.selenium_config),
"selenium_webdriver": self.webdriver != None,

Wyświetl plik

@ -53,6 +53,8 @@ execution:
header: 1
# which storage to use, can be overwritten with CMD --storage=
storage: s3
# defaults to false; when true, checks where possible whether a URL has already been archived and skips archiving it again
check_if_exists: true
# optional configurations for the selenium browser that takes screenshots, these are the defaults
selenium:
# values under 10s might cause the browser to fail to grab a screenshot