diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index bbdf37c..5820d0c 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -90,7 +90,9 @@ class ArchivingOrchestrator: if cached_result: logger.debug("Found previously archived entry") for d in self.databases: - d.done(cached_result, cached=True) + try: d.done(cached_result, cached=True) + except Exception as e: + logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}") return cached_result # 3 - call archivers until one succeeds @@ -120,6 +122,9 @@ class ArchivingOrchestrator: result.status = "nothing archived" # signal completion to databases and archivers - for d in self.databases: d.done(result) + for d in self.databases: + try: d.done(result) + except Exception as e: + logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}") return result diff --git a/src/auto_archiver/enrichers/wacz_enricher.py b/src/auto_archiver/enrichers/wacz_enricher.py index caa7ab5..265cf81 100644 --- a/src/auto_archiver/enrichers/wacz_enricher.py +++ b/src/auto_archiver/enrichers/wacz_enricher.py @@ -39,14 +39,19 @@ class WaczArchiverEnricher(Enricher, Archiver): def setup(self) -> None: self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER') self.browsertrix_home_host = os.environ.get('BROWSERTRIX_HOME_HOST') + self.browsertrix_home_container = os.environ.get('BROWSERTRIX_HOME_CONTAINER') or self.browsertrix_home_host # create crawls folder if not exists, so it can be safely removed in cleanup - if self.use_docker and self.browsertrix_home_host: - os.makedirs(self.browsertrix_home_host, exist_ok=True) + if self.use_docker: + if self.browsertrix_home_container: + os.makedirs(self.browsertrix_home_container, exist_ok=True) def cleanup(self) -> None: - if self.use_docker and self.browsertrix_home_host: - logger.debug(f"Removing {self.browsertrix_home_host=}") - shutil.rmtree(self.browsertrix_home_host, ignore_errors=True) + if self.use_docker: + if self.browsertrix_home_container: + logger.debug(f"Removing {self.browsertrix_home_container=}") + shutil.rmtree(self.browsertrix_home_container, ignore_errors=True) + + def download(self, item: Metadata) -> Metadata: # this new Metadata object is required to avoid duplication @@ -64,7 +69,7 @@ class WaczArchiverEnricher(Enricher, Archiver): collection = random_str(8) browsertrix_home_host = self.browsertrix_home_host or os.path.abspath(ArchivingContext.get_tmp_dir()) - browsertrix_home_container = os.environ.get('BROWSERTRIX_HOME_CONTAINER') or browsertrix_home_host + browsertrix_home_container = self.browsertrix_home_container or browsertrix_home_host cmd = [ "crawl", diff --git a/src/auto_archiver/version.py b/src/auto_archiver/version.py index 6f2564b..7cabe56 100644 --- a/src/auto_archiver/version.py +++ b/src/auto_archiver/version.py @@ -3,7 +3,7 @@ _MAJOR = "0" _MINOR = "9" # On main and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "3" +_PATCH = "4" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. _SUFFIX = ""