fix: docker+wacz troubles

pull/93/head
msramalho 2023-09-08 15:09:50 +01:00
rodzic edcb2da74a
commit 0dd45d90f1
2 zmienionych plików z 3 dodań i 2 usunięć

Wyświetl plik

@@ -2,7 +2,7 @@ FROM webrecorder/browsertrix-crawler:latest
ENV RUNNING_IN_DOCKER=1
-WORKDIR /app/auto-archiver
+WORKDIR /app
RUN pip install --upgrade pip && \
pip install pipenv && \

Wyświetl plik

@@ -27,6 +27,7 @@ class WaczArchiverEnricher(Enricher, Archiver):
def configs() -> dict:
return {
"profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."},
+"browsertrix_home": {"default": None, "help": "path to use when calling docker run with a volume, by default it will be the tmp folder generated during execution, but setting this option is needed when running the auto-archiver in a docker container that calls another container via DooD."},
"timeout": {"default": 120, "help": "timeout for WACZ generation in seconds"},
"extract_media": {"default": True, "help": "If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media. The .wacz file will be kept untouched."}
}
@@ -46,7 +47,7 @@ class WaczArchiverEnricher(Enricher, Archiver):
url = to_enrich.get_url()
collection = str(uuid.uuid4())[0:8]
-browsertrix_home = os.path.abspath(ArchivingContext.get_tmp_dir())
+browsertrix_home = self.browsertrix_home or os.path.abspath(ArchivingContext.get_tmp_dir())
if os.getenv('RUNNING_IN_DOCKER'):
logger.debug(f"generating WACZ without Docker for {url=}")