kopia lustrzana https://github.com/bellingcat/auto-archiver
fix: docker+wacz troubles
rodzic
edcb2da74a
commit
0dd45d90f1
|
@@ -2,7 +2,7 @@ FROM webrecorder/browsertrix-crawler:latest
|
|||
|
||||
ENV RUNNING_IN_DOCKER=1
|
||||
|
||||
-WORKDIR /app/auto-archiver
|
||||
+WORKDIR /app
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install pipenv && \
|
||||
|
|
|
@@ -27,6 +27,7 @@ class WaczArchiverEnricher(Enricher, Archiver):
|
|||
def configs() -> dict:
|
||||
return {
|
||||
"profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."},
|
||||
+"browsertrix_home": {"default": None, "help": "path to use when calling docker run with a volume, by default it will be the tmp folder generated during execution, but setting this option is needed when running the auto-archiver in a docker container that calls another container via DooD."},
|
||||
"timeout": {"default": 120, "help": "timeout for WACZ generation in seconds"},
|
||||
"extract_media": {"default": True, "help": "If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media. The .wacz file will be kept untouched."}
|
||||
}
|
||||
|
@@ -46,7 +47,7 @@ class WaczArchiverEnricher(Enricher, Archiver):
|
|||
url = to_enrich.get_url()
|
||||
|
||||
collection = str(uuid.uuid4())[0:8]
|
||||
-browsertrix_home = os.path.abspath(ArchivingContext.get_tmp_dir())
|
||||
+browsertrix_home = self.browsertrix_home or os.path.abspath(ArchivingContext.get_tmp_dir())
|
||||
|
||||
if os.getenv('RUNNING_IN_DOCKER'):
|
||||
logger.debug(f"generating WACZ without Docker for {url=}")
|
||||
|
|
Ładowanie…
Reference in New Issue