kopia lustrzana https://github.com/bellingcat/auto-archiver
modifies base docker image to use browsertrix 1.4.2 (#182)
* modifies base image to newest browsertrix version
* modify browsertrix cmd args based on recent experience
pull/196/head
rodzic
d4fff0b6eb
commit
a6fc4e1bb1
12
Dockerfile
12
Dockerfile
|
@ -1,4 +1,4 @@
|
|||
FROM webrecorder/browsertrix-crawler:1.0.4 AS base
|
||||
FROM webrecorder/browsertrix-crawler:1.4.2 AS base
|
||||
|
||||
ENV RUNNING_IN_DOCKER=1 \
|
||||
LANG=C.UTF-8 \
|
||||
|
@ -29,21 +29,23 @@ ENV POETRY_NO_INTERACTION=1 \
|
|||
POETRY_VIRTUALENVS_CREATE=1
|
||||
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install "poetry>=2.0.0,<3.0.0"
|
||||
# Create a virtual environment for poetry and install it
|
||||
RUN python3 -m venv /poetry-venv && \
|
||||
/poetry-venv/bin/python -m pip install --upgrade pip && \
|
||||
/poetry-venv/bin/python -m pip install "poetry>=2.0.0,<3.0.0"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
COPY pyproject.toml poetry.lock README.md ./
|
||||
# Copy dependency files and install dependencies (excluding the package itself)
|
||||
RUN poetry install --only main --no-root --no-cache
|
||||
RUN /poetry-venv/bin/poetry install --only main --no-root --no-cache
|
||||
|
||||
|
||||
# Copy code: This is needed for poetry to install the package itself,
|
||||
# but the environment should be cached from the previous step if toml and lock files haven't changed
|
||||
COPY ./src/ .
|
||||
RUN poetry install --only main --no-cache
|
||||
RUN /poetry-venv/bin/poetry install --only main --no-cache
|
||||
|
||||
|
||||
# Update PATH to include virtual environment binaries
|
||||
|
|
|
@ -84,7 +84,8 @@ class WaczArchiverEnricher(Enricher, Archiver):
|
|||
"--behaviors", "autoscroll,autoplay,autofetch,siteSpecific",
|
||||
"--behaviorTimeout", str(self.timeout),
|
||||
"--timeout", str(self.timeout),
|
||||
"--blockAds" # TODO: test
|
||||
"--diskUtilization", "99",
|
||||
# "--blockAds" # note: this has been known to cause issues on cloudflare protected sites
|
||||
]
|
||||
|
||||
if self.docker_in_docker:
|
||||
|
|
Ładowanie…
Reference in New Issue