modifies base docker image to use browsertrix 1.4.2 (#182)

* modifies base image to newest browsertrix version

* modify browsertrix cmd args based on recent experience
pull/196/head
Miguel Sozinho Ramalho 2025-01-24 13:59:29 +00:00 zatwierdzone przez GitHub
rodzic d4fff0b6eb
commit a6fc4e1bb1
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
2 zmienionych plików z 9 dodań i 6 usunięć

Wyświetl plik

@@ -1,4 +1,4 @@
FROM webrecorder/browsertrix-crawler:1.0.4 AS base FROM webrecorder/browsertrix-crawler:1.4.2 AS base
ENV RUNNING_IN_DOCKER=1 \ ENV RUNNING_IN_DOCKER=1 \
LANG=C.UTF-8 \ LANG=C.UTF-8 \
@@ -29,21 +29,23 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_VIRTUALENVS_CREATE=1 POETRY_VIRTUALENVS_CREATE=1
RUN pip install --upgrade pip && \ # Create a virtual environment for poetry and install it
pip install "poetry>=2.0.0,<3.0.0" RUN python3 -m venv /poetry-venv && \
/poetry-venv/bin/python -m pip install --upgrade pip && \
/poetry-venv/bin/python -m pip install "poetry>=2.0.0,<3.0.0"
WORKDIR /app WORKDIR /app
COPY pyproject.toml poetry.lock README.md ./ COPY pyproject.toml poetry.lock README.md ./
# Copy dependency files and install dependencies (excluding the package itself) # Copy dependency files and install dependencies (excluding the package itself)
RUN poetry install --only main --no-root --no-cache RUN /poetry-venv/bin/poetry install --only main --no-root --no-cache
# Copy code: This is needed for poetry to install the package itself, # Copy code: This is needed for poetry to install the package itself,
# but the environment should be cached from the previous step if toml and lock files haven't changed # but the environment should be cached from the previous step if toml and lock files haven't changed
COPY ./src/ . COPY ./src/ .
RUN poetry install --only main --no-cache RUN /poetry-venv/bin/poetry install --only main --no-cache
# Update PATH to include virtual environment binaries # Update PATH to include virtual environment binaries

Wyświetl plik

@@ -84,7 +84,8 @@ class WaczArchiverEnricher(Enricher, Archiver):
"--behaviors", "autoscroll,autoplay,autofetch,siteSpecific", "--behaviors", "autoscroll,autoplay,autofetch,siteSpecific",
"--behaviorTimeout", str(self.timeout), "--behaviorTimeout", str(self.timeout),
"--timeout", str(self.timeout), "--timeout", str(self.timeout),
"--blockAds" # TODO: test "--diskUtilization", "99",
# "--blockAds" # note: this has been known to cause issues on cloudflare protected sites
] ]
if self.docker_in_docker: if self.docker_in_docker: