From 54fda9cad4a83bec52d522ca64d07b6acd8330f6 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 10 Jun 2025 18:04:27 +0100 Subject: [PATCH] antibot in docker uses a different user_data_dir --- .../__manifest__.py | 2 +- .../antibot_extractor_enricher.py | 22 ++++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/__manifest__.py b/src/auto_archiver/modules/antibot_extractor_enricher/__manifest__.py index 214653c..c7be89f 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/__manifest__.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/__manifest__.py @@ -23,7 +23,7 @@ }, "user_data_dir": { "default": "secrets/antibot_user_data", - "help": "Path to the user data directory for the webdriver. This is used to persist browser state, such as cookies and local storage. When using docker it's best to let docker create the folder otherwise there may be permission issues. The Extractor will try to work without it if that error occurs but login sessions will not be used or preserved on those runs.", + "help": "Path to the user data directory for the webdriver. This is used to persist browser state, such as cookies and local storage. If you use the docker deployment, this path will be appended with `_docker` that is because the folder cannot be shared between the host and the container due to user permissions.", }, "detect_auth_wall": { "default": True, diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py index 08d0c03..549cced 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py @@ -6,7 +6,6 @@ import sys import traceback from urllib.parse import urljoin import glob -import stat import importlib.util from loguru import logger @@ -41,7 +40,7 @@ class AntibotExtractorEnricher(Extractor, Enricher): else: self.max_download_videos = int(self.max_download_videos) - self._prepare_and_warn_about_docker_and_user_data_dir() + self._prepare_user_data_dir() self.dropins = self.load_dropins() @@ -79,19 +78,12 @@ class AntibotExtractorEnricher(Extractor, Enricher): result.status = "antibot" return result - def _prepare_and_warn_about_docker_and_user_data_dir(self): - os.makedirs(self.user_data_dir, exist_ok=True) - - in_docker = os.environ.get("RUNNING_IN_DOCKER") - if in_docker and self.user_data_dir: - st = os.stat(self.user_data_dir) - perms = stat.filemode(st.st_mode) - owner = st.st_uid - group = st.st_gid - if owner != 0 or group != 0: - logger.warning( - f"""ANTIBOT: Running in Docker with user_data_dir {self.user_data_dir} with permissions {perms} and non-root {owner=}. This may cause issues with Chrome, if you get 'session not created' errors make sure to remove the folder and let docker create it.""" - ) + def _prepare_user_data_dir(self): + if self.user_data_dir: + in_docker = os.environ.get("RUNNING_IN_DOCKER") + if in_docker: + self.user_data_dir = self.user_data_dir.rstrip(os.path.sep) + "_docker" + os.makedirs(self.user_data_dir, exist_ok=True) def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool: using_user_data_dir = self.user_data_dir if custom_data_dir else None