mirror of https://github.com/bellingcat/auto-archiver
antibot in docker uses a different user_data_dir
parent
71636233cb
commit
54fda9cad4
|
@ -23,7 +23,7 @@
|
||||||
},
|
},
|
||||||
"user_data_dir": {
|
"user_data_dir": {
|
||||||
"default": "secrets/antibot_user_data",
|
"default": "secrets/antibot_user_data",
|
||||||
"help": "Path to the user data directory for the webdriver. This is used to persist browser state, such as cookies and local storage. When using docker it's best to let docker create the folder otherwise there may be permission issues. The Extractor will try to work without it if that error occurs but login sessions will not be used or preserved on those runs.",
|
"help": "Path to the user data directory for the webdriver. This is used to persist browser state, such as cookies and local storage. If you use the docker deployment, this path will be appended with `_docker` that is because the folder cannot be shared between the host and the container due to user permissions.",
|
||||||
},
|
},
|
||||||
"detect_auth_wall": {
|
"detect_auth_wall": {
|
||||||
"default": True,
|
"default": True,
|
||||||
|
|
|
@ -6,7 +6,6 @@ import sys
|
||||||
import traceback
|
import traceback
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
import glob
|
import glob
|
||||||
import stat
|
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
@ -41,7 +40,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
|
||||||
else:
|
else:
|
||||||
self.max_download_videos = int(self.max_download_videos)
|
self.max_download_videos = int(self.max_download_videos)
|
||||||
|
|
||||||
self._prepare_and_warn_about_docker_and_user_data_dir()
|
self._prepare_user_data_dir()
|
||||||
|
|
||||||
self.dropins = self.load_dropins()
|
self.dropins = self.load_dropins()
|
||||||
|
|
||||||
|
@ -79,19 +78,12 @@ class AntibotExtractorEnricher(Extractor, Enricher):
|
||||||
result.status = "antibot"
|
result.status = "antibot"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _prepare_and_warn_about_docker_and_user_data_dir(self):
|
def _prepare_user_data_dir(self):
|
||||||
os.makedirs(self.user_data_dir, exist_ok=True)
|
if self.user_data_dir:
|
||||||
|
|
||||||
in_docker = os.environ.get("RUNNING_IN_DOCKER")
|
in_docker = os.environ.get("RUNNING_IN_DOCKER")
|
||||||
if in_docker and self.user_data_dir:
|
if in_docker:
|
||||||
st = os.stat(self.user_data_dir)
|
self.user_data_dir = self.user_data_dir.rstrip(os.path.sep) + "_docker"
|
||||||
perms = stat.filemode(st.st_mode)
|
os.makedirs(self.user_data_dir, exist_ok=True)
|
||||||
owner = st.st_uid
|
|
||||||
group = st.st_gid
|
|
||||||
if owner != 0 or group != 0:
|
|
||||||
logger.warning(
|
|
||||||
f"""ANTIBOT: Running in Docker with user_data_dir {self.user_data_dir} with permissions {perms} and non-root {owner=}. This may cause issues with Chrome, if you get 'session not created' errors make sure to remove the folder and let docker create it."""
|
|
||||||
)
|
|
||||||
|
|
||||||
def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool:
|
def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool:
|
||||||
using_user_data_dir = self.user_data_dir if custom_data_dir else None
|
using_user_data_dir = self.user_data_dir if custom_data_dir else None
|
||||||
|
|
Loading…
Reference in New Issue