import os
import time

from loguru import logger
from selenium.common.exceptions import TimeoutException

from auto_archiver.core import Enricher
from ..utils import Webdriver, url as UrlUtil, random_str
from ..core import Media, Metadata


class ScreenshotEnricher(Enricher):
    """Enricher that attaches a browser screenshot of the item's URL.

    The page is opened through the project's ``Webdriver`` context manager,
    saved as a PNG inside ``self.tmp_dir`` and added to the metadata as a
    media entry with the id ``"screenshot"``.
    """

    name = "screenshot_enricher"

    @staticmethod
    def configs() -> dict:
        """Return the configurable options, each with a default and help text."""
        return {
            "width": {
                "default": 1280,
                "help": "width of the screenshots",
            },
            "height": {
                "default": 720,
                "help": "height of the screenshots",
            },
            "timeout": {
                "default": 60,
                "help": "timeout for taking the screenshot",
            },
            "sleep_before_screenshot": {
                "default": 4,
                "help": "seconds to wait for the pages to load before taking screenshot",
            },
            "http_proxy": {
                "default": "",
                "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port",
            },
        }

    def enrich(self, to_enrich: Metadata) -> None:
        """Take a screenshot of ``to_enrich``'s URL and attach it as media.

        URLs reported as auth walls by ``UrlUtil.is_auth_wall`` are skipped.
        Failures while loading the page or saving the screenshot are logged
        and swallowed, so this method never raises for them.

        Args:
            to_enrich: metadata object providing the URL and receiving the
                screenshot media.
        """
        url = to_enrich.get_url()

        # Nothing useful can be captured from a login/auth page.
        if UrlUtil.is_auth_wall(url):
            logger.debug(f"[SKIP] SCREENSHOT since url is behind AUTH WALL: {url=}")
            return

        logger.debug(f"Enriching screenshot for {url=}")

        # NOTE(review): the 4th positional argument is True only for
        # facebook.com URLs — presumably it toggles facebook-specific
        # handling inside Webdriver; confirm against its definition.
        webdriver = Webdriver(
            self.width,
            self.height,
            self.timeout,
            'facebook.com' in url,
            http_proxy=self.http_proxy,
        )
        with webdriver as driver:
            try:
                driver.get(url)

                # Give the page time to finish rendering before capturing.
                wait_seconds = int(self.sleep_before_screenshot)
                time.sleep(wait_seconds)

                # Random suffix keeps files from different items apart.
                target_dir = self.tmp_dir
                screenshot_file = os.path.join(target_dir, f"screenshot_{random_str(8)}.png")
                driver.save_screenshot(screenshot_file)

                screenshot_media = Media(filename=screenshot_file)
                to_enrich.add_media(screenshot_media, id="screenshot")
            except TimeoutException:
                logger.info("TimeoutException loading page for screenshot")
            except Exception as e:
                # Best-effort enricher: log and carry on rather than abort.
                logger.error(f"Got error while loading webdriver for screenshot enricher: {e}")