Mirror of https://github.com/bellingcat/auto-archiver

Ruff fix on src.

parent 85abe1837a
commit ca44a40b88

@@ -0,0 +1,7 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.10
+    hooks:
+      - id: ruff
+        # args: [ --fix ]
+      - id: ruff-format

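The hunk above adds ruff to pre-commit as a lint hook and a format hook. For orientation, a minimal sketch of running the equivalent checks directly (assuming the `ruff` executable is installed, and using the `src` directory named in the commit message):

```python
# Minimal sketch: the two steps the hooks above configure, run ad hoc.
import subprocess

# Lint pass; passing "--fix" would apply autofixes, mirroring the
# commented-out `args: [ --fix ]` line in the hook definition.
subprocess.run(["ruff", "check", "src"], check=True)

# Formatting pass, mirroring the `ruff-format` hook.
subprocess.run(["ruff", "format", "src"], check=True)
```
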
@@ -1,8 +1,8 @@
from __future__ import annotations

-from typing import Mapping, Any, Type, TYPE_CHECKING
+from typing import Mapping, Any, TYPE_CHECKING
from abc import ABC
-from copy import deepcopy, copy
+from copy import deepcopy
from tempfile import TemporaryDirectory
from auto_archiver.utils import url as UrlUtil
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES

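The two changes above drop imported names that are never referenced (ruff's F401, unused import). A minimal sketch of the pattern using names from this hunk:

```python
# After the cleanup every imported name is used; `copy` and `Type` were
# imported but unreferenced, so the autofix removes them.
from copy import deepcopy

def clone_config(config: dict) -> dict:
    # deepcopy is actually called, so its import stays
    return deepcopy(config)

print(clone_config({"steps": {"extractors": []}}))
```
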
@@ -6,7 +6,7 @@ flexible setup in various environments.
"""

import argparse
-from ruamel.yaml import YAML, CommentedMap, add_representer
+from ruamel.yaml import YAML, CommentedMap
import json

from loguru import logger

@@ -14,7 +14,6 @@ from loguru import logger
from copy import deepcopy
from auto_archiver.core.consts import MODULE_TYPES

-from typing import Any, List, Type, Tuple

_yaml: YAML = YAML()

@@ -7,12 +7,9 @@ Factory method to initialize an extractor instance based on its name.
"""

from __future__ import annotations
from pathlib import Path
from abc import abstractmethod
from dataclasses import dataclass
import mimetypes
import os
-import mimetypes
import requests
from loguru import logger
from retrying import retry

@@ -13,7 +13,7 @@ from __future__ import annotations
import hashlib
from typing import Any, List, Union, Dict
from dataclasses import dataclass, field
-from dataclasses_json import dataclass_json, config
+from dataclasses_json import dataclass_json
import datetime
from urllib.parse import urlparse
from dateutil.parser import parse as parse_dt

@@ -123,7 +123,7 @@ Here's how that would look: \n\nsteps:\n {module_type}s:\n - [your_{module_typ
            )
        if module_type == "extractor" and config["steps"].get("archivers"):
            raise SetupError(
-                f"As of version 0.13.0 of Auto Archiver, the 'archivers' step name has been changed to 'extractors'. Change this in your configuration file and try again. \
+                "As of version 0.13.0 of Auto Archiver, the 'archivers' step name has been changed to 'extractors'. Change this in your configuration file and try again. \
Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_here]\n enrichers:...\n"
            )
        raise SetupError(

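The one-character change above recurs in many hunks below: an `f` prefix is removed from a string literal that contains no `{}` placeholders (ruff's F541), so the resulting message is unchanged. A minimal illustration:

```python
# F541: an f-prefix with no placeholders does nothing, so the fix drops it.
step = "extractors"
old_msg = f"the 'archivers' step name has been changed"  # flagged: no {} fields
new_msg = "the 'archivers' step name has been changed"   # same string, plain literal
keep_f = f"the new step name is {step}"                  # real placeholder keeps the f-prefix
assert old_msg == new_msg
print(keep_f)
```
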
@@ -135,7 +135,7 @@ class GDriveStorage(Storage):
        debug_header: str = f"[searching {name=} in {parent_id=}]"
        query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false "
        if use_mime_type:
-            query_string += f" and mimeType='application/vnd.google-apps.folder' "
+            query_string += " and mimeType='application/vnd.google-apps.folder' "

        for attempt in range(retries):
            results = (

@@ -1,4 +1,5 @@
-import datetime, os
+import datetime
+import os
import importlib
import subprocess
from typing import Generator, Type

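This hunk, and several like it below, splits a combined import statement into one import per line (pycodestyle E401, "multiple imports on one line"); behaviour is identical, but single-name imports are easier to diff and prune. A small sketch:

```python
# Before the fix: import datetime, os  (one statement, two modules)
# After the fix: one module per line, bound to the same names.
import datetime
import os

print(datetime.datetime.now().isoformat(), os.getcwd())
```
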
@@ -386,7 +387,7 @@ class GenericExtractor(Extractor):
            item.set("replaced_url", url)

        ydl_options = {
-            "outtmpl": os.path.join(self.tmp_dir, f"%(id)s.%(ext)s"),
+            "outtmpl": os.path.join(self.tmp_dir, "%(id)s.%(ext)s"),
            "quiet": False,
            "noplaylist": not self.allow_playlist,
            "writesubtitles": self.subtitles,

@@ -1,4 +1,6 @@
-import re, mimetypes, json
+import re
+import mimetypes
+import json
from datetime import datetime

from loguru import logger

@@ -35,7 +37,7 @@ class Twitter(GenericDropin):
        result = Metadata()
        try:
            if not tweet.get("user") or not tweet.get("created_at"):
-                raise ValueError(f"Error retreiving post. Are you sure it exists?")
+                raise ValueError("Error retreiving post. Are you sure it exists?")
            timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
        except (ValueError, KeyError) as ex:
            logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")

@@ -20,7 +20,7 @@ from slugify import slugify
from auto_archiver.core import Feeder, Database, Media
from auto_archiver.core import Metadata
from auto_archiver.modules.gsheet_feeder_db import GWorksheet
-from auto_archiver.utils.misc import calculate_file_hash, get_current_timestamp
+from auto_archiver.utils.misc import get_current_timestamp


class GsheetsFeederDB(Feeder, Database):

@@ -1,5 +1,7 @@
from __future__ import annotations
-import mimetypes, os, pathlib
+import mimetypes
+import os
+import pathlib
from jinja2 import Environment, FileSystemLoader
from urllib.parse import quote
from loguru import logger

@@ -95,7 +95,7 @@ class InstagramAPIExtractor(Extractor):
        result.set_title(user.get("full_name", username)).set("data", user)
        if pic_url := user.get("profile_pic_url_hd", user.get("profile_pic_url")):
            filename = self.download_from_url(pic_url)
-            result.add_media(Media(filename=filename), id=f"profile_picture")
+            result.add_media(Media(filename=filename), id="profile_picture")

        if self.full_profile:
            user_id = user.get("pk")

@@ -133,7 +133,7 @@ class InstagramAPIExtractor(Extractor):

    def download_all_highlights(self, result, username, user_id):
        count_highlights = 0
-        highlights = self.call_api(f"v1/user/highlights", {"user_id": user_id})
+        highlights = self.call_api("v1/user/highlights", {"user_id": user_id})
        for h in highlights:
            try:
                h_info = self._download_highlights_reusable(result, h.get("pk"))

@@ -151,9 +151,9 @@ class InstagramAPIExtractor(Extractor):

    def download_post(self, result: Metadata, code: str = None, id: str = None, context: str = None) -> Metadata:
        if id:
-            post = self.call_api(f"v1/media/by/id", {"id": id})
+            post = self.call_api("v1/media/by/id", {"id": id})
        else:
-            post = self.call_api(f"v1/media/by/code", {"code": code})
+            post = self.call_api("v1/media/by/code", {"code": code})
        assert post, f"Post {id or code} not found"

        if caption_text := post.get("caption_text"):

@@ -173,7 +173,7 @@ class InstagramAPIExtractor(Extractor):
        return result.success("insta highlights")

    def _download_highlights_reusable(self, result: Metadata, id: str) -> dict:
-        full_h = self.call_api(f"v2/highlight/by/id", {"id": id})
+        full_h = self.call_api("v2/highlight/by/id", {"id": id})
        h_info = full_h.get("response", {}).get("reels", {}).get(f"highlight:{id}")
        assert h_info, f"Highlight {id} not found: {full_h=}"

@@ -200,7 +200,7 @@ class InstagramAPIExtractor(Extractor):
        return result.success(f"insta stories {now}")

    def _download_stories_reusable(self, result: Metadata, username: str) -> list[dict]:
-        stories = self.call_api(f"v1/user/stories/by/username", {"username": username})
+        stories = self.call_api("v1/user/stories/by/username", {"username": username})
        if not stories or not len(stories):
            return []
        stories = stories[::-1]  # newest to oldest

@@ -219,7 +219,7 @@ class InstagramAPIExtractor(Extractor):

        post_count = 0
        while end_cursor != "":
-            posts = self.call_api(f"v1/user/medias/chunk", {"user_id": user_id, "end_cursor": end_cursor})
+            posts = self.call_api("v1/user/medias/chunk", {"user_id": user_id, "end_cursor": end_cursor})
            if not len(posts) or not type(posts) == list or len(posts) != 2:
                break
            posts, end_cursor = posts[0], posts[1]

@@ -244,7 +244,7 @@ class InstagramAPIExtractor(Extractor):

        tagged_count = 0
        while next_page_id != None:
-            resp = self.call_api(f"v2/user/tag/medias", {"user_id": user_id, "page_id": next_page_id})
+            resp = self.call_api("v2/user/tag/medias", {"user_id": user_id, "page_id": next_page_id})
            posts = resp.get("response", {}).get("items", [])
            if not len(posts):
                break

@@ -4,7 +4,9 @@ highlights, and tagged posts. Authentication is required via username/password o

"""

-import re, os, shutil
+import re
+import os
+import shutil
import instaloader
from loguru import logger

@@ -36,9 +38,9 @@ class InstagramExtractor(Extractor):
        )
        try:
            self.insta.load_session_from_file(self.username, self.session_file)
-        except Exception as e:
+        except Exception:
            try:
-                logger.debug(f"Session file failed", exc_info=True)
+                logger.debug("Session file failed", exc_info=True)
                logger.info("No valid session file found - Attempting login with use and password.")
                self.insta.login(self.username, self.password)
                self.insta.save_session_to_file(self.session_file)

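Here and in several hunks below, the unused `as e` binding is dropped from handlers that never reference the exception object (pyflakes/ruff typically report this as F841, an unused local variable). A minimal sketch of the pattern, with hypothetical `load` and `login` callables standing in for the Instagram session calls:

```python
# If the handler never uses the exception object, a bare `except Exception:` is enough.
def load_or_login(load, login):
    try:
        return load()
    except Exception:  # was `except Exception as e:` with `e` never used
        # fall back to a fresh login; nothing here needs the exception object
        return login()

print(load_or_login(lambda: 1 / 0, lambda: "logged in"))
```
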
@@ -51,7 +51,7 @@ class InstagramTbotExtractor(Extractor):
        """Initializes the Telegram client."""
        try:
            self.client = TelegramClient(self.session_file, self.api_id, self.api_hash)
-        except OperationalError as e:
+        except OperationalError:
            logger.error(
                f"Unable to access the {self.session_file} session. "
                "Ensure that you don't use the same session file here and in telethon_extractor. "

@@ -68,7 +68,7 @@ class InstagramTbotExtractor(Extractor):

    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
-        if not "instagram.com" in url:
+        if "instagram.com" not in url:
            return False

        result = Metadata()

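The membership test above is rewritten from `not "instagram.com" in url` to `"instagram.com" not in url` (pycodestyle E713). The two forms are equivalent; `not in` simply reads as a single operator. For example:

```python
url = "https://example.com/post/123"
# old spelling:  if not "instagram.com" in url:
# new spelling, identical behaviour:
if "instagram.com" not in url:
    print("not an Instagram URL, skipping")
```
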
@@ -1,5 +1,6 @@
from loguru import logger
-import time, os
+import time
+import os
import base64

from selenium.common.exceptions import TimeoutException

@@ -1,4 +1,5 @@
-import ssl, os
+import ssl
+import os
from slugify import slugify
from urllib.parse import urlparse
from loguru import logger

@@ -1,4 +1,6 @@
-import requests, re, html
+import requests
+import re
+import html
from bs4 import BeautifulSoup
from loguru import logger

@@ -10,7 +10,9 @@ from telethon.errors.rpcerrorlist import (
)
from loguru import logger
from tqdm import tqdm
-import re, time, os
+import re
+import time
+import os

from auto_archiver.core import Extractor
from auto_archiver.core import Metadata, Media

@@ -63,11 +65,11 @@ class TelethonExtractor(Extractor):
                logger.warning(
                    f"please add the property id='{ent.id}' to the 'channel_invites' configuration where {invite=}, not doing so can lead to a minutes-long setup time due to telegram's rate limiting."
                )
-            except ValueError as e:
+            except ValueError:
                logger.info(f"joining new channel {invite=}")
                try:
                    self.client(ImportChatInviteRequest(match.group(2)))
-                except UserAlreadyParticipantError as e:
+                except UserAlreadyParticipantError:
                    logger.info(f"already joined {invite=}")
                except InviteRequestSentError:
                    logger.warning(f"already sent a join request with {invite} still no answer")

@@ -7,7 +7,8 @@ and identify important moments without watching the entire video.

"""

-import ffmpeg, os
+import ffmpeg
+import os
from loguru import logger

from auto_archiver.core import Enricher

@@ -1,6 +1,8 @@
import jsonlines
import mimetypes
-import os, shutil, subprocess
+import os
+import shutil
+import subprocess
from zipfile import ZipFile
from loguru import logger
from warcio.archiveiterator import ArchiveIterator

@@ -186,7 +188,6 @@ class WaczExtractorEnricher(Enricher, Extractor):
        # get media out of .warc
        counter = 0
        seen_urls = set()
-        import json

        with open(warc_filename, "rb") as warc_stream:
            for record in ArchiveIterator(warc_stream):

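The removed line above is a function-local `import json`. Assuming the module already imports `json` at the top (not visible in this hunk), the local import is redundant: Python caches modules in `sys.modules`, so re-importing inside the function adds nothing. A minimal sketch:

```python
import json  # a single module-level import is enough

def summarize(record: dict) -> str:
    # no need to re-import json here; the module-level name is visible
    return json.dumps(record, sort_keys=True)

print(summarize({"url": "https://example.com", "status": 200}))
```
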
@@ -1,6 +1,7 @@
import json
from loguru import logger
-import time, requests
+import time
+import requests

from auto_archiver.core import Extractor, Enricher
from auto_archiver.utils import url as UrlUtil

@@ -57,7 +58,7 @@ class WaybackExtractorEnricher(Enricher, Extractor):
            if not job_id:
                logger.error(f"Wayback failed with {r.json()}")
                return False
-        except json.decoder.JSONDecodeError as e:
+        except json.decoder.JSONDecodeError:
            logger.error(f"Expected a JSON with job_id from Wayback and got {r.text}")
            return False

@@ -80,7 +81,7 @@ class WaybackExtractorEnricher(Enricher, Extractor):
            except requests.exceptions.RequestException as e:
                logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
                break
-            except json.decoder.JSONDecodeError as e:
+            except json.decoder.JSONDecodeError:
                logger.error(f"Expected a JSON from Wayback and got {r.text} for {url=}")
                break
            except Exception as e:

@@ -1,5 +1,6 @@
import traceback
-import requests, time
+import requests
+import time
from loguru import logger

from auto_archiver.core import Enricher

@@ -16,7 +17,7 @@ class WhisperEnricher(Enricher):
    def setup(self) -> None:
        self.stores = self.config["steps"]["storages"]
        self.s3 = self.module_factory.get_module("s3_storage", self.config)
-        if not "s3_storage" in self.stores:
+        if "s3_storage" not in self.stores:
            logger.error(
                "WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called."
            )

@@ -66,15 +66,15 @@ class CookieSettingDriver(webdriver.Firefox):

        if self.facebook_accept_cookies:
            try:
-                logger.debug(f"Trying fb click accept cookie popup.")
+                logger.debug("Trying fb click accept cookie popup.")
                super(CookieSettingDriver, self).get("http://www.facebook.com")
                essential_only = self.find_element(By.XPATH, "//span[contains(text(), 'Decline optional cookies')]")
                essential_only.click()
-                logger.debug(f"fb click worked")
+                logger.debug("fb click worked")
                # linux server needs a sleep otherwise facebook cookie won't have worked and we'll get a popup on next page
                time.sleep(2)
            except Exception as e:
-                logger.warning(f"Failed on fb accept cookies.", e)
+                logger.warning("Failed on fb accept cookies.", e)

        # now get the actual URL
        super(CookieSettingDriver, self).get(url)