Fix instagram_extractor.py typo, add warning to docs, and add basic regex test.

2025-03-06 16:25:38 +00:00 · 2025-03-06 16:25:38 +00:00 · fa1e65f54c
commit fa1e65f54c
--- a/src/auto_archiver/modules/instagram_extractor/manifest.py
+++ b/src/auto_archiver/modules/instagram_extractor/manifest.py
@ -10,25 +10,30 @@
    "requires_setup": True,
    "configs": {
        "username": {"required": True,
-                     "help": "a valid Instagram username"},
+                     "help": "A valid Instagram username."},
        "password": {
            "required": True,
-            "help": "the corresponding Instagram account password",
+            "help": "The corresponding Instagram account password.",
        },
        "download_folder": {
            "default": "instaloader",
-            "help": "name of a folder to temporarily download content to",
+            "help": "Name of a folder to temporarily download content to.",
        },
        "session_file": {
            "default": "secrets/instaloader.session",
-            "help": "path to the instagram session which saves session credentials",
+            "help": "Path to the Instagram session file which saves session credentials. If one doesn't exist, this is the path where a new one will be stored.",
        },
        # TODO: fine-grain
        # "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"},
    },
    "description": """
-    Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. This class handles both individual posts
-    and user profiles, downloading as much information as possible, including images, videos, text, stories,
+    Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. 
+    
+      > ⚠️ **Warning**  
+      > This module is not actively maintained due to known issues with blocking.  
+      > Prioritise usage of the `instagram_tbot_extractor` and `instagram_api_extractor`.
+  
+    This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
    highlights, and tagged posts. 
    Authentication is required via username/password or a session file.
                    
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@ -4,8 +4,6 @@

 """
 import re, os, shutil
-from sys import exc_info
-
 import instaloader
 from loguru import logger

@ -17,10 +15,9 @@ class InstagramExtractor(Extractor):
    """
    Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...)
    """
+
    # NB: post regex should be tested before profile
-
    valid_url = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/")
-
    # https://regex101.com/r/MGPquX/1
    post_pattern = re.compile(r"{valid_url}(?:p|reel)\/(\w+)".format(valid_url=valid_url))
    # https://regex101.com/r/6Wbsxa/1
@ -38,19 +35,14 @@ class InstagramExtractor(Extractor):
        )
        try:
            self.insta.load_session_from_file(self.username, self.session_file)
-        except FileNotFoundError:
-            logger.info("No existing session file found - Attempting login with use and password.")
+        except Exception as e:
            try:
+                logger.debug(f"Session file failed", exc_info=True)
+                logger.info("No valid session file found - Attempting login with username and password.")
                self.insta.login(self.username, self.password)
                self.insta.save_session_to_file(self.session_file)
            except Exception as e:
-                logger.error(f"Failed to log in with Instaloader: {e}")
-                # TODO raise exception?
-                # raise Exception(f"Failed to log in with Instaloader: {e}")
-        except Exception as e:
-            logger.error(f"Error loading session file: {e}")
-            # TODO raise exception?
-            # raise Exception(f"Error loading session file: {e}")
+                logger.error(f"Failed to set up Instagram Extractor with Instaloader. {e}")


    def download(self, item: Metadata) -> Metadata:
--- a/tests/extractors/test_instagram_extractor.py
+++ b/tests/extractors/test_instagram_extractor.py
@ -1,11 +1,10 @@
 import pytest

 from auto_archiver.modules.instagram_extractor import InstagramExtractor
-from .test_extractor_base import TestExtractorBase


@pytest.fixture
-def intsagram_extractor(setup_module):
+def instagram_extractor(setup_module, mocker):

    extractor_module: str = 'instagram_extractor'
    config: dict = {
@ -14,11 +13,14 @@ def intsagram_extractor(setup_module):
        "download_folder": "instaloader",
        "session_file": "secrets/instaloader.session",
    }
+    fake_loader = mocker.MagicMock()
+    fake_loader.load_session_from_file.return_value = None
+    fake_loader.login.return_value = None
+    fake_loader.save_session_to_file.return_value = None
+    mocker.patch("instaloader.Instaloader", return_value=fake_loader,)
    return setup_module(extractor_module, config)


-
-
@pytest.mark.parametrize("url", [
    "https://www.instagram.com/p/",
    "https://www.instagram.com/p/1234567890/",
@ -27,6 +29,8 @@ def intsagram_extractor(setup_module):
    "https://www.instagram.com/username/stories/",
    "https://www.instagram.com/username/highlights/",
 ])
-def test_regex_matches(url, instagram_extractor):
-    # post
-    assert  instagram_extractor.valid_url.match(url)
+def test_regex_matches(url: str, instagram_extractor: InstagramExtractor) -> None:
+    """
+    Ensure that the valid_url regex matches all provided Instagram URLs.
+    """
+    assert instagram_extractor.valid_url.match(url)