Fix instagram_extractor.py typo, add warning to docs, and add basic regex test.

pull/235/head
erinhmclark 2025-03-06 16:25:38 +00:00
rodzic a705a78632
commit fa1e65f54c
3 zmienionych plików z 27 dodań i 26 usunięć

Wyświetl plik

@ -10,25 +10,30 @@
"requires_setup": True, "requires_setup": True,
"configs": { "configs": {
"username": {"required": True, "username": {"required": True,
"help": "a valid Instagram username"}, "help": "A valid Instagram username."},
"password": { "password": {
"required": True, "required": True,
"help": "the corresponding Instagram account password", "help": "The corresponding Instagram account password.",
}, },
"download_folder": { "download_folder": {
"default": "instaloader", "default": "instaloader",
"help": "name of a folder to temporarily download content to", "help": "Name of a folder to temporarily download content to.",
}, },
"session_file": { "session_file": {
"default": "secrets/instaloader.session", "default": "secrets/instaloader.session",
"help": "path to the instagram session which saves session credentials", "help": "Path to the instagram session file which saves session credentials. If one doesn't exist this gives the path to store a new one.",
}, },
# TODO: fine-grain # TODO: fine-grain
# "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"}, # "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"},
}, },
"description": """ "description": """
Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. This class handles both individual posts Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram.
and user profiles, downloading as much information as possible, including images, videos, text, stories,
> **Warning**
> This module is not actively maintained due to known issues with blocking.
> Prioritise usage of the `instagram_tbot_extractor` and `instagram_api_extractor`.
This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
highlights, and tagged posts. highlights, and tagged posts.
Authentication is required via username/password or a session file. Authentication is required via username/password or a session file.

Wyświetl plik

@ -4,8 +4,6 @@
""" """
import re, os, shutil import re, os, shutil
from sys import exc_info
import instaloader import instaloader
from loguru import logger from loguru import logger
@ -17,10 +15,9 @@ class InstagramExtractor(Extractor):
""" """
Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...) Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...)
""" """
# NB: post regex should be tested before profile # NB: post regex should be tested before profile
valid_url = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/") valid_url = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/")
# https://regex101.com/r/MGPquX/1 # https://regex101.com/r/MGPquX/1
post_pattern = re.compile(r"{valid_url}(?:p|reel)\/(\w+)".format(valid_url=valid_url)) post_pattern = re.compile(r"{valid_url}(?:p|reel)\/(\w+)".format(valid_url=valid_url))
# https://regex101.com/r/6Wbsxa/1 # https://regex101.com/r/6Wbsxa/1
@ -38,19 +35,14 @@ class InstagramExtractor(Extractor):
) )
try: try:
self.insta.load_session_from_file(self.username, self.session_file) self.insta.load_session_from_file(self.username, self.session_file)
except FileNotFoundError: except Exception as e:
logger.info("No existing session file found - Attempting login with use and password.")
try: try:
logger.debug(f"Session file failed", exc_info=True)
logger.info("No valid session file found - Attempting login with use and password.")
self.insta.login(self.username, self.password) self.insta.login(self.username, self.password)
self.insta.save_session_to_file(self.session_file) self.insta.save_session_to_file(self.session_file)
except Exception as e: except Exception as e:
logger.error(f"Failed to log in with Instaloader: {e}") logger.error(f"Failed to setup Instagram Extractor with Instagrapi. {e}")
# TODO raise exception?
# raise Exception(f"Failed to log in with Instaloader: {e}")
except Exception as e:
logger.error(f"Error loading session file: {e}")
# TODO raise exception?
# raise Exception(f"Error loading session file: {e}")
def download(self, item: Metadata) -> Metadata: def download(self, item: Metadata) -> Metadata:

Wyświetl plik

@ -1,11 +1,10 @@
import pytest import pytest
from auto_archiver.modules.instagram_extractor import InstagramExtractor from auto_archiver.modules.instagram_extractor import InstagramExtractor
from .test_extractor_base import TestExtractorBase
@pytest.fixture @pytest.fixture
def intsagram_extractor(setup_module): def instagram_extractor(setup_module, mocker):
extractor_module: str = 'instagram_extractor' extractor_module: str = 'instagram_extractor'
config: dict = { config: dict = {
@ -14,11 +13,14 @@ def intsagram_extractor(setup_module):
"download_folder": "instaloader", "download_folder": "instaloader",
"session_file": "secrets/instaloader.session", "session_file": "secrets/instaloader.session",
} }
fake_loader = mocker.MagicMock()
fake_loader.load_session_from_file.return_value = None
fake_loader.login.return_value = None
fake_loader.save_session_to_file.return_value = None
mocker.patch("instaloader.Instaloader", return_value=fake_loader,)
return setup_module(extractor_module, config) return setup_module(extractor_module, config)
@pytest.mark.parametrize("url", [ @pytest.mark.parametrize("url", [
"https://www.instagram.com/p/", "https://www.instagram.com/p/",
"https://www.instagram.com/p/1234567890/", "https://www.instagram.com/p/1234567890/",
@ -27,6 +29,8 @@ def intsagram_extractor(setup_module):
"https://www.instagram.com/username/stories/", "https://www.instagram.com/username/stories/",
"https://www.instagram.com/username/highlights/", "https://www.instagram.com/username/highlights/",
]) ])
def test_regex_matches(url, instagram_extractor): def test_regex_matches(url: str, instagram_extractor: InstagramExtractor) -> None:
# post """
assert instagram_extractor.valid_url.match(url) Ensure that the valid_url regex matches all provided Instagram URLs.
"""
assert instagram_extractor.valid_url.match(url)