diff --git a/src/auto_archiver/core/module.py b/src/auto_archiver/core/module.py index 3db861b..f620500 100644 --- a/src/auto_archiver/core/module.py +++ b/src/auto_archiver/core/module.py @@ -274,6 +274,9 @@ class LazyBaseModule: # finally, get the class instance instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)() + # save the instance for future easy loading + self._instance = instance + # set the name, display name and module factory instance.name = self.name instance.display_name = self.display_name @@ -286,8 +289,6 @@ class LazyBaseModule: instance.config_setup(config) instance.setup() - # save the instance for future easy loading - self._instance = instance return instance def __repr__(self): diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index cbd1af5..b637878 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -387,8 +387,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_ except (KeyboardInterrupt, Exception) as e: if not isinstance(e, KeyboardInterrupt) and not isinstance(e, SetupError): logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}") - if loaded_module and module_type == "extractor": - loaded_module.cleanup() + + # access the _instance here because loaded_module may not return if there's an error + if lazy_module._instance and module_type == "extractor": + lazy_module._instance.cleanup() raise e if not loaded_module: diff --git a/src/auto_archiver/core/validators.py b/src/auto_archiver/core/validators.py index 765a02b..decd9f1 100644 --- a/src/auto_archiver/core/validators.py +++ b/src/auto_archiver/core/validators.py @@ -4,12 +4,6 @@ import argparse import json -def example_validator(value): - if "example" not in value: - raise argparse.ArgumentTypeError(f"{value} is not a valid value for this argument") - return value - - def positive_number(value): if value < 0: raise argparse.ArgumentTypeError(f"{value} is not a positive number") diff --git a/src/auto_archiver/modules/telethon_extractor/__manifest__.py b/src/auto_archiver/modules/telethon_extractor/__manifest__.py index 150b62c..e29a9b0 100644 --- a/src/auto_archiver/modules/telethon_extractor/__manifest__.py +++ b/src/auto_archiver/modules/telethon_extractor/__manifest__.py @@ -19,7 +19,7 @@ }, "session_file": { "default": "secrets/anon", - "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value.", + "help": "Path of the file to save the telegram login session for future usage, '.session' will be appended to the provided path.", }, "join_channels": { "default": True, diff --git a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py index b06962e..73fb4e8 100644 --- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py +++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py @@ -1,4 +1,10 @@ +import os import shutil +import re +import time +from pathlib import Path +from datetime import date + from telethon.sync import TelegramClient from telethon.errors import ChannelInvalidError from telethon.tl.functions.messages import ImportChatInviteRequest @@ -8,11 +14,9 @@ from telethon.errors.rpcerrorlist import ( InviteRequestSentError, InviteHashExpiredError, ) -from loguru import logger + from tqdm import tqdm -import re -import time -import os +from loguru import logger from auto_archiver.core import Extractor from auto_archiver.core import Metadata, Media @@ -31,10 +35,22 @@ class TelethonExtractor(Extractor): """ logger.info(f"SETUP {self.name} checking login...") + # in case the user already added '.session' to the session_file + base_session_name = self.session_file.removesuffix(".session") + base_session_filepath = f"{base_session_name}.session" + + if self.session_file and not os.path.exists(base_session_filepath): + logger.warning( + f"SETUP - Session file {base_session_filepath} does not exist for {self.name}, creating an empty one." + ) + Path(base_session_filepath).touch() + # make a copy of the session that is used exclusively with this archiver instance - new_session_file = os.path.join("secrets/", f"telethon-{time.strftime('%Y-%m-%d')}{random_str(8)}.session") - shutil.copy(self.session_file + ".session", new_session_file) - self.session_file = new_session_file.replace(".session", "") + self.session_file = os.path.join( + os.path.dirname(base_session_filepath), f"telethon-{date.today().strftime('%Y-%m-%d')}{random_str(8)}" + ) + logger.debug(f"Making a copy of the session file {base_session_filepath} to {self.session_file}.session") + shutil.copy(base_session_filepath, f"{self.session_file}.session") # initiate the client self.client = TelegramClient(self.session_file, self.api_id, self.api_hash) @@ -87,8 +103,8 @@ class TelethonExtractor(Extractor): pbar.update() def cleanup(self) -> None: - logger.info(f"CLEANUP {self.name}.") - session_file_name = self.session_file + ".session" + logger.info(f"CLEANUP {self.name} - removing session file {self.session_file}.session") + session_file_name = f"{self.session_file}.session" if os.path.exists(session_file_name): os.remove(session_file_name) diff --git a/tests/conftest.py b/tests/conftest.py index 44d8058..6377646 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,24 @@ from auto_archiver.core.module import ModuleFactory # that you only want to run if everything else succeeds (e.g. API calls). The order here is important # what comes first will be run first (at the end of all other tests not mentioned) # format is the name of the module (python file) without the .py extension -TESTS_TO_RUN_LAST = ["test_twitter_api_archiver"] +TESTS_TO_RUN_LAST = ["test_generic_archiver", "test_twitter_api_archiver"] + + +# don't check for ytdlp updates in tests +@pytest.fixture(autouse=True) +def skip_check_for_update(mocker): + update_ytdlp = mocker.patch( + "auto_archiver.modules.generic_extractor.generic_extractor.GenericExtractor.update_ytdlp" + ) + update_ytdlp.return_value = False + + +@pytest.fixture +def get_lazy_module(): + def _get_lazy_module(module_name): + return ModuleFactory().get_module_lazy(module_name) + + return _get_lazy_module @pytest.fixture @@ -134,6 +151,7 @@ def unpickle(): @pytest.fixture def mock_binary_dependencies(mocker): + mocker.patch("subprocess.run").return_value = mocker.Mock(returncode=0) mock_shutil_which = mocker.patch("shutil.which") # Mock all binary dependencies as available mock_shutil_which.return_value = "/usr/bin/fake_binary" diff --git a/tests/data/test_modules/example_extractor/example_extractor.py b/tests/data/test_modules/example_extractor/example_extractor.py index 1c63383..ade26e4 100644 --- a/tests/data/test_modules/example_extractor/example_extractor.py +++ b/tests/data/test_modules/example_extractor/example_extractor.py @@ -1,6 +1,11 @@ from auto_archiver.core import Extractor +from loguru import logger + class ExampleExtractor(Extractor): def download(self, item): - print("download") + logger.info("download") + + def cleanup(self): + logger.info("cleanup") diff --git a/tests/data/test_modules/example_module/example_module.py b/tests/data/test_modules/example_module/example_module.py index 392abe0..898df96 100644 --- a/tests/data/test_modules/example_module/example_module.py +++ b/tests/data/test_modules/example_module/example_module.py @@ -1,27 +1,29 @@ from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata +from loguru import logger + class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter): def download(self, item): - print("download") + logger.info("download") def __iter__(self): yield Metadata().set_url("https://example.com") def done(self, result): - print("done") + logger.info("done") def enrich(self, to_enrich): - print("enrich") + logger.info("enrich") def get_cdn_url(self, media): return "nice_url" def save(self, item): - print("save") + logger.info("save") def uploadf(self, file, key, **kwargs): - print("uploadf") + logger.info("uploadf") def format(self, item): - print("format") + logger.info("format") diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index 85a0364..2fb5c35 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -36,7 +36,7 @@ class TestGenericExtractor(TestExtractorBase): package = "auto_archiver.modules.generic_extractor" assert self.extractor.dropin_for_name("bluesky", package=package) - # test loading dropings via filepath + # test loading dropins via filepath path = os.path.join(dirname(dirname(__file__)), "data/") assert self.extractor.dropin_for_name("dropin", additional_paths=[path]) @@ -121,7 +121,7 @@ class TestGenericExtractor(TestExtractorBase): == "Buy NEW Keyboard Cat Merch! https://keyboardcat.creator-spring.com\n\nxo Keyboard Cat memes make your day better!\nhttp://www.keyboardcatstore.com/\nhttps://www.facebook.com/thekeyboardcat\nhttp://www.charlieschmidt.com/" ) assert len(result.media) == 2 - assert Path(result.media[0].filename).name == "J---aiyznGQ.webm" + assert "J---aiyznGQ" in Path(result.media[0].filename).name assert Path(result.media[1].filename).name == "hqdefault.jpg" @pytest.mark.download diff --git a/tests/extractors/test_telethon_extractor.py b/tests/extractors/test_telethon_extractor.py new file mode 100644 index 0000000..0a2d5c8 --- /dev/null +++ b/tests/extractors/test_telethon_extractor.py @@ -0,0 +1,26 @@ +import os +from datetime import date + +import pytest + + +@pytest.fixture(autouse=True) +def mock_client_setup(mocker): + mocker.patch("telethon.client.auth.AuthMethods.start") + + +def test_setup_fails_clear_session_file(get_lazy_module, tmp_path, mocker): + start = mocker.patch("telethon.client.auth.AuthMethods.start") + start.side_effect = Exception("Test exception") + + # make sure the default setup file is created + session_file = tmp_path / "test.session" + + lazy_module = get_lazy_module("telethon_extractor") + + with pytest.raises(Exception): + lazy_module.load({"telethon_extractor": {"session_file": str(session_file), "api_id": 123, "api_hash": "ABC"}}) + + assert session_file.exists() + assert f"telethon-{date.today().strftime('%Y-%m-%d')}" in lazy_module._instance.session_file + assert os.path.exists(lazy_module._instance.session_file + ".session") diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 3367ce0..86e3125 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -237,3 +237,23 @@ def test_wrong_step_type(test_args, caplog): with pytest.raises(SetupError) as err: orchestrator.setup(args) assert "Module 'example_extractor' is not a feeder" in str(err.value) + + +def test_load_failed_extractor_cleanup(test_args, mocker, caplog): + orchestrator = ArchivingOrchestrator() + + # hack to set up the paths so we can patch properly + orchestrator.module_factory.setup_paths([TEST_MODULES]) + + # patch example_module.setup to throw an exception + mocker.patch( + "auto_archiver.modules.example_extractor.example_extractor.ExampleExtractor.setup", + side_effect=Exception("Test exception"), + ) + + with pytest.raises(Exception): + orchestrator.setup(test_args + ["--extractors", "example_extractor"]) + + assert "Error during setup of modules: Test exception" in caplog.text + # make sure the 'cleanup' is called + assert "cleanup" in caplog.text