From 1039e9631f0c13cca0f23bac4710b7bc6a7be1b7 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 11 Jun 2025 11:22:23 +0100 Subject: [PATCH] new reddit tests with .env.test --- .gitignore | 1 + docs/source/development/testing.md | 15 +++++++++++++-- .../dropins/reddit.py | 2 +- tests/.env.test.example | 6 ++++++ tests/conftest.py | 19 +++++++++++++++++++ .../test_antibot_extractor_enricher.py | 19 +++++++++++++++++-- 6 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 tests/.env.test.example diff --git a/.gitignore b/.gitignore index 35eee83..2c579fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ tmp*/ temp/ .env* +!.env*.example .DS_Store expmt/ service_account.json diff --git a/docs/source/development/testing.md b/docs/source/development/testing.md index 5de9574..290592c 100644 --- a/docs/source/development/testing.md +++ b/docs/source/development/testing.md @@ -3,14 +3,14 @@ `pytest` is used for testing. There are two main types of tests: 1. 'core' tests which should be run on every change -2. 'download' tests which hit the network. These tests will do things like make API calls (e.g. Twitter, Bluesky etc.) and should be run regularly to make sure that APIs have not changed. +2. 'download' tests which hit the network. These tests will do things like make API calls (e.g. Twitter, Bluesky etc.) and should be run regularly to make sure that APIs have not changed. They take longer to run than the 'core' tests. ## Running Tests 1. Make sure you've installed the dev dependencies with `pytest install --with dev` 2. 
Tests can be run as follows: -``` +```{code} bash #### Command prefix of 'poetry run' removed here for simplicity # run core tests pytest -ra -v -m "not download" @@ -18,4 +18,15 @@ pytest -ra -v -m "not download" pytest -ra -v -m "download" # run all tests pytest -ra -v + + +# run a specific test file +pytest -ra -v tests/test_file.py +# run a specific test function +pytest -ra -v tests/test_file.py::test_function_name +``` + +3. Some tests require environment variables to be set. You can use the example `tests/.env.test.example` file as a template. Copy it to `tests/.env.test` and fill in the required values. This file is loaded automatically by `tests/conftest.py` when `pytest` starts. +```{code} bash +cp tests/.env.test.example tests/.env.test ``` \ No newline at end of file diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py index 44d572b..78bc510 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py @@ -36,7 +36,7 @@ class RedditDropin(Dropin): self._close_cookies_banner() username, password = self._get_username_password("reddit.com") - logger.debug("RedditDropin Logging in to VK with username: {}", username) + logger.debug("RedditDropin Logging in to Reddit with username: {}", username) self.sb.type("#login-username", username) self.sb.type("#login-password", password) diff --git a/tests/.env.test.example b/tests/.env.test.example new file mode 100644 index 0000000..2e058ab --- /dev/null +++ b/tests/.env.test.example @@ -0,0 +1,6 @@ +# reddit test credentials +REDDIT_TEST_USERNAME="" +REDDIT_TEST_PASSWORD="" + +# twitter test credentials +TWITTER_BEARER_TOKEN="TEST_KEY" \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index ba7b48d..a54f01d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ from tempfile import TemporaryDirectory from typing import 
Dict, Tuple import hashlib +from loguru import logger import pytest from auto_archiver.core.metadata import Metadata, Media from auto_archiver.core.module import ModuleFactory @@ -20,6 +21,24 @@ from auto_archiver.core.module import ModuleFactory TESTS_TO_RUN_LAST = ["test_generic_archiver", "test_twitter_api_archiver"] +def pytest_configure(): + # load environment variables from .env.test file. + env_path = os.path.join(os.path.dirname(__file__), ".env.test") + if os.path.exists(env_path): + with open(env_path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" in line: + key, value = line.split("=", 1) + os.environ[key.strip()] = value.strip().lstrip('"').rstrip('"') + else: + logger.warning( + f"Environment file {env_path} not found. Skipping loading environment variables, some tests may fail." + ) + + # don't check for ytdlp updates in tests @pytest.fixture(autouse=True) def skip_check_for_update(mocker): diff --git a/tests/extractors/test_antibot_extractor_enricher.py b/tests/extractors/test_antibot_extractor_enricher.py index 06107b4..c0044b9 100644 --- a/tests/extractors/test_antibot_extractor_enricher.py +++ b/tests/extractors/test_antibot_extractor_enricher.py @@ -1,3 +1,4 @@ +import os import pytest from auto_archiver.modules.antibot_extractor_enricher.antibot_extractor_enricher import AntibotExtractorEnricher @@ -34,7 +35,14 @@ class TestAntibotExtractorEnricher(TestExtractorBase): "save_to_pdf": False, "max_download_images": 0, "max_download_videos": 0, + "user_data_dir": "./tests/tmp/user_data", "proxy": None, + "authentication": { + "reddit.com": { + "username": os.environ.get("REDDIT_TEST_USERNAME"), + "password": os.environ.get("REDDIT_TEST_PASSWORD"), + } + }, } @pytest.mark.download @@ -76,16 +84,23 @@ class TestAntibotExtractorEnricher(TestExtractorBase): 5, 0, ), + ( + "https://www.reddit.com/r/BeAmazed/comments/1l6b1n4/duy_tran_is_the_owner_and_prime_wood_work_artist/", + "Duy tran is the 
owner and prime wood work artist", + " Created Jan 26, 2015", + 4, + 0, + ), ], ) def test_download_pages_with_media(self, setup_module, make_item, url, in_title, in_text, image_count, video_count): """ Test downloading pages with media. """ - self.extractor = setup_module( self.extractor_module, - { + self.config + | { "save_to_pdf": True, "max_download_images": 5, "max_download_videos": "inf",