new reddit tests with .env.test

2025-06-11 11:22:23 +01:00 · 2025-06-11 11:22:23 +01:00 · 1039e9631f
commit 1039e9631f
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 tmp*/
 temp/
 .env*
+!.env*.example
 .DS_Store
 expmt/
 service_account.json
--- a/docs/source/development/testing.md
+++ b/docs/source/development/testing.md
@ -3,14 +3,14 @@
 `pytest` is used for testing. There are two main types of tests:

 1. 'core' tests which should be run on every change
-2. 'download' tests which hit the network. These tests will do things like make API calls (e.g. Twitter, Bluesky etc.) and should be run regularly to make sure that APIs have not changed.
+2. 'download' tests which hit the network. These tests will do things like make API calls (e.g. Twitter, Bluesky etc.) and should be run regularly to make sure that APIs have not changed. They take longer to run than the 'core' tests.


 ## Running Tests 

 1. Make sure you've installed the dev dependencies with `pytest install --with dev`
 2. Tests can be run as follows:
-```
+```{code} bash
 #### Command prefix of 'poetry run' removed here for simplicity
 # run core tests
 pytest -ra -v -m "not download"
@ -18,4 +18,15 @@ pytest -ra -v -m "not download"
 pytest -ra -v -m "download"
 # run all tests
 pytest -ra -v
+
+
+# run a specific test file
+pytest -ra -v tests/test_file.py
+# run a specific test function
+pytest -ra -v tests/test_file.py::test_function_name
+```
+
+3. Some tests require environment variables to be set. Use the example file `tests/.env.test.example` as a template: copy it to `tests/.env.test` and fill in the required values. This file is loaded automatically when `pytest` starts (by `tests/conftest.py`).
+```{code} bash
+cp tests/.env.test.example tests/.env.test
 ```
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
@ -36,7 +36,7 @@ class RedditDropin(Dropin):
        self._close_cookies_banner()

        username, password = self._get_username_password("reddit.com")
-        logger.debug("RedditDropin Logging in to VK with username: {}", username)
+        logger.debug("RedditDropin Logging in to Reddit with username: {}", username)

        self.sb.type("#login-username", username)
        self.sb.type("#login-password", password)
--- a/tests/.env.test.example
+++ b/tests/.env.test.example
@ -0,0 +1,6 @@
+# reddit test credentials
+REDDIT_TEST_USERNAME=""
+REDDIT_TEST_PASSWORD=""
+
+# twitter test credentials
+TWITTER_BEARER_TOKEN="TEST_KEY"
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -9,6 +9,7 @@ from tempfile import TemporaryDirectory
 from typing import Dict, Tuple
 import hashlib

+from loguru import logger
 import pytest
 from auto_archiver.core.metadata import Metadata, Media
 from auto_archiver.core.module import ModuleFactory
@ -20,6 +21,24 @@ from auto_archiver.core.module import ModuleFactory
 TESTS_TO_RUN_LAST = ["test_generic_archiver", "test_twitter_api_archiver"]


+def pytest_configure():
+    # Load KEY=VALUE pairs from the .env.test file next to this conftest into os.environ (existing keys are overwritten).
+    env_path = os.path.join(os.path.dirname(__file__), ".env.test")
+    if os.path.exists(env_path):
+        with open(env_path) as f:
+            for line in f:
+                line = line.strip()
+                if not line or line.startswith("#"):  # skip blank lines and full-line comments
+                    continue
+                if "=" in line:  # lines without "=" are silently ignored
+                    key, value = line.split("=", 1)  # split on the first "=" only, so values may contain "="
+                    os.environ[key.strip()] = value.strip().lstrip('"').rstrip('"')  # strip leading/trailing double quotes
+    else:
+        logger.warning(  # a missing file is not fatal: warn and carry on
+            f"Environment file {env_path} not found. Skipping loading environment variables, some tests may fail."
+        )
+
+
 # don't check for ytdlp updates in tests
@pytest.fixture(autouse=True)
 def skip_check_for_update(mocker):
--- a/tests/extractors/test_antibot_extractor_enricher.py
+++ b/tests/extractors/test_antibot_extractor_enricher.py
@ -1,3 +1,4 @@
+import os
 import pytest

 from auto_archiver.modules.antibot_extractor_enricher.antibot_extractor_enricher import AntibotExtractorEnricher
@ -34,7 +35,14 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
        "save_to_pdf": False,
        "max_download_images": 0,
        "max_download_videos": 0,
+        "user_data_dir": "./tests/tmp/user_data",
        "proxy": None,
+        "authentication": {
+            "reddit.com": {
+                "username": os.environ.get("REDDIT_TEST_USERNAME"),
+                "password": os.environ.get("REDDIT_TEST_PASSWORD"),
+            }
+        },
    }

    @pytest.mark.download
@ -76,16 +84,23 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
                5,
                0,
            ),
+            (
+                "https://www.reddit.com/r/BeAmazed/comments/1l6b1n4/duy_tran_is_the_owner_and_prime_wood_work_artist/",
+                "Duy tran is the owner and prime wood work artist",
+                " Created Jan 26, 2015",
+                4,
+                0,
+            ),
        ],
    )
    def test_download_pages_with_media(self, setup_module, make_item, url, in_title, in_text, image_count, video_count):
        """
        Test downloading pages with media.
        """
-
        self.extractor = setup_module(
            self.extractor_module,
-            {
+            self.config
+            | {
                "save_to_pdf": True,
                "max_download_images": 5,
                "max_download_videos": "inf",