Merge pull request #234 from bellingcat/update_suggestions

Auto Updates
pull/217/head^2
Patrick Robertson 2025-03-07 15:12:03 +00:00 zatwierdzone przez GitHub
commit 027985024b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
4 zmienionych plików z 75 dodań i 2 usunięć

Wyświetl plik

@ -15,6 +15,7 @@ from copy import copy
from rich_argparse import RichHelpFormatter
from loguru import logger
import requests
from .metadata import Metadata, Media
from auto_archiver.version import __version__
@ -348,7 +349,23 @@ class ArchivingOrchestrator:
yaml_config = self.load_config(basic_config.config_file)
return self.setup_complete_parser(basic_config, yaml_config, unused_args)
def check_for_updates(self):
    """
    Warn the user when the latest auto-archiver release on PyPI differs from the running one.

    Queries the PyPI JSON API for the latest published version. If it is not equal
    to the running `__version__`, a warning is logged together with the matching
    update command: `docker pull` when the `RUNNING_IN_DOCKER` environment variable
    is set, `pip install --upgrade` otherwise.

    The check is best-effort: network errors, timeouts, HTTP error statuses and
    unexpected payloads are logged at debug level and otherwise ignored, so that
    being offline (or PyPI being unavailable) never prevents a run from starting.
    """
    try:
        # requests waits indefinitely unless a timeout is given explicitly
        response = requests.get("https://pypi.org/pypi/auto-archiver/json", timeout=5)
        response.raise_for_status()
        latest_version = response.json()['info']['version']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError covers invalid JSON; KeyError/TypeError cover an unexpected payload shape
        logger.debug(f"Unable to check for auto-archiver updates: {e}")
        return

    # check version compared to current version
    if latest_version == __version__:
        return

    if os.environ.get('RUNNING_IN_DOCKER'):
        update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
    else:
        update_cmd = "`pip install --upgrade auto-archiver`"

    logger.warning("")
    logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
    logger.warning(f"A new version of auto-archiver is available (v{latest_version}, you have {__version__})")
    logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
    logger.warning("")
def setup(self, args: list):
"""
Function to configure all setup of the orchestrator: setup configs and load modules.
@ -356,6 +373,8 @@ class ArchivingOrchestrator:
This method should only ever be called once
"""
self.check_for_updates()
if self.setup_finished:
logger.warning("The `setup_config()` function should only ever be run once. \
If you need to re-run the setup, please re-instantiate a new instance of the orchestrator. \

Wyświetl plik

@ -10,7 +10,7 @@ class ConsoleDb(Database):
"""
# Log that archiving of `item` has started.
# NOTE(review): this text is a flattened diff view, so the two consecutive log
# calls below are presumably the removed (`warning`) and added (`info`) versions
# of a single line rather than two calls that both run — confirm against the file.
def started(self, item: Metadata) -> None:
logger.warning(f"STARTED {item}")
logger.info(f"STARTED {item}")
def failed(self, item: Metadata, reason:str) -> None:
    """Log, at error level, that `item` failed together with the given `reason`."""
    failure_message = f"FAILED {item}: {reason}"
    logger.error(failure_message)

Wyświetl plik

@ -28,6 +28,13 @@ the broader archiving framework.
metadata objects. Some dropins are included in this generic_archiver by default, but
custom dropins can be created to handle additional websites and passed to the archiver
via the command line using the `--dropins` option (TODO!).
### Auto-Updates
The Generic Extractor will also automatically check for updates to `yt-dlp` (every 5 days by default).
This can be configured using the `ytdlp_update_interval` setting (or disabled by setting it to -1).
If you are having issues with the extractor, you can review the version of `yt-dlp` being used with `yt-dlp --version`.
""",
"configs": {
"subtitles": {"default": True, "help": "download subtitles if available", "type": "bool"},
@ -64,5 +71,10 @@ via the command line using the `--dropins` option (TODO!).
"default": "inf",
"help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.",
},
"ytdlp_update_interval": {
"default": 5,
"help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.",
"type": "int",
},
},
}

Wyświetl plik

@ -1,7 +1,11 @@
import datetime, os, yt_dlp, pysubs2
import datetime, os
import importlib
import subprocess
from typing import Generator, Type
import yt_dlp
from yt_dlp.extractor.common import InfoExtractor
import pysubs2
from loguru import logger
@ -11,6 +15,44 @@ from auto_archiver.core import Metadata, Media
class GenericExtractor(Extractor):
_dropins = {}
def setup(self):
# check for file .ytdlp-update in the secrets folder
if self.ytdlp_update_interval < 0:
return
use_secrets = os.path.exists('secrets')
path = os.path.join('secrets' if use_secrets else '', '.ytdlp-update')
next_update_check = None
if os.path.exists(path):
with open(path, "r") as f:
next_update_check = datetime.datetime.fromisoformat(f.read())
if not next_update_check or next_update_check < datetime.datetime.now():
self.update_ytdlp()
next_update_check = datetime.datetime.now() + datetime.timedelta(days=self.ytdlp_update_interval)
with open(path, "w") as f:
f.write(next_update_check.isoformat())
def update_ytdlp(self):
    """
    Upgrade the installed `yt-dlp` package with pip and reload the imported module.

    pip is invoked as `<current interpreter> -m pip` so the upgrade always targets
    the environment this process is running in, rather than whichever `pip`
    executable happens to be first on PATH.

    This is best-effort: any failure (yt-dlp distribution metadata not found, pip
    missing, pip exiting with an error, reload failure) is logged as an error and
    never raised to the caller.
    """
    # local imports: only needed here, and keeps this method independent of the
    # file-level import block
    import sys
    from importlib.metadata import version as get_version

    logger.info("Checking and updating yt-dlp...")
    logger.info(f"Tip: change the 'ytdlp_update_interval' setting to control how often yt-dlp is updated. Set to -1 to disable or 0 to enable on every run. Current setting: {self.ytdlp_update_interval}")

    try:
        old_version = get_version("yt-dlp")
        # update with pip through the running interpreter (works in any environment that has pip installed)
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--upgrade", "yt-dlp"],
            check=True,
            capture_output=True,
        )

        # pip only prints this line when it actually installed a new version
        if "Successfully installed yt-dlp" in result.stdout.decode(errors="replace"):
            # packages changed on disk while the program is running
            importlib.invalidate_caches()
            new_version = get_version("yt-dlp")
            logger.info(f"yt-dlp successfully updated (from {old_version} to {new_version})")
            importlib.reload(yt_dlp)
        else:
            logger.info("yt-dlp already up to date")
    except subprocess.CalledProcessError as e:
        # include pip's own error output, which `str(e)` does not contain
        pip_stderr = e.stderr.decode(errors="replace").strip() if e.stderr else ""
        logger.error(f"Error updating yt-dlp: {e}\n{pip_stderr}")
    except Exception as e:
        logger.error(f"Error updating yt-dlp: {e}")
def suitable_extractors(self, url: str) -> Generator[str, None, None]:
"""
Returns a list of valid extractors for the given URL"""