Improved docstrings for base modules

pull/190/head
Patrick Robertson 2025-02-12 11:32:13 +00:00
rodzic 8054ea96b3
commit a0c4a82825
6 zmienionych plików z 87 dodań i 9 usunięć

Wyświetl plik

@ -1,3 +1,8 @@
"""
Database module for the auto-archiver that defines the interface for implementing database modules
in the media archiving framework.
"""
from __future__ import annotations
from abc import abstractmethod
from typing import Union
@ -5,6 +10,11 @@ from typing import Union
from auto_archiver.core import Metadata, BaseModule
class Database(BaseModule):
"""
Base class for implementing database modules in the media archiving framework.
Subclasses must implement the `fetch` and `done` methods to define platform-specific behavior.
"""
def started(self, item: Metadata) -> None:
"""signals the DB that the given item archival has started"""

Wyświetl plik

@ -1,5 +1,5 @@
"""
Enrichers are modular components that enhance archived content by adding
Base module for Enrichers: modular components that enhance archived content by adding
context, metadata, or additional processing.
These add additional information to the context, such as screenshots, hashes, and metadata.
@ -13,7 +13,16 @@ from abc import abstractmethod
from auto_archiver.core import Metadata, BaseModule
class Enricher(BaseModule):
"""Base classes and utilities for enrichers in the Auto-Archiver system."""
"""Base class for enrichers in the Auto-Archiver system.
Enricher modules must implement the `enrich` method to define their behavior.
"""
@abstractmethod
def enrich(self, to_enrich: Metadata) -> None: pass
def enrich(self, to_enrich: Metadata) -> None:
"""
Enriches a Metadata object with additional information or context.
Takes the metadata object to enrich as an argument and modifies it in place, returning None.
"""
pass

Wyświetl plik

@ -29,14 +29,24 @@ class Extractor(BaseModule):
valid_url: re.Pattern = None
def cleanup(self) -> None:
# called when extractors are done, or upon errors, cleanup any resources
"""
Called when extractors are done, or upon errors, to clean up any resources.
"""
pass
def sanitize_url(self, url: str) -> str:
# used to clean unnecessary URL parameters OR unfurl redirect links
"""
Used to clean unnecessary URL parameters OR unfurl redirect links
"""
return url
def match_link(self, url: str) -> re.Match:
"""
Returns a match object if the given URL matches the `valid_url` pattern, or None if it does not.
Normally used in the `suitable` method to check if the URL is supported by this extractor.
"""
return self.valid_url.match(url)
def suitable(self, url: str) -> bool:

Wyświetl plik

@ -1,3 +1,7 @@
"""
The feeder base module defines the interface for implementing feeders in the media archiving framework.
"""
from __future__ import annotations
from abc import abstractmethod
from auto_archiver.core import Metadata
@ -5,5 +9,17 @@ from auto_archiver.core import BaseModule
class Feeder(BaseModule):
"""
Base class for implementing feeders in the media archiving framework.
Subclasses must implement the `__iter__` method to define platform-specific behavior.
"""
@abstractmethod
def __iter__(self) -> Metadata: return None
def __iter__(self) -> Metadata:
"""
Returns an iterator (use `yield`) over the items to be archived.
These should be instances of Metadata, typically created with Metadata().set_url(url).
"""
return None

Wyświetl plik

@ -1,9 +1,24 @@
"""
Base module for formatters: modular components that format metadata into media objects for storage.
The most commonly used formatter is the HTML formatter, which takes metadata and formats it into an HTML file for storage.
"""
from __future__ import annotations
from abc import abstractmethod
from auto_archiver.core import Metadata, Media, BaseModule
class Formatter(BaseModule):
"""
Base class for implementing formatters in the media archiving framework.
Subclasses must implement the `format` method to define their behavior.
"""
@abstractmethod
def format(self, item: Metadata) -> Media: return None
def format(self, item: Metadata) -> Media:
"""
Formats a Metadata object into a user-viewable format (e.g. HTML) and stores it if needed.
"""
return None

Wyświetl plik

@ -1,3 +1,7 @@
"""
Base module for Storage modules: modular components that store media objects in various locations.
"""
from __future__ import annotations
from abc import abstractmethod
from typing import IO
@ -12,6 +16,12 @@ from auto_archiver.core import Media, BaseModule, Metadata
from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
from auto_archiver.core.module import get_module
class Storage(BaseModule):
"""
Base class for implementing storage modules in the media archiving framework.
Subclasses must implement the `get_cdn_url` and `uploadf` methods to define their behavior.
"""
def store(self, media: Media, url: str, metadata: Metadata=None) -> None:
if media.is_stored(in_storage=self):
@ -22,10 +32,18 @@ class Storage(BaseModule):
media.add_url(self.get_cdn_url(media))
@abstractmethod
def get_cdn_url(self, media: Media) -> str: pass
def get_cdn_url(self, media: Media) -> str:
"""
Returns the URL of the media object stored in the CDN.
"""
pass
@abstractmethod
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
"""
Uploads (or saves) a file to the storage service/location.
"""
pass
def upload(self, media: Media, **kwargs) -> bool:
logger.debug(f'[{self.__class__.__name__}] storing file {media.filename} with key {media.key}')