kopia lustrzana https://github.com/bellingcat/auto-archiver
Improved docstrings for base modules
rodzic
8054ea96b3
commit
a0c4a82825
|
@ -1,3 +1,8 @@
|
|||
"""
|
||||
Database module for the auto-archiver that defines the interface for implementing database modules
|
||||
in the media archiving framework.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from typing import Union
|
||||
|
@ -5,6 +10,11 @@ from typing import Union
|
|||
from auto_archiver.core import Metadata, BaseModule
|
||||
|
||||
class Database(BaseModule):
|
||||
"""
|
||||
Base class for implementing database modules in the media archiving framework.
|
||||
|
||||
Subclasses must implement the `fetch` and `done` methods to define platform-specific behavior.
|
||||
"""
|
||||
|
||||
def started(self, item: Metadata) -> None:
|
||||
"""signals the DB that the given item archival has started"""
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
"""
|
||||
Enrichers are modular components that enhance archived content by adding
|
||||
Base module for Enrichers – modular components that enhance archived content by adding
|
||||
context, metadata, or additional processing.
|
||||
|
||||
These add additional information to the context, such as screenshots, hashes, and metadata.
|
||||
|
@ -13,7 +13,16 @@ from abc import abstractmethod
|
|||
from auto_archiver.core import Metadata, BaseModule
|
||||
|
||||
class Enricher(BaseModule):
|
||||
"""Base classes and utilities for enrichers in the Auto-Archiver system."""
|
||||
"""Base classes and utilities for enrichers in the Auto-Archiver system.
|
||||
|
||||
Enricher modules must implement the `enrich` method to define their behavior.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def enrich(self, to_enrich: Metadata) -> None: pass
|
||||
def enrich(self, to_enrich: Metadata) -> None:
|
||||
"""
|
||||
Enriches a Metadata object with additional information or context.
|
||||
|
||||
Takes the metadata object to enrich as an argument and modifies it in place, returning None.
|
||||
"""
|
||||
pass
|
||||
|
|
|
@ -29,14 +29,24 @@ class Extractor(BaseModule):
|
|||
valid_url: re.Pattern = None
|
||||
|
||||
def cleanup(self) -> None:
|
||||
# called when extractors are done, or upon errors, cleanup any resources
|
||||
"""
|
||||
Called when extractors are done, or upon errors, to clean up any resources.
|
||||
"""
|
||||
pass
|
||||
|
||||
def sanitize_url(self, url: str) -> str:
|
||||
# used to clean unnecessary URL parameters OR unfurl redirect links
|
||||
"""
|
||||
Used to clean unnecessary URL parameters OR unfurl redirect links
|
||||
"""
|
||||
return url
|
||||
|
||||
def match_link(self, url: str) -> re.Match:
|
||||
"""
|
||||
Returns a match object if the given URL matches the valid_url pattern, or None if it does not.
|
||||
|
||||
Normally used in the `suitable` method to check if the URL is supported by this extractor.
|
||||
|
||||
"""
|
||||
return self.valid_url.match(url)
|
||||
|
||||
def suitable(self, url: str) -> bool:
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
The feeder base module defines the interface for implementing feeders in the media archiving framework.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from auto_archiver.core import Metadata
|
||||
|
@ -5,5 +9,17 @@ from auto_archiver.core import BaseModule
|
|||
|
||||
class Feeder(BaseModule):
|
||||
|
||||
"""
|
||||
Base class for implementing feeders in the media archiving framework.
|
||||
|
||||
Subclasses must implement the `__iter__` method to define platform-specific behavior.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __iter__(self) -> Metadata: return None
|
||||
def __iter__(self) -> Metadata:
|
||||
"""
|
||||
Returns an iterator (use `yield`) over the items to be archived.
|
||||
|
||||
These should be instances of Metadata, typically created with Metadata().set_url(url).
|
||||
"""
|
||||
return None
|
|
@ -1,9 +1,24 @@
|
|||
"""
|
||||
Base module for formatters – modular components that format metadata into media objects for storage.
|
||||
|
||||
The most commonly used formatter is the HTML formatter, which takes metadata and formats it into an HTML file for storage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from auto_archiver.core import Metadata, Media, BaseModule
|
||||
|
||||
|
||||
class Formatter(BaseModule):
|
||||
"""
|
||||
Base class for implementing formatters in the media archiving framework.
|
||||
|
||||
Subclasses must implement the `format` method to define their behavior.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def format(self, item: Metadata) -> Media: return None
|
||||
def format(self, item: Metadata) -> Media:
|
||||
"""
|
||||
Formats a Metadata object into a user-viewable format (e.g. HTML) and stores it if needed.
|
||||
"""
|
||||
return None
|
|
@ -1,3 +1,7 @@
|
|||
"""
|
||||
Base module for Storage modules – modular components that store media objects in various locations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from typing import IO
|
||||
|
@ -12,6 +16,12 @@ from auto_archiver.core import Media, BaseModule, Metadata
|
|||
from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
|
||||
from auto_archiver.core.module import get_module
|
||||
class Storage(BaseModule):
|
||||
|
||||
"""
|
||||
Base class for implementing storage modules in the media archiving framework.
|
||||
|
||||
Subclasses must implement the `get_cdn_url` and `uploadf` methods to define their behavior.
|
||||
"""
|
||||
|
||||
def store(self, media: Media, url: str, metadata: Metadata=None) -> None:
|
||||
if media.is_stored(in_storage=self):
|
||||
|
@ -22,10 +32,18 @@ class Storage(BaseModule):
|
|||
media.add_url(self.get_cdn_url(media))
|
||||
|
||||
@abstractmethod
|
||||
def get_cdn_url(self, media: Media) -> str: pass
|
||||
def get_cdn_url(self, media: Media) -> str:
|
||||
"""
|
||||
Returns the URL of the media object stored in the CDN.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass
|
||||
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
|
||||
"""
|
||||
Uploads (or saves) a file to the storage service/location.
|
||||
"""
|
||||
pass
|
||||
|
||||
def upload(self, media: Media, **kwargs) -> bool:
|
||||
logger.debug(f'[{self.__class__.__name__}] storing file {media.filename} with key {media.key}')
|
||||
|
|
Ładowanie…
Reference in New Issue