diff --git a/Dockerfile b/Dockerfile index 96b8405..4e15424 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,7 @@ RUN pip install --upgrade pip && \ COPY Pipfile Pipfile.lock ./ RUN pipenv install --python=3.10 --system --deploy ENV IS_DOCKER=1 +# doing this at the end helps during development, builds are quick COPY ./src/ . # TODO: figure out how to make volumes not be root, does it depend on host or dockerfile? diff --git a/src/auto_archiver/core/config.py b/src/auto_archiver/core/config.py index 206593e..5aa725d 100644 --- a/src/auto_archiver/core/config.py +++ b/src/auto_archiver/core/config.py @@ -4,9 +4,10 @@ import argparse, yaml from dataclasses import dataclass, field from typing import List from collections import defaultdict +from loguru import logger from ..archivers import Archiver -from ..feeders import Feeder, CLIFeeder +from ..feeders import Feeder from ..databases import Database from ..formatters import Formatter from ..storages import Storage @@ -80,7 +81,6 @@ class Config: # 2. read YAML config file (or use provided value) self.yaml_config = self.read_yaml(yaml_config_filename) - # print(f"{self.yaml_config.get('configurations', {})=}") # 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default self.config = defaultdict(dict) for config_path, default in self.defaults.items(): @@ -90,7 +90,6 @@ class Config: val = self.cli_ops[config_path](val, default) if val is None: val = self.yaml_config.get("configurations", {}).get(child, {}).get(config, default) - # print(child, config, val) self.config[child][config] = val self.config = dict(self.config) @@ -99,21 +98,19 @@ class Config: assert "archivers" in steps, "your configuration steps are missing the archivers property" assert "storages" in steps, "your configuration steps are missing the storages property" - # print("config.py", self.config) - self.feeder = Feeder.init(steps.get("feeder", "cli_feeder"), self.config) - self.formatter = Formatter.init(steps.get("formatter", "html_formatter"), self.config) + self.formatter = Formatter.init(steps.get("formatter", "mute_formatter"), self.config) self.enrichers = [Enricher.init(e, self.config) for e in steps.get("enrichers", [])] self.archivers = [Archiver.init(e, self.config) for e in (steps.get("archivers") or [])] self.databases = [Database.init(e, self.config) for e in steps.get("databases", [])] self.storages = [Storage.init(e, self.config) for e in steps.get("storages", [])] - print("feeder", self.feeder) - print("enrichers", [e for e in self.enrichers]) - print("archivers", [e for e in self.archivers]) - print("databases", [e for e in self.databases]) - print("storages", [e for e in self.storages]) - print("formatter", self.formatter) + logger.info(f"FEEDER: {self.feeder.name}") + logger.info(f"ENRICHERS: {[x.name for x in self.enrichers]}") + logger.info(f"ARCHIVERS: {[x.name for x in self.archivers]}") + logger.info(f"DATABASES: {[x.name for x in self.databases]}") + logger.info(f"STORAGES: {[x.name for x in self.storages]}") + logger.info(f"FORMATTER: {self.formatter.name}") def read_yaml(self, yaml_filename: str) -> dict: with open(yaml_filename, "r", encoding="utf-8") as inf: diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py index a6cfd4d..e7940a4 100644 --- a/src/auto_archiver/core/orchestrator.py +++ b/src/auto_archiver/core/orchestrator.py @@ -1,7 +1,6 @@ from __future__ import annotations from ast import List -from typing import Union, Dict -from dataclasses import dataclass +from typing import Union from ..archivers import Archiver from ..feeders import Feeder @@ -12,7 +11,7 @@ from ..databases import Database from .media import Media from .metadata import Metadata -import tempfile, time, traceback +import tempfile, traceback from loguru import logger diff --git a/src/auto_archiver/feeders/cli_feeder.py b/src/auto_archiver/feeders/cli_feeder.py index b9c0723..abc7602 100644 --- a/src/auto_archiver/feeders/cli_feeder.py +++ b/src/auto_archiver/feeders/cli_feeder.py @@ -15,7 +15,7 @@ class CLIFeeder(Feeder): # without this STEP.__init__ is not called super().__init__(config) if type(self.urls) != list or len(self.urls) == 0: - logger.info(f"CLI Feeder did not receive any URL to process") + raise Exception("CLI Feeder did not receive any URL to process") @staticmethod def configs() -> dict: diff --git a/src/auto_archiver/formatters/__init__.py b/src/auto_archiver/formatters/__init__.py index 07a52a0..ce8afac 100644 --- a/src/auto_archiver/formatters/__init__.py +++ b/src/auto_archiver/formatters/__init__.py @@ -1,2 +1,3 @@ from .formatter import Formatter -from .html_formatter import HtmlFormatter \ No newline at end of file +from .html_formatter import HtmlFormatter +from .mute_formatter import MuteFormatter \ No newline at end of file diff --git a/src/auto_archiver/formatters/formatter.py b/src/auto_archiver/formatters/formatter.py index 80d5d06..b10477e 100644 --- a/src/auto_archiver/formatters/formatter.py +++ b/src/auto_archiver/formatters/formatter.py @@ -1,8 +1,7 @@ from __future__ import annotations from dataclasses import dataclass from abc import abstractmethod -from ..core import Metadata -from ..core import Step +from ..core import Metadata, Media, Step @dataclass @@ -18,4 +17,4 @@ class Formatter(Step): return Step.init(name, config, Formatter) @abstractmethod - def format(self, item) -> Metadata: return None \ No newline at end of file + def format(self, item: Metadata) -> Media: return None \ No newline at end of file diff --git a/src/auto_archiver/formatters/html_formatter.py b/src/auto_archiver/formatters/html_formatter.py index 1aab084..72db4c0 100644 --- a/src/auto_archiver/formatters/html_formatter.py +++ b/src/auto_archiver/formatters/html_formatter.py @@ -1,6 +1,5 @@ from __future__ import annotations from dataclasses import dataclass -from abc import abstractmethod import mimetypes from jinja2 import Environment, FileSystemLoader import uuid, os, pathlib diff --git a/src/auto_archiver/formatters/mute_formatter.py b/src/auto_archiver/formatters/mute_formatter.py new file mode 100644 index 0000000..81b89b5 --- /dev/null +++ b/src/auto_archiver/formatters/mute_formatter.py @@ -0,0 +1,15 @@ +from __future__ import annotations +from dataclasses import dataclass +from ..core import Metadata, Media +from . import Formatter + + +@dataclass +class MuteFormatter(Formatter): + name = "mute_formatter" + + def __init__(self, config: dict) -> None: + # without this STEP.__init__ is not called + super().__init__(config) + + def format(self, item: Metadata) -> Media: return None \ No newline at end of file