kopia lustrzana https://github.com/bellingcat/auto-archiver
mute formatter and docker
rodzic
c261361ac8
commit
f5b7c3a5ea
|
@ -22,6 +22,7 @@ RUN pip install --upgrade pip && \
|
|||
COPY Pipfile Pipfile.lock ./
|
||||
RUN pipenv install --python=3.10 --system --deploy
|
||||
ENV IS_DOCKER=1
|
||||
# doing this at the end helps during development, builds are quick
|
||||
COPY ./src/ .
|
||||
|
||||
# TODO: figure out how to make volumes not be root, does it depend on host or dockerfile?
|
||||
|
|
|
@ -4,9 +4,10 @@ import argparse, yaml
|
|||
from dataclasses import dataclass, field
|
||||
from typing import List
|
||||
from collections import defaultdict
|
||||
from loguru import logger
|
||||
|
||||
from ..archivers import Archiver
|
||||
from ..feeders import Feeder, CLIFeeder
|
||||
from ..feeders import Feeder
|
||||
from ..databases import Database
|
||||
from ..formatters import Formatter
|
||||
from ..storages import Storage
|
||||
|
@ -80,7 +81,6 @@ class Config:
|
|||
# 2. read YAML config file (or use provided value)
|
||||
self.yaml_config = self.read_yaml(yaml_config_filename)
|
||||
|
||||
# print(f"{self.yaml_config.get('configurations', {})=}")
|
||||
# 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
|
||||
self.config = defaultdict(dict)
|
||||
for config_path, default in self.defaults.items():
|
||||
|
@ -90,7 +90,6 @@ class Config:
|
|||
val = self.cli_ops[config_path](val, default)
|
||||
if val is None:
|
||||
val = self.yaml_config.get("configurations", {}).get(child, {}).get(config, default)
|
||||
# print(child, config, val)
|
||||
self.config[child][config] = val
|
||||
self.config = dict(self.config)
|
||||
|
||||
|
@ -99,21 +98,19 @@ class Config:
|
|||
assert "archivers" in steps, "your configuration steps are missing the archivers property"
|
||||
assert "storages" in steps, "your configuration steps are missing the storages property"
|
||||
|
||||
# print("config.py", self.config)
|
||||
|
||||
self.feeder = Feeder.init(steps.get("feeder", "cli_feeder"), self.config)
|
||||
self.formatter = Formatter.init(steps.get("formatter", "html_formatter"), self.config)
|
||||
self.formatter = Formatter.init(steps.get("formatter", "mute_formatter"), self.config)
|
||||
self.enrichers = [Enricher.init(e, self.config) for e in steps.get("enrichers", [])]
|
||||
self.archivers = [Archiver.init(e, self.config) for e in (steps.get("archivers") or [])]
|
||||
self.databases = [Database.init(e, self.config) for e in steps.get("databases", [])]
|
||||
self.storages = [Storage.init(e, self.config) for e in steps.get("storages", [])]
|
||||
|
||||
print("feeder", self.feeder)
|
||||
print("enrichers", [e for e in self.enrichers])
|
||||
print("archivers", [e for e in self.archivers])
|
||||
print("databases", [e for e in self.databases])
|
||||
print("storages", [e for e in self.storages])
|
||||
print("formatter", self.formatter)
|
||||
logger.info(f"FEEDER: {self.feeder.name}")
|
||||
logger.info(f"ENRICHERS: {[x.name for x in self.enrichers]}")
|
||||
logger.info(f"ARCHIVERS: {[x.name for x in self.archivers]}")
|
||||
logger.info(f"DATABASES: {[x.name for x in self.databases]}")
|
||||
logger.info(f"STORAGES: {[x.name for x in self.storages]}")
|
||||
logger.info(f"FORMATTER: {self.formatter.name}")
|
||||
|
||||
def read_yaml(self, yaml_filename: str) -> dict:
|
||||
with open(yaml_filename, "r", encoding="utf-8") as inf:
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from __future__ import annotations
|
||||
from ast import List
|
||||
from typing import Union, Dict
|
||||
from dataclasses import dataclass
|
||||
from typing import Union
|
||||
|
||||
from ..archivers import Archiver
|
||||
from ..feeders import Feeder
|
||||
|
@ -12,7 +11,7 @@ from ..databases import Database
|
|||
from .media import Media
|
||||
from .metadata import Metadata
|
||||
|
||||
import tempfile, time, traceback
|
||||
import tempfile, traceback
|
||||
from loguru import logger
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ class CLIFeeder(Feeder):
|
|||
# without this STEP.__init__ is not called
|
||||
super().__init__(config)
|
||||
if type(self.urls) != list or len(self.urls) == 0:
|
||||
logger.info(f"CLI Feeder did not receive any URL to process")
|
||||
raise Exception("CLI Feeder did not receive any URL to process")
|
||||
|
||||
@staticmethod
|
||||
def configs() -> dict:
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
from .formatter import Formatter
|
||||
from .html_formatter import HtmlFormatter
|
||||
from .html_formatter import HtmlFormatter
|
||||
from .mute_formatter import MuteFormatter
|
|
@ -1,8 +1,7 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from abc import abstractmethod
|
||||
from ..core import Metadata
|
||||
from ..core import Step
|
||||
from ..core import Metadata, Media, Step
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -18,4 +17,4 @@ class Formatter(Step):
|
|||
return Step.init(name, config, Formatter)
|
||||
|
||||
@abstractmethod
|
||||
def format(self, item) -> Metadata: return None
|
||||
def format(self, item: Metadata) -> Media: return None
|
|
@ -1,6 +1,5 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from abc import abstractmethod
|
||||
import mimetypes
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import uuid, os, pathlib
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from ..core import Metadata, Media
|
||||
from . import Formatter
|
||||
|
||||
|
||||
@dataclass
class MuteFormatter(Formatter):
    """Formatter step whose ``format`` always returns ``None``.

    Identified by the step name ``"mute_formatter"``.
    """

    name = "mute_formatter"

    def __init__(self, config: dict) -> None:
        """Forward ``config`` to the parent initializer."""
        # The parent __init__ must be invoked explicitly; otherwise the
        # Step initialisation logic never runs for this class.
        super().__init__(config)

    def format(self, item: Metadata) -> Media:
        """Ignore ``item`` and return ``None``."""
        return None
|
Ładowanie…
Reference in New Issue