diff --git a/src/archivers/telethon_archiverv2.py b/src/archivers/telethon_archiverv2.py index 6851cb5..819070a 100644 --- a/src/archivers/telethon_archiverv2.py +++ b/src/archivers/telethon_archiverv2.py @@ -136,7 +136,7 @@ class TelethonArchiver(Archiverv2): for i, om_url in enumerate(other_media_urls): filename = os.path.join(tmp_dir, f'{chat}_{group_id}_{i}') self.download_from_url(om_url, filename) - result.add_media(Media(filename=filename, id=f"{group_id}_{i}")) + result.add_media(Media(filename=filename), id=f"{group_id}_{i}") filename_dest = os.path.join(tmp_dir, f'{chat}_{group_id}', str(mp.id)) filename = self.client.download_media(mp.media, filename_dest) diff --git a/src/configs/v2config.py b/src/configs/v2config.py index 5b47d0f..dec3565 100644 --- a/src/configs/v2config.py +++ b/src/configs/v2config.py @@ -57,7 +57,7 @@ class ConfigV2: assert "." not in child.name, f"class prop name cannot contain dots('.'): {child.name}" assert "." not in config, f"config property cannot contain dots('.'): {config}" config_path = f"{child.name}.{config}" - parser.add_argument(f'--{config_path}', action='store', dest=config_path, help=f"{details['help']} (defaults to {details['default']})") + parser.add_argument(f'--{config_path}', action='store', dest=config_path, help=f"{details['help']} (defaults to {details['default']})", choices=details.get("choices", None)) self.defaults[config_path] = details["default"] if "cli_set" in details: self.cli_ops[config_path] = details["cli_set"] diff --git a/src/enrichers/__init__.py b/src/enrichers/__init__.py index 2a871d1..95b3fad 100644 --- a/src/enrichers/__init__.py +++ b/src/enrichers/__init__.py @@ -1,3 +1,4 @@ from .enricher import Enricher from .screenshot_enricher import ScreenshotEnricher -from .wayback_enricher import WaybackEnricher \ No newline at end of file +from .wayback_enricher import WaybackEnricher +from .hash_enricher import HashEnricher \ No newline at end of file diff --git a/src/enrichers/hash_enricher.py 
b/src/enrichers/hash_enricher.py new file mode 100644 index 0000000..786c861 --- /dev/null +++ b/src/enrichers/hash_enricher.py @@ -0,0 +1,56 @@
+import hashlib
+from utils import Webdriver
+from . import Enricher
+from metadata import Metadata
+from loguru import logger
+from selenium.common.exceptions import TimeoutException
+import time, requests
+
+
+class HashEnricher(Enricher):
+    """
+    Calculates hashes for Media instances
+
+    Each media file is hashed with the configured algorithm and the result
+    is stored on the media under the "hash" key as "<algorithm>:<hexdigest>".
+    """
+    name = "hash_enricher"
+    # read size used when streaming a file through the hash (1 MiB)
+    _CHUNK_SIZE = 1024 * 1024
+
+    def __init__(self, config: dict) -> None:
+        # without this STEP.__init__ is not called
+        super().__init__(config)
+        # NOTE(review): self.algorithm is presumably populated from config by the base initializer - confirm
+        algo_choices = self.configs()["algorithm"]["choices"]
+        assert self.algorithm in algo_choices, f"Invalid hash algorithm selected, must be one of {algo_choices} (you selected {self.algorithm})."
+
+    @staticmethod
+    def configs() -> dict:
+        """Describe the "algorithm" option: its default, help text and allowed choices."""
+        return {
+            "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]}
+        }
+
+    def enrich(self, to_enrich: Metadata) -> None:
+        """
+        Hash every media file of `to_enrich` and record it under the "hash" key.
+
+        Files are read in chunks so a large media file is never held in memory at once.
+        """
+        url = to_enrich.get_url()
+        logger.debug(f"calculating media hashes for {url=} (using {self.algorithm})")
+
+        # config name -> hashlib constructor, resolved once instead of per file
+        constructors = {"SHA-256": hashlib.sha256, "SHA3-512": hashlib.sha3_512}
+        new_digest = constructors.get(self.algorithm)
+        if new_digest is None:
+            # unsupported algorithm: there is nothing to record for any media
+            return
+
+        for m in to_enrich.media:
+            digest = new_digest()
+            with open(m.filename, "rb") as f:
+                while chunk := f.read(self._CHUNK_SIZE):
+                    digest.update(chunk)
+            m.set("hash", f"{self.algorithm}:{digest.hexdigest()}")
diff --git a/src/enrichers/screenshot_enricher.py b/src/enrichers/screenshot_enricher.py index b008e52..0375e3b 100644 --- a/src/enrichers/screenshot_enricher.py +++ b/src/enrichers/screenshot_enricher.py @@ -27,7 +27,7 @@ class ScreenshotEnricher(Enricher): time.sleep(2) screenshot_file = os.path.join(to_enrich.get_tmp_dir(), f"screenshot_{str(uuid.uuid4())[0:8]}.png") driver.save_screenshot(screenshot_file) - to_enrich.add_media(Media(filename=screenshot_file, id="screenshot")) + 
to_enrich.add_media(Media(filename=screenshot_file), id="screenshot") except TimeoutException: logger.info("TimeoutException loading page for screenshot") except Exception as e: diff --git a/src/formatters/templates/html_template.html b/src/formatters/templates/html_template.html index f488a5f..e757cae 100644 --- a/src/formatters/templates/html_template.html +++ b/src/formatters/templates/html_template.html @@ -39,12 +39,29 @@ .center { text-align: center; } + + .copy:hover { + font-weight: 600; + cursor: copy; + } + + #notification { + position: fixed; + right: 20px; + top: 20px; + background: aquamarine; + box-shadow: 6px 8px 5px 0px #000000; + padding: 10px; + font-size: large; + display: none; + }
+title: '{{ title }}'
+title: '{{ title }}'
|
{% for url in m.urls %}
+ {% if 'http' in url %}
{% if 'image' in m.mimetype %}
- |
|
{{ key }} | -{{ metadata[key] | urlize }} | ++ {{ metadata[key] | urlize }} + |
Made with bellingcat/auto-archiver
+