diff --git a/poetry.lock b/poetry.lock index f59d5c9..17365ac 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1361,6 +1361,22 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "opentimestamps" +version = "0.4.5" +description = "Create and verify OpenTimestamps proofs" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "opentimestamps-0.4.5-py3-none-any.whl", hash = "sha256:a4912b3bd1b612a3ef5fac925b9137889e6c5cb91cc9e76c8202a2bf8abe26b5"}, + {file = "opentimestamps-0.4.5.tar.gz", hash = "sha256:56726ccde97fb67f336a7f237ce36808e5593c3089d68d900b1c83d0ebf9dcfa"}, +] + +[package.dependencies] +pycryptodomex = ">=3.3.1" +python-bitcoinlib = ">=0.9.0,<0.13.0" + [[package]] name = "oscrypto" version = "1.3.0" @@ -1834,6 +1850,18 @@ pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "python-bitcoinlib" +version = "0.12.2" +description = "The Swiss Army Knife of the Bitcoin protocol." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "python-bitcoinlib-0.12.2.tar.gz", hash = "sha256:c65ab61427c77c38d397bfc431f71d86fd355b453a536496ec3fcb41bd10087d"}, + {file = "python_bitcoinlib-0.12.2-py3-none-any.whl", hash = "sha256:2f29a9f475f21c12169b3a6cc8820f34f11362d7ff1200a5703dce3e4e903a44"}, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3185,4 +3213,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188" +content-hash = "e42f3bc122fe5d98deb6aa224ddf531b6f45a50b7c61213721ff5c8258e424e3" diff --git a/pyproject.toml b/pyproject.toml index 30693d5..b6b2aa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ dependencies = [ "certvalidator (>=0.0.0)", "rich-argparse (>=1.6.0,<2.0.0)", "ruamel-yaml (>=0.18.10,<0.19.0)", + "opentimestamps (>=0.4.5,<0.5.0)", ] [tool.poetry.group.dev.dependencies] diff --git a/src/auto_archiver/core/module.py b/src/auto_archiver/core/module.py index 2c6617d..96d5420 100644 --- a/src/auto_archiver/core/module.py +++ b/src/auto_archiver/core/module.py @@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin from __future__ import annotations from dataclasses import dataclass -from typing import List, TYPE_CHECKING +from typing import List, TYPE_CHECKING, Type import shutil import ast import copy @@ -57,7 +57,7 @@ class ModuleFactory: HAS_SETUP_PATHS = True - def get_module(self, module_name: str, config: dict) -> BaseModule: + def get_module(self, module_name: str, config: dict) -> Type[BaseModule]: """ Gets and sets up a module using the provided config diff --git a/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py b/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py index 849bb3d..136c0c2 100644 --- a/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py +++ b/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py @@ -6,7 +6,6 @@ "python": [ "loguru", "opentimestamps", - "slugify", ], }, "configs": { @@ -19,14 +18,16 @@ "default": [ "https://alice.btc.calendar.opentimestamps.org", "https://bob.btc.calendar.opentimestamps.org", - "https://finney.calendar.eternitywall.com" + "https://finney.calendar.eternitywall.com", + # "https://ots.btc.catallaxy.com/", # ipv4 only ], - "help": "List of OpenTimestamps calendar servers to use for timestamping.", + "help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\ +https://opentimestamps.org/#calendars", "type": "list" }, "calendar_whitelist": { "default": [], - "help": "Optional whitelist of calendar servers. If empty, all calendar servers are allowed.", + "help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']", "type": "list" }, "verify_timestamps": { @@ -38,6 +39,9 @@ "description": """ Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time. + Uses OpenTimestamps – a service that timestamps data using the Bitcoin blockchain, providing a decentralized + and secure way to prove that data existed at a certain point in time. + ### Features - Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain - Verifies existing timestamp proofs to confirm the time a file existed diff --git a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py index 01e8964..2b74ee6 100644 --- a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py +++ b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py @@ -1,36 +1,19 @@ import os import hashlib -from importlib.metadata import version +from typing import TYPE_CHECKING -from slugify import slugify from loguru import logger import opentimestamps from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation +from opentimestamps.core.op import OpSHA256 +from opentimestamps.core import serialize from auto_archiver.core import Enricher from auto_archiver.core import Metadata, Media -from auto_archiver.version import __version__ - +from auto_archiver.utils.misc import calculate_file_hash class OpentimestampsEnricher(Enricher): - """ - Uses OpenTimestamps to create and verify timestamps for files. OpenTimestamps is a service that - timestamps data using the Bitcoin blockchain, providing a decentralized and secure way to prove - that data existed at a certain point in time. - - The enricher hashes files in the archive and creates timestamp proofs that can later be verified. - These proofs are stored alongside the original files and can be used to verify the timestamp - even if the OpenTimestamps calendar servers are unavailable. - """ - - def setup(self): - # Initialize any resources needed - pass - - def cleanup(self) -> None: - # Clean up any resources used - pass def enrich(self, to_enrich: Metadata) -> None: url = to_enrich.get_url() @@ -38,7 +21,7 @@ class OpentimestampsEnricher(Enricher): # Get the media files to timestamp media_files = [m for m in to_enrich.media if m.get("filename") and not m.get("opentimestamps")] - + if not media_files: logger.warning(f"No files found to timestamp in {url=}") return @@ -52,21 +35,26 @@ class OpentimestampsEnricher(Enricher): logger.warning(f"File not found: {file_path}") continue - # Create timestamp for the file + # Create timestamp for the file - hash is SHA256 + # Note: ONLY SHA256 is used/supported here. Opentimestamps supports other hashes, but not SHA3-512 + # see opentimestamps.core.op logger.debug(f"Creating timestamp for {file_path}") - - # Hash the file + file_hash = None with open(file_path, 'rb') as f: - file_bytes = f.read() - file_hash = hashlib.sha256(file_bytes).digest() + file_hash = OpSHA256().hash_fd(f) + + if not file_hash: + logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}") + continue # Create a timestamp with the file hash timestamp = Timestamp(file_hash) - # Create a detached timestamp file with the timestamp - detached_timestamp = DetachedTimestampFile(timestamp) + # Create a detached timestamp file with the hash operation and timestamp + detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp) # Submit to calendar servers + submitted_to_calendar = False if self.use_calendars: logger.debug(f"Submitting timestamp to calendar servers for {file_path}") calendars = [] @@ -76,9 +64,11 @@ class OpentimestampsEnricher(Enricher): whitelist = set(self.calendar_whitelist) # Create calendar instances + calendar_urls = [] for url in self.calendar_urls: if url in whitelist: calendars.append(RemoteCalendar(url)) + calendar_urls.append(url) # Submit the hash to each calendar for calendar in calendars: @@ -86,15 +76,35 @@ class OpentimestampsEnricher(Enricher): calendar_timestamp = calendar.submit(file_hash) timestamp.merge(calendar_timestamp) logger.debug(f"Successfully submitted to calendar: {calendar.url}") + submitted_to_calendar = True except Exception as e: logger.warning(f"Failed to submit to calendar {calendar.url}: {e}") + + # If all calendar submissions failed, add pending attestations + if not submitted_to_calendar and not timestamp.attestations: + logger.info("All calendar submissions failed, creating pending attestations") + for url in calendar_urls: + pending = PendingAttestation(url) + timestamp.attestations.add(pending) else: logger.info("Skipping calendar submission as per configuration") + + # Add dummy pending attestation for testing when calendars are disabled + for url in self.calendar_urls: + pending = PendingAttestation(url) + timestamp.attestations.add(pending) # Save the timestamp proof to a file timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots") - with open(timestamp_path, 'wb') as f: - detached_timestamp.serialize(f) + try: + with open(timestamp_path, 'wb') as f: + # Create a serialization context and write to the file + ctx = serialize.BytesSerializationContext() + detached_timestamp.serialize(ctx) + f.write(ctx.getbytes()) + except Exception as e: + logger.warning(f"Failed to serialize timestamp file: {e}") + continue # Create media for the timestamp file timestamp_media = Media(filename=timestamp_path) @@ -106,6 +116,8 @@ class OpentimestampsEnricher(Enricher): verification_info = self.verify_timestamp(detached_timestamp) for key, value in verification_info.items(): timestamp_media.set(key, value) + else: + logger.warning(f"Not verifying the timestamp for media file {file_path}") timestamp_files.append(timestamp_media) @@ -151,7 +163,7 @@ class OpentimestampsEnricher(Enricher): # Process different types of attestations if isinstance(attestation, PendingAttestation): info["type"] = "pending" - info["uri"] = attestation.uri.decode('utf-8') + info["uri"] = attestation.uri elif isinstance(attestation, BitcoinBlockHeaderAttestation): info["type"] = "bitcoin" diff --git a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py index 078c1ba..b83e86c 100644 --- a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py +++ b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py @@ -30,7 +30,7 @@ class TimestampingEnricher(Enricher): if not len(hashes): logger.warning(f"No hashes found in {url=}") return - + tmp_dir = self.tmp_dir hashes_fn = os.path.join(tmp_dir, "hashes.txt") diff --git a/tests/enrichers/test_opentimestamps_enricher.py b/tests/enrichers/test_opentimestamps_enricher.py index 5681561..db171e5 100644 --- a/tests/enrichers/test_opentimestamps_enricher.py +++ b/tests/enrichers/test_opentimestamps_enricher.py @@ -10,53 +10,69 @@ from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAtt from auto_archiver.core import Metadata, Media + +# TODO: Remove once timestamping overhaul is merged @pytest.fixture -def sample_file_path(): - with tempfile.NamedTemporaryFile(delete=False) as tmp: - tmp.write(b"This is a test file content for OpenTimestamps") - return tmp.name +def sample_media(tmp_path) -> Media: + """Fixture creating a Media object with temporary source file""" + src_file = tmp_path / "source.txt" + src_file.write_text("test content") + return Media(_key="subdir/test.txt", filename=str(src_file)) + + +@pytest.fixture +def sample_file_path(tmp_path): + tmp_file = tmp_path / "test.txt" + tmp_file.write_text("This is a test file content for OpenTimestamps") + return str(tmp_file) @pytest.fixture def detached_timestamp_file(): """Create a simple detached timestamp file for testing""" file_hash = hashlib.sha256(b"Test content").digest() + from opentimestamps.core.op import OpSHA256 + file_hash_op = OpSHA256() timestamp = Timestamp(file_hash) # Add a pending attestation - pending = PendingAttestation(b"https://example.calendar.com") + pending = PendingAttestation("https://example.calendar.com") timestamp.attestations.add(pending) # Add a bitcoin attestation bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height timestamp.attestations.add(bitcoin) - return DetachedTimestampFile(timestamp) + return DetachedTimestampFile(file_hash_op, timestamp) @pytest.fixture def verified_timestamp_file(): """Create a timestamp file with a Bitcoin attestation""" file_hash = hashlib.sha256(b"Verified content").digest() + from opentimestamps.core.op import OpSHA256 + file_hash_op = OpSHA256() timestamp = Timestamp(file_hash) # Add only a Bitcoin attestation bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height timestamp.attestations.add(bitcoin) - return DetachedTimestampFile(timestamp) + return DetachedTimestampFile(file_hash_op, timestamp) @pytest.fixture def pending_timestamp_file(): """Create a timestamp file with only pending attestations""" file_hash = hashlib.sha256(b"Pending content").digest() + from opentimestamps.core.op import OpSHA256 + file_hash_op = OpSHA256() timestamp = Timestamp(file_hash) # Add only pending attestations - pending1 = PendingAttestation(b"https://example1.calendar.com") - pending2 = PendingAttestation(b"https://example2.calendar.com") + pending1 = PendingAttestation("https://example1.calendar.com") + pending2 = PendingAttestation("https://example2.calendar.com") timestamp.attestations.add(pending1) timestamp.attestations.add(pending2) - return DetachedTimestampFile(timestamp) + return DetachedTimestampFile(file_hash_op, timestamp) @pytest.mark.download def test_download_tsr(setup_module, mocker): @@ -66,7 +82,7 @@ def test_download_tsr(setup_module, mocker): test_timestamp = Timestamp(hashlib.sha256(b"test").digest()) mock_submit.return_value = test_timestamp - # Setup enricher + ots = setup_module("opentimestamps_enricher") # Create a calendar @@ -121,6 +137,7 @@ def test_verify_pending_only(setup_module, pending_timestamp_file): def test_verify_bitcoin_completed(setup_module, verified_timestamp_file): """Test verification of timestamps with completed Bitcoin attestations""" + ots = setup_module("opentimestamps_enricher") verification_info = ots.verify_timestamp(verified_timestamp_file) @@ -136,15 +153,21 @@ def test_verify_bitcoin_completed(setup_module, verified_timestamp_file): def test_full_enriching(setup_module, sample_file_path, sample_media, mocker): """Test the complete enrichment process""" + # Mock the calendar submission to avoid network requests mock_calendar = mocker.patch.object(RemoteCalendar, 'submit') - test_timestamp = Timestamp(hashlib.sha256(b"test").digest()) - # Add a bitcoin attestation to the test timestamp - bitcoin = BitcoinBlockHeaderAttestation(783000) - test_timestamp.attestations.add(bitcoin) - mock_calendar.return_value = test_timestamp - # Setup enricher + # Create a function that returns a new timestamp for each call + def side_effect(digest): + test_timestamp = Timestamp(digest) + # Add a bitcoin attestation to the test timestamp + bitcoin = BitcoinBlockHeaderAttestation(783000) + test_timestamp.attestations.add(bitcoin) + return test_timestamp + + mock_calendar.side_effect = side_effect + + ots = setup_module("opentimestamps_enricher") # Create test metadata with sample file @@ -176,8 +199,6 @@ def test_full_enriching(setup_module, sample_file_path, sample_media, mocker): assert timestamp_media.get("attestation_count") == 1 def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_media, mocker): - """Test enrichment process with calendars disabled""" - # Setup enricher with calendars disabled ots = setup_module("opentimestamps_enricher", {"use_calendars": False}) # Create test metadata with sample file @@ -198,7 +219,8 @@ def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_medi # Verify status should be false since we didn't use calendars assert timestamp_media.get("verified") == False - assert timestamp_media.get("attestation_count") == 0 + # We expect 3 pending attestations (one for each calendar URL) + assert timestamp_media.get("attestation_count") == 3 def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_media, mocker): """Test enrichment when calendar servers return errors""" @@ -206,7 +228,7 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me mock_calendar = mocker.patch.object(RemoteCalendar, 'submit') mock_calendar.side_effect = Exception("Calendar server error") - # Setup enricher + ots = setup_module("opentimestamps_enricher") # Create test metadata with sample file @@ -224,11 +246,11 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me # Verify status should be false since calendar submissions failed timestamp_media = metadata.media[1] assert timestamp_media.get("verified") == False - assert timestamp_media.get("attestation_count") == 0 + # We expect 3 pending attestations (one for each calendar URL that's enabled by default in __manifest__) + assert timestamp_media.get("attestation_count") == 3 def test_no_files_to_stamp(setup_module): """Test enrichment with no files to timestamp""" - # Setup enricher ots = setup_module("opentimestamps_enricher") # Create empty metadata