Finish off timestamping module

pull/247/head
Patrick Robertson 2025-03-12 10:24:57 +00:00
rodzic 28041d94d9
commit 1423c10363
7 zmienionych plików z 130 dodań i 63 usunięć

30
poetry.lock wygenerowano
Wyświetl plik

@ -1361,6 +1361,22 @@ rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"] signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "opentimestamps"
version = "0.4.5"
description = "Create and verify OpenTimestamps proofs"
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "opentimestamps-0.4.5-py3-none-any.whl", hash = "sha256:a4912b3bd1b612a3ef5fac925b9137889e6c5cb91cc9e76c8202a2bf8abe26b5"},
{file = "opentimestamps-0.4.5.tar.gz", hash = "sha256:56726ccde97fb67f336a7f237ce36808e5593c3089d68d900b1c83d0ebf9dcfa"},
]
[package.dependencies]
pycryptodomex = ">=3.3.1"
python-bitcoinlib = ">=0.9.0,<0.13.0"
[[package]] [[package]]
name = "oscrypto" name = "oscrypto"
version = "1.3.0" version = "1.3.0"
@ -1834,6 +1850,18 @@ pytest = ">=6.2.5"
[package.extras] [package.extras]
dev = ["pre-commit", "pytest-asyncio", "tox"] dev = ["pre-commit", "pytest-asyncio", "tox"]
[[package]]
name = "python-bitcoinlib"
version = "0.12.2"
description = "The Swiss Army Knife of the Bitcoin protocol."
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "python-bitcoinlib-0.12.2.tar.gz", hash = "sha256:c65ab61427c77c38d397bfc431f71d86fd355b453a536496ec3fcb41bd10087d"},
{file = "python_bitcoinlib-0.12.2-py3-none-any.whl", hash = "sha256:2f29a9f475f21c12169b3a6cc8820f34f11362d7ff1200a5703dce3e4e903a44"},
]
[[package]] [[package]]
name = "python-dateutil" name = "python-dateutil"
version = "2.9.0.post0" version = "2.9.0.post0"
@ -3185,4 +3213,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<3.13" python-versions = ">=3.10,<3.13"
content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188" content-hash = "e42f3bc122fe5d98deb6aa224ddf531b6f45a50b7c61213721ff5c8258e424e3"

Wyświetl plik

@ -57,6 +57,7 @@ dependencies = [
"certvalidator (>=0.0.0)", "certvalidator (>=0.0.0)",
"rich-argparse (>=1.6.0,<2.0.0)", "rich-argparse (>=1.6.0,<2.0.0)",
"ruamel-yaml (>=0.18.10,<0.19.0)", "ruamel-yaml (>=0.18.10,<0.19.0)",
"opentimestamps (>=0.4.5,<0.5.0)",
] ]
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]

Wyświetl plik

@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, TYPE_CHECKING from typing import List, TYPE_CHECKING, Type
import shutil import shutil
import ast import ast
import copy import copy
@ -57,7 +57,7 @@ class ModuleFactory:
HAS_SETUP_PATHS = True HAS_SETUP_PATHS = True
def get_module(self, module_name: str, config: dict) -> BaseModule: def get_module(self, module_name: str, config: dict) -> Type[BaseModule]:
""" """
Gets and sets up a module using the provided config Gets and sets up a module using the provided config

Wyświetl plik

@ -6,7 +6,6 @@
"python": [ "python": [
"loguru", "loguru",
"opentimestamps", "opentimestamps",
"slugify",
], ],
}, },
"configs": { "configs": {
@ -19,14 +18,16 @@
"default": [ "default": [
"https://alice.btc.calendar.opentimestamps.org", "https://alice.btc.calendar.opentimestamps.org",
"https://bob.btc.calendar.opentimestamps.org", "https://bob.btc.calendar.opentimestamps.org",
"https://finney.calendar.eternitywall.com" "https://finney.calendar.eternitywall.com",
# "https://ots.btc.catallaxy.com/", # ipv4 only
], ],
"help": "List of OpenTimestamps calendar servers to use for timestamping.", "help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\
https://opentimestamps.org/#calendars",
"type": "list" "type": "list"
}, },
"calendar_whitelist": { "calendar_whitelist": {
"default": [], "default": [],
"help": "Optional whitelist of calendar servers. If empty, all calendar servers are allowed.", "help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']",
"type": "list" "type": "list"
}, },
"verify_timestamps": { "verify_timestamps": {
@ -38,6 +39,9 @@
"description": """ "description": """
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time. Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.
Uses OpenTimestamps, a service that timestamps data using the Bitcoin blockchain, providing a decentralized
and secure way to prove that data existed at a certain point in time.
### Features ### Features
- Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain - Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain
- Verifies existing timestamp proofs to confirm the time a file existed - Verifies existing timestamp proofs to confirm the time a file existed

Wyświetl plik

@ -1,36 +1,19 @@
import os import os
import hashlib import hashlib
from importlib.metadata import version from typing import TYPE_CHECKING
from slugify import slugify
from loguru import logger from loguru import logger
import opentimestamps import opentimestamps
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
from opentimestamps.core.op import OpSHA256
from opentimestamps.core import serialize
from auto_archiver.core import Enricher from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, Media from auto_archiver.core import Metadata, Media
from auto_archiver.version import __version__ from auto_archiver.utils.misc import calculate_file_hash
class OpentimestampsEnricher(Enricher): class OpentimestampsEnricher(Enricher):
"""
Uses OpenTimestamps to create and verify timestamps for files. OpenTimestamps is a service that
timestamps data using the Bitcoin blockchain, providing a decentralized and secure way to prove
that data existed at a certain point in time.
The enricher hashes files in the archive and creates timestamp proofs that can later be verified.
These proofs are stored alongside the original files and can be used to verify the timestamp
even if the OpenTimestamps calendar servers are unavailable.
"""
def setup(self):
# Initialize any resources needed
pass
def cleanup(self) -> None:
# Clean up any resources used
pass
def enrich(self, to_enrich: Metadata) -> None: def enrich(self, to_enrich: Metadata) -> None:
url = to_enrich.get_url() url = to_enrich.get_url()
@ -38,7 +21,7 @@ class OpentimestampsEnricher(Enricher):
# Get the media files to timestamp # Get the media files to timestamp
media_files = [m for m in to_enrich.media if m.get("filename") and not m.get("opentimestamps")] media_files = [m for m in to_enrich.media if m.get("filename") and not m.get("opentimestamps")]
if not media_files: if not media_files:
logger.warning(f"No files found to timestamp in {url=}") logger.warning(f"No files found to timestamp in {url=}")
return return
@ -52,21 +35,26 @@ class OpentimestampsEnricher(Enricher):
logger.warning(f"File not found: {file_path}") logger.warning(f"File not found: {file_path}")
continue continue
# Create timestamp for the file # Create timestamp for the file - hash is SHA256
# Note: ONLY SHA256 is used/supported here. Opentimestamps supports other hashes, but not SHA3-512
# see opentimestamps.core.op
logger.debug(f"Creating timestamp for {file_path}") logger.debug(f"Creating timestamp for {file_path}")
file_hash = None
# Hash the file
with open(file_path, 'rb') as f: with open(file_path, 'rb') as f:
file_bytes = f.read() file_hash = OpSHA256().hash_fd(f)
file_hash = hashlib.sha256(file_bytes).digest()
if not file_hash:
logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}")
continue
# Create a timestamp with the file hash # Create a timestamp with the file hash
timestamp = Timestamp(file_hash) timestamp = Timestamp(file_hash)
# Create a detached timestamp file with the timestamp # Create a detached timestamp file with the hash operation and timestamp
detached_timestamp = DetachedTimestampFile(timestamp) detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)
# Submit to calendar servers # Submit to calendar servers
submitted_to_calendar = False
if self.use_calendars: if self.use_calendars:
logger.debug(f"Submitting timestamp to calendar servers for {file_path}") logger.debug(f"Submitting timestamp to calendar servers for {file_path}")
calendars = [] calendars = []
@ -76,9 +64,11 @@ class OpentimestampsEnricher(Enricher):
whitelist = set(self.calendar_whitelist) whitelist = set(self.calendar_whitelist)
# Create calendar instances # Create calendar instances
calendar_urls = []
for url in self.calendar_urls: for url in self.calendar_urls:
if url in whitelist: if url in whitelist:
calendars.append(RemoteCalendar(url)) calendars.append(RemoteCalendar(url))
calendar_urls.append(url)
# Submit the hash to each calendar # Submit the hash to each calendar
for calendar in calendars: for calendar in calendars:
@ -86,15 +76,35 @@ class OpentimestampsEnricher(Enricher):
calendar_timestamp = calendar.submit(file_hash) calendar_timestamp = calendar.submit(file_hash)
timestamp.merge(calendar_timestamp) timestamp.merge(calendar_timestamp)
logger.debug(f"Successfully submitted to calendar: {calendar.url}") logger.debug(f"Successfully submitted to calendar: {calendar.url}")
submitted_to_calendar = True
except Exception as e: except Exception as e:
logger.warning(f"Failed to submit to calendar {calendar.url}: {e}") logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")
# If all calendar submissions failed, add pending attestations
if not submitted_to_calendar and not timestamp.attestations:
logger.info("All calendar submissions failed, creating pending attestations")
for url in calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
else: else:
logger.info("Skipping calendar submission as per configuration") logger.info("Skipping calendar submission as per configuration")
# Add dummy pending attestation for testing when calendars are disabled
for url in self.calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
# Save the timestamp proof to a file # Save the timestamp proof to a file
timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots") timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
with open(timestamp_path, 'wb') as f: try:
detached_timestamp.serialize(f) with open(timestamp_path, 'wb') as f:
# Create a serialization context and write to the file
ctx = serialize.BytesSerializationContext()
detached_timestamp.serialize(ctx)
f.write(ctx.getbytes())
except Exception as e:
logger.warning(f"Failed to serialize timestamp file: {e}")
continue
# Create media for the timestamp file # Create media for the timestamp file
timestamp_media = Media(filename=timestamp_path) timestamp_media = Media(filename=timestamp_path)
@ -106,6 +116,8 @@ class OpentimestampsEnricher(Enricher):
verification_info = self.verify_timestamp(detached_timestamp) verification_info = self.verify_timestamp(detached_timestamp)
for key, value in verification_info.items(): for key, value in verification_info.items():
timestamp_media.set(key, value) timestamp_media.set(key, value)
else:
logger.warning(f"Not verifying the timestamp for media file {file_path}")
timestamp_files.append(timestamp_media) timestamp_files.append(timestamp_media)
@ -151,7 +163,7 @@ class OpentimestampsEnricher(Enricher):
# Process different types of attestations # Process different types of attestations
if isinstance(attestation, PendingAttestation): if isinstance(attestation, PendingAttestation):
info["type"] = "pending" info["type"] = "pending"
info["uri"] = attestation.uri.decode('utf-8') info["uri"] = attestation.uri
elif isinstance(attestation, BitcoinBlockHeaderAttestation): elif isinstance(attestation, BitcoinBlockHeaderAttestation):
info["type"] = "bitcoin" info["type"] = "bitcoin"

Wyświetl plik

@ -30,7 +30,7 @@ class TimestampingEnricher(Enricher):
if not len(hashes): if not len(hashes):
logger.warning(f"No hashes found in {url=}") logger.warning(f"No hashes found in {url=}")
return return
tmp_dir = self.tmp_dir tmp_dir = self.tmp_dir
hashes_fn = os.path.join(tmp_dir, "hashes.txt") hashes_fn = os.path.join(tmp_dir, "hashes.txt")

Wyświetl plik

@ -10,53 +10,69 @@ from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAtt
from auto_archiver.core import Metadata, Media from auto_archiver.core import Metadata, Media
# TODO: Remove once timestamping overhaul is merged
@pytest.fixture @pytest.fixture
def sample_file_path(): def sample_media(tmp_path) -> Media:
with tempfile.NamedTemporaryFile(delete=False) as tmp: """Fixture creating a Media object with temporary source file"""
tmp.write(b"This is a test file content for OpenTimestamps") src_file = tmp_path / "source.txt"
return tmp.name src_file.write_text("test content")
return Media(_key="subdir/test.txt", filename=str(src_file))
@pytest.fixture
def sample_file_path(tmp_path):
tmp_file = tmp_path / "test.txt"
tmp_file.write_text("This is a test file content for OpenTimestamps")
return str(tmp_file)
@pytest.fixture @pytest.fixture
def detached_timestamp_file(): def detached_timestamp_file():
"""Create a simple detached timestamp file for testing""" """Create a simple detached timestamp file for testing"""
file_hash = hashlib.sha256(b"Test content").digest() file_hash = hashlib.sha256(b"Test content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash) timestamp = Timestamp(file_hash)
# Add a pending attestation # Add a pending attestation
pending = PendingAttestation(b"https://example.calendar.com") pending = PendingAttestation("https://example.calendar.com")
timestamp.attestations.add(pending) timestamp.attestations.add(pending)
# Add a bitcoin attestation # Add a bitcoin attestation
bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height
timestamp.attestations.add(bitcoin) timestamp.attestations.add(bitcoin)
return DetachedTimestampFile(timestamp) return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.fixture @pytest.fixture
def verified_timestamp_file(): def verified_timestamp_file():
"""Create a timestamp file with a Bitcoin attestation""" """Create a timestamp file with a Bitcoin attestation"""
file_hash = hashlib.sha256(b"Verified content").digest() file_hash = hashlib.sha256(b"Verified content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash) timestamp = Timestamp(file_hash)
# Add only a Bitcoin attestation # Add only a Bitcoin attestation
bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height
timestamp.attestations.add(bitcoin) timestamp.attestations.add(bitcoin)
return DetachedTimestampFile(timestamp) return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.fixture @pytest.fixture
def pending_timestamp_file(): def pending_timestamp_file():
"""Create a timestamp file with only pending attestations""" """Create a timestamp file with only pending attestations"""
file_hash = hashlib.sha256(b"Pending content").digest() file_hash = hashlib.sha256(b"Pending content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash) timestamp = Timestamp(file_hash)
# Add only pending attestations # Add only pending attestations
pending1 = PendingAttestation(b"https://example1.calendar.com") pending1 = PendingAttestation("https://example1.calendar.com")
pending2 = PendingAttestation(b"https://example2.calendar.com") pending2 = PendingAttestation("https://example2.calendar.com")
timestamp.attestations.add(pending1) timestamp.attestations.add(pending1)
timestamp.attestations.add(pending2) timestamp.attestations.add(pending2)
return DetachedTimestampFile(timestamp) return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.mark.download @pytest.mark.download
def test_download_tsr(setup_module, mocker): def test_download_tsr(setup_module, mocker):
@ -66,7 +82,7 @@ def test_download_tsr(setup_module, mocker):
test_timestamp = Timestamp(hashlib.sha256(b"test").digest()) test_timestamp = Timestamp(hashlib.sha256(b"test").digest())
mock_submit.return_value = test_timestamp mock_submit.return_value = test_timestamp
# Setup enricher
ots = setup_module("opentimestamps_enricher") ots = setup_module("opentimestamps_enricher")
# Create a calendar # Create a calendar
@ -121,6 +137,7 @@ def test_verify_pending_only(setup_module, pending_timestamp_file):
def test_verify_bitcoin_completed(setup_module, verified_timestamp_file): def test_verify_bitcoin_completed(setup_module, verified_timestamp_file):
"""Test verification of timestamps with completed Bitcoin attestations""" """Test verification of timestamps with completed Bitcoin attestations"""
ots = setup_module("opentimestamps_enricher") ots = setup_module("opentimestamps_enricher")
verification_info = ots.verify_timestamp(verified_timestamp_file) verification_info = ots.verify_timestamp(verified_timestamp_file)
@ -136,15 +153,21 @@ def test_verify_bitcoin_completed(setup_module, verified_timestamp_file):
def test_full_enriching(setup_module, sample_file_path, sample_media, mocker): def test_full_enriching(setup_module, sample_file_path, sample_media, mocker):
"""Test the complete enrichment process""" """Test the complete enrichment process"""
# Mock the calendar submission to avoid network requests # Mock the calendar submission to avoid network requests
mock_calendar = mocker.patch.object(RemoteCalendar, 'submit') mock_calendar = mocker.patch.object(RemoteCalendar, 'submit')
test_timestamp = Timestamp(hashlib.sha256(b"test").digest())
# Add a bitcoin attestation to the test timestamp
bitcoin = BitcoinBlockHeaderAttestation(783000)
test_timestamp.attestations.add(bitcoin)
mock_calendar.return_value = test_timestamp
# Setup enricher # Create a function that returns a new timestamp for each call
def side_effect(digest):
test_timestamp = Timestamp(digest)
# Add a bitcoin attestation to the test timestamp
bitcoin = BitcoinBlockHeaderAttestation(783000)
test_timestamp.attestations.add(bitcoin)
return test_timestamp
mock_calendar.side_effect = side_effect
ots = setup_module("opentimestamps_enricher") ots = setup_module("opentimestamps_enricher")
# Create test metadata with sample file # Create test metadata with sample file
@ -176,8 +199,6 @@ def test_full_enriching(setup_module, sample_file_path, sample_media, mocker):
assert timestamp_media.get("attestation_count") == 1 assert timestamp_media.get("attestation_count") == 1
def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_media, mocker): def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_media, mocker):
"""Test enrichment process with calendars disabled"""
# Setup enricher with calendars disabled
ots = setup_module("opentimestamps_enricher", {"use_calendars": False}) ots = setup_module("opentimestamps_enricher", {"use_calendars": False})
# Create test metadata with sample file # Create test metadata with sample file
@ -198,7 +219,8 @@ def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_medi
# Verify status should be false since we didn't use calendars # Verify status should be false since we didn't use calendars
assert timestamp_media.get("verified") == False assert timestamp_media.get("verified") == False
assert timestamp_media.get("attestation_count") == 0 # We expect 3 pending attestations (one for each calendar URL)
assert timestamp_media.get("attestation_count") == 3
def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_media, mocker): def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_media, mocker):
"""Test enrichment when calendar servers return errors""" """Test enrichment when calendar servers return errors"""
@ -206,7 +228,7 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me
mock_calendar = mocker.patch.object(RemoteCalendar, 'submit') mock_calendar = mocker.patch.object(RemoteCalendar, 'submit')
mock_calendar.side_effect = Exception("Calendar server error") mock_calendar.side_effect = Exception("Calendar server error")
# Setup enricher
ots = setup_module("opentimestamps_enricher") ots = setup_module("opentimestamps_enricher")
# Create test metadata with sample file # Create test metadata with sample file
@ -224,11 +246,11 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me
# Verify status should be false since calendar submissions failed # Verify status should be false since calendar submissions failed
timestamp_media = metadata.media[1] timestamp_media = metadata.media[1]
assert timestamp_media.get("verified") == False assert timestamp_media.get("verified") == False
assert timestamp_media.get("attestation_count") == 0 # We expect 3 pending attestations (one for each calendar URL that's enabled by default in __manifest__)
assert timestamp_media.get("attestation_count") == 3
def test_no_files_to_stamp(setup_module): def test_no_files_to_stamp(setup_module):
"""Test enrichment with no files to timestamp""" """Test enrichment with no files to timestamp"""
# Setup enricher
ots = setup_module("opentimestamps_enricher") ots = setup_module("opentimestamps_enricher")
# Create empty metadata # Create empty metadata