Finish off timestamping module

pull/247/head
Patrick Robertson 2025-03-12 10:24:57 +00:00
rodzic 28041d94d9
commit 1423c10363
7 zmienionych plików z 130 dodań i 63 usunięć

30
poetry.lock wygenerowano
Wyświetl plik

@ -1361,6 +1361,22 @@ rsa = ["cryptography (>=3.0.0)"]
signals = ["blinker (>=1.4.0)"]
signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "opentimestamps"
version = "0.4.5"
description = "Create and verify OpenTimestamps proofs"
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "opentimestamps-0.4.5-py3-none-any.whl", hash = "sha256:a4912b3bd1b612a3ef5fac925b9137889e6c5cb91cc9e76c8202a2bf8abe26b5"},
{file = "opentimestamps-0.4.5.tar.gz", hash = "sha256:56726ccde97fb67f336a7f237ce36808e5593c3089d68d900b1c83d0ebf9dcfa"},
]
[package.dependencies]
pycryptodomex = ">=3.3.1"
python-bitcoinlib = ">=0.9.0,<0.13.0"
[[package]]
name = "oscrypto"
version = "1.3.0"
@ -1834,6 +1850,18 @@ pytest = ">=6.2.5"
[package.extras]
dev = ["pre-commit", "pytest-asyncio", "tox"]
[[package]]
name = "python-bitcoinlib"
version = "0.12.2"
description = "The Swiss Army Knife of the Bitcoin protocol."
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "python-bitcoinlib-0.12.2.tar.gz", hash = "sha256:c65ab61427c77c38d397bfc431f71d86fd355b453a536496ec3fcb41bd10087d"},
{file = "python_bitcoinlib-0.12.2-py3-none-any.whl", hash = "sha256:2f29a9f475f21c12169b3a6cc8820f34f11362d7ff1200a5703dce3e4e903a44"},
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@ -3185,4 +3213,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.13"
content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188"
content-hash = "e42f3bc122fe5d98deb6aa224ddf531b6f45a50b7c61213721ff5c8258e424e3"

Wyświetl plik

@ -57,6 +57,7 @@ dependencies = [
"certvalidator (>=0.0.0)",
"rich-argparse (>=1.6.0,<2.0.0)",
"ruamel-yaml (>=0.18.10,<0.19.0)",
"opentimestamps (>=0.4.5,<0.5.0)",
]
[tool.poetry.group.dev.dependencies]

Wyświetl plik

@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin
from __future__ import annotations
from dataclasses import dataclass
from typing import List, TYPE_CHECKING
from typing import List, TYPE_CHECKING, Type
import shutil
import ast
import copy
@ -57,7 +57,7 @@ class ModuleFactory:
HAS_SETUP_PATHS = True
def get_module(self, module_name: str, config: dict) -> BaseModule:
def get_module(self, module_name: str, config: dict) -> Type[BaseModule]:
"""
Gets and sets up a module using the provided config

Wyświetl plik

@ -6,7 +6,6 @@
"python": [
"loguru",
"opentimestamps",
"slugify",
],
},
"configs": {
@ -19,14 +18,16 @@
"default": [
"https://alice.btc.calendar.opentimestamps.org",
"https://bob.btc.calendar.opentimestamps.org",
"https://finney.calendar.eternitywall.com"
"https://finney.calendar.eternitywall.com",
# "https://ots.btc.catallaxy.com/", # ipv4 only
],
"help": "List of OpenTimestamps calendar servers to use for timestamping.",
"help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\
https://opentimestamps.org/#calendars",
"type": "list"
},
"calendar_whitelist": {
"default": [],
"help": "Optional whitelist of calendar servers. If empty, all calendar servers are allowed.",
"help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']",
"type": "list"
},
"verify_timestamps": {
@ -38,6 +39,9 @@
"description": """
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.
Uses OpenTimestamps, a service that timestamps data using the Bitcoin blockchain, providing a decentralized
and secure way to prove that data existed at a certain point in time.
### Features
- Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain
- Verifies existing timestamp proofs to confirm the time a file existed

Wyświetl plik

@ -1,36 +1,19 @@
import os
import hashlib
from importlib.metadata import version
from typing import TYPE_CHECKING
from slugify import slugify
from loguru import logger
import opentimestamps
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
from opentimestamps.core.op import OpSHA256
from opentimestamps.core import serialize
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, Media
from auto_archiver.version import __version__
from auto_archiver.utils.misc import calculate_file_hash
class OpentimestampsEnricher(Enricher):
"""
Uses OpenTimestamps to create and verify timestamps for files. OpenTimestamps is a service that
timestamps data using the Bitcoin blockchain, providing a decentralized and secure way to prove
that data existed at a certain point in time.
The enricher hashes files in the archive and creates timestamp proofs that can later be verified.
These proofs are stored alongside the original files and can be used to verify the timestamp
even if the OpenTimestamps calendar servers are unavailable.
"""
def setup(self):
# Initialize any resources needed
pass
def cleanup(self) -> None:
# Clean up any resources used
pass
def enrich(self, to_enrich: Metadata) -> None:
url = to_enrich.get_url()
@ -52,21 +35,26 @@ class OpentimestampsEnricher(Enricher):
logger.warning(f"File not found: {file_path}")
continue
# Create timestamp for the file
# Create timestamp for the file - hash is SHA256
# Note: ONLY SHA256 is used/supported here. Opentimestamps supports other hashes, but not SHA3-512
# see opentimestamps.core.op
logger.debug(f"Creating timestamp for {file_path}")
# Hash the file
file_hash = None
with open(file_path, 'rb') as f:
file_bytes = f.read()
file_hash = hashlib.sha256(file_bytes).digest()
file_hash = OpSHA256().hash_fd(f)
if not file_hash:
logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}")
continue
# Create a timestamp with the file hash
timestamp = Timestamp(file_hash)
# Create a detached timestamp file with the timestamp
detached_timestamp = DetachedTimestampFile(timestamp)
# Create a detached timestamp file with the hash operation and timestamp
detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)
# Submit to calendar servers
submitted_to_calendar = False
if self.use_calendars:
logger.debug(f"Submitting timestamp to calendar servers for {file_path}")
calendars = []
@ -76,9 +64,11 @@ class OpentimestampsEnricher(Enricher):
whitelist = set(self.calendar_whitelist)
# Create calendar instances
calendar_urls = []
for url in self.calendar_urls:
if url in whitelist:
calendars.append(RemoteCalendar(url))
calendar_urls.append(url)
# Submit the hash to each calendar
for calendar in calendars:
@ -86,15 +76,35 @@ class OpentimestampsEnricher(Enricher):
calendar_timestamp = calendar.submit(file_hash)
timestamp.merge(calendar_timestamp)
logger.debug(f"Successfully submitted to calendar: {calendar.url}")
submitted_to_calendar = True
except Exception as e:
logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")
# If all calendar submissions failed, add pending attestations
if not submitted_to_calendar and not timestamp.attestations:
logger.info("All calendar submissions failed, creating pending attestations")
for url in calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
else:
logger.info("Skipping calendar submission as per configuration")
# Add dummy pending attestation for testing when calendars are disabled
for url in self.calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
# Save the timestamp proof to a file
timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
with open(timestamp_path, 'wb') as f:
detached_timestamp.serialize(f)
try:
with open(timestamp_path, 'wb') as f:
# Create a serialization context and write to the file
ctx = serialize.BytesSerializationContext()
detached_timestamp.serialize(ctx)
f.write(ctx.getbytes())
except Exception as e:
logger.warning(f"Failed to serialize timestamp file: {e}")
continue
# Create media for the timestamp file
timestamp_media = Media(filename=timestamp_path)
@ -106,6 +116,8 @@ class OpentimestampsEnricher(Enricher):
verification_info = self.verify_timestamp(detached_timestamp)
for key, value in verification_info.items():
timestamp_media.set(key, value)
else:
logger.warning(f"Not verifying the timestamp for media file {file_path}")
timestamp_files.append(timestamp_media)
@ -151,7 +163,7 @@ class OpentimestampsEnricher(Enricher):
# Process different types of attestations
if isinstance(attestation, PendingAttestation):
info["type"] = "pending"
info["uri"] = attestation.uri.decode('utf-8')
info["uri"] = attestation.uri
elif isinstance(attestation, BitcoinBlockHeaderAttestation):
info["type"] = "bitcoin"

Wyświetl plik

@ -10,53 +10,69 @@ from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAtt
from auto_archiver.core import Metadata, Media
# TODO: Remove once timestamping overhaul is merged
@pytest.fixture
def sample_file_path():
with tempfile.NamedTemporaryFile(delete=False) as tmp:
tmp.write(b"This is a test file content for OpenTimestamps")
return tmp.name
def sample_media(tmp_path) -> Media:
"""Fixture creating a Media object with temporary source file"""
src_file = tmp_path / "source.txt"
src_file.write_text("test content")
return Media(_key="subdir/test.txt", filename=str(src_file))
@pytest.fixture
def sample_file_path(tmp_path):
tmp_file = tmp_path / "test.txt"
tmp_file.write_text("This is a test file content for OpenTimestamps")
return str(tmp_file)
@pytest.fixture
def detached_timestamp_file():
"""Create a simple detached timestamp file for testing"""
file_hash = hashlib.sha256(b"Test content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash)
# Add a pending attestation
pending = PendingAttestation(b"https://example.calendar.com")
pending = PendingAttestation("https://example.calendar.com")
timestamp.attestations.add(pending)
# Add a bitcoin attestation
bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height
timestamp.attestations.add(bitcoin)
return DetachedTimestampFile(timestamp)
return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.fixture
def verified_timestamp_file():
"""Create a timestamp file with a Bitcoin attestation"""
file_hash = hashlib.sha256(b"Verified content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash)
# Add only a Bitcoin attestation
bitcoin = BitcoinBlockHeaderAttestation(783000) # Some block height
timestamp.attestations.add(bitcoin)
return DetachedTimestampFile(timestamp)
return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.fixture
def pending_timestamp_file():
"""Create a timestamp file with only pending attestations"""
file_hash = hashlib.sha256(b"Pending content").digest()
from opentimestamps.core.op import OpSHA256
file_hash_op = OpSHA256()
timestamp = Timestamp(file_hash)
# Add only pending attestations
pending1 = PendingAttestation(b"https://example1.calendar.com")
pending2 = PendingAttestation(b"https://example2.calendar.com")
pending1 = PendingAttestation("https://example1.calendar.com")
pending2 = PendingAttestation("https://example2.calendar.com")
timestamp.attestations.add(pending1)
timestamp.attestations.add(pending2)
return DetachedTimestampFile(timestamp)
return DetachedTimestampFile(file_hash_op, timestamp)
@pytest.mark.download
def test_download_tsr(setup_module, mocker):
@ -66,7 +82,7 @@ def test_download_tsr(setup_module, mocker):
test_timestamp = Timestamp(hashlib.sha256(b"test").digest())
mock_submit.return_value = test_timestamp
# Setup enricher
ots = setup_module("opentimestamps_enricher")
# Create a calendar
@ -121,6 +137,7 @@ def test_verify_pending_only(setup_module, pending_timestamp_file):
def test_verify_bitcoin_completed(setup_module, verified_timestamp_file):
"""Test verification of timestamps with completed Bitcoin attestations"""
ots = setup_module("opentimestamps_enricher")
verification_info = ots.verify_timestamp(verified_timestamp_file)
@ -136,15 +153,21 @@ def test_verify_bitcoin_completed(setup_module, verified_timestamp_file):
def test_full_enriching(setup_module, sample_file_path, sample_media, mocker):
"""Test the complete enrichment process"""
# Mock the calendar submission to avoid network requests
mock_calendar = mocker.patch.object(RemoteCalendar, 'submit')
test_timestamp = Timestamp(hashlib.sha256(b"test").digest())
# Add a bitcoin attestation to the test timestamp
bitcoin = BitcoinBlockHeaderAttestation(783000)
test_timestamp.attestations.add(bitcoin)
mock_calendar.return_value = test_timestamp
# Setup enricher
# Create a function that returns a new timestamp for each call
def side_effect(digest):
test_timestamp = Timestamp(digest)
# Add a bitcoin attestation to the test timestamp
bitcoin = BitcoinBlockHeaderAttestation(783000)
test_timestamp.attestations.add(bitcoin)
return test_timestamp
mock_calendar.side_effect = side_effect
ots = setup_module("opentimestamps_enricher")
# Create test metadata with sample file
@ -176,8 +199,6 @@ def test_full_enriching(setup_module, sample_file_path, sample_media, mocker):
assert timestamp_media.get("attestation_count") == 1
def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_media, mocker):
"""Test enrichment process with calendars disabled"""
# Setup enricher with calendars disabled
ots = setup_module("opentimestamps_enricher", {"use_calendars": False})
# Create test metadata with sample file
@ -198,7 +219,8 @@ def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_medi
# Verify status should be false since we didn't use calendars
assert timestamp_media.get("verified") == False
assert timestamp_media.get("attestation_count") == 0
# We expect 3 pending attestations (one for each calendar URL)
assert timestamp_media.get("attestation_count") == 3
def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_media, mocker):
"""Test enrichment when calendar servers return errors"""
@ -206,7 +228,7 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me
mock_calendar = mocker.patch.object(RemoteCalendar, 'submit')
mock_calendar.side_effect = Exception("Calendar server error")
# Setup enricher
ots = setup_module("opentimestamps_enricher")
# Create test metadata with sample file
@ -224,11 +246,11 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me
# Verify status should be false since calendar submissions failed
timestamp_media = metadata.media[1]
assert timestamp_media.get("verified") == False
assert timestamp_media.get("attestation_count") == 0
# We expect 3 pending attestations (one for each calendar URL that's enabled by default in __manifest__)
assert timestamp_media.get("attestation_count") == 3
def test_no_files_to_stamp(setup_module):
"""Test enrichment with no files to timestamp"""
# Setup enricher
ots = setup_module("opentimestamps_enricher")
# Create empty metadata