kopia lustrzana https://github.com/bellingcat/auto-archiver
Create opentimestamps module
rodzic
94543e9a67
commit
b70ed97ffd
|
@ -1,13 +1,52 @@
|
||||||
# Module manifest for the OpenTimestamps enricher.
# Kept as a plain literal: nothing but strings, lists, dicts and booleans.
{
    "name": "OpenTimestamps Enricher",
    "type": ["enricher"],
    "requires_setup": True,
    # Python packages imported by the enricher module.
    "dependencies": {
        "python": ["loguru", "opentimestamps", "slugify"],
    },
    # User-facing options; each entry gives a default, a help text and a type name.
    "configs": {
        "use_calendars": {
            "default": True,
            "help": "Whether to connect to OpenTimestamps calendar servers to create timestamps. If false, creates local timestamp proofs only.",
            "type": "bool",
        },
        "calendar_urls": {
            "default": [
                "https://alice.btc.calendar.opentimestamps.org",
                "https://bob.btc.calendar.opentimestamps.org",
                "https://finney.calendar.eternitywall.com",
            ],
            "help": "List of OpenTimestamps calendar servers to use for timestamping.",
            "type": "list",
        },
        "calendar_whitelist": {
            "default": [],
            "help": "Optional whitelist of calendar servers. If empty, all calendar servers are allowed.",
            "type": "list",
        },
        "verify_timestamps": {
            "default": True,
            "help": "Whether to verify timestamps after creating them.",
            "type": "bool",
        },
    },
    # Markdown shown as the module's documentation.
    "description": """
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.

### Features
- Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain
- Verifies existing timestamp proofs to confirm the time a file existed
- Uses multiple calendar servers to ensure reliability and redundancy
- Stores timestamp proofs alongside original files for future verification

### Notes
- Can work offline to create timestamp proofs that can be upgraded later
- Verification checks if timestamps have been confirmed in the Bitcoin blockchain
- Should run after files have been archived and hashed
""",
}
|
|
@ -0,0 +1,174 @@
|
||||||
|
import os
|
||||||
|
import hashlib
|
||||||
|
from importlib.metadata import version
|
||||||
|
|
||||||
|
from slugify import slugify
|
||||||
|
from loguru import logger
|
||||||
|
import opentimestamps
|
||||||
|
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
|
||||||
|
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
|
||||||
|
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
|
||||||
|
from auto_archiver.core import Enricher
|
||||||
|
from auto_archiver.core import Metadata, Media
|
||||||
|
from auto_archiver.version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
class OpentimestampsEnricher(Enricher):
    """
    Create OpenTimestamps proofs (``.ots`` files) for the media files of an archived item.

    For every media file that has not been timestamped yet, the enricher computes the
    SHA-256 digest of the file, optionally submits that digest to the configured
    calendar servers, writes the resulting detached timestamp next to the other
    temporary files and attaches it to the metadata as a new media entry.

    The instance is expected to expose ``use_calendars``, ``calendar_urls``,
    ``calendar_whitelist``, ``verify_timestamps`` and ``tmp_dir``.
    NOTE(review): these are presumably injected from the module's config by the
    framework -- confirm against the module loader.
    """

    # Size of the chunks used when hashing a file, so large media is never
    # loaded into memory in one piece.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def setup(self):
        """Hook for initialising resources; this enricher needs none."""
        pass

    def cleanup(self) -> None:
        """Hook for releasing resources; this enricher holds none."""
        pass

    def enrich(self, to_enrich: Metadata) -> None:
        """
        Timestamp every not-yet-timestamped media file of ``to_enrich``.

        Each successfully created proof is added to ``to_enrich`` as a new media
        entry, and the source media is flagged with ``opentimestamps`` and
        ``opentimestamp_file``. A failure on one file is logged and does not
        prevent the remaining files from being processed.

        Args:
            to_enrich: The metadata object whose media files should be timestamped.
        """
        url = to_enrich.get_url()
        logger.debug(f"OpenTimestamps timestamping files for {url=}")

        # Only consider media that has a file and has not been timestamped already.
        media_files = [
            m for m in to_enrich.media
            if self._media_path(m) and not m.get("opentimestamps")
        ]
        if not media_files:
            logger.warning(f"No files found to timestamp in {url=}")
            return

        # The calendar list does not depend on the file, so build it once.
        calendars = self._build_calendars() if self.use_calendars else []

        timestamp_files = []
        for media in media_files:
            file_path = self._media_path(media)
            try:
                if not os.path.exists(file_path):
                    logger.warning(f"File not found: {file_path}")
                    continue

                timestamp_media, timestamp_path = self._timestamp_file(file_path, calendars)
                timestamp_files.append(timestamp_media)

                # Flag the source media so it is skipped on a later run.
                media.set("opentimestamps", True)
                media.set("opentimestamp_file", timestamp_path)
            except Exception as e:
                # Best effort: one broken file must not abort the whole enrichment.
                logger.warning(f"Error while timestamping {file_path}: {e}")

        if not timestamp_files:
            logger.warning(f"No successful timestamps created for {url=}")
            return

        for ts_media in timestamp_files:
            to_enrich.add_media(ts_media)

        to_enrich.set("opentimestamped", True)
        to_enrich.set("opentimestamps_count", len(timestamp_files))
        logger.success(f"{len(timestamp_files)} OpenTimestamps proofs created for {url=}")

    @staticmethod
    def _media_path(media):
        """Return the local file path of ``media`` or a falsy value when it has none."""
        # NOTE(review): Media is built with ``Media(filename=...)`` in this class, so
        # the path may live on the ``filename`` attribute rather than in the values
        # returned by ``get`` -- confirm. Both places are checked to be safe.
        return getattr(media, "filename", None) or media.get("filename")

    def _build_calendars(self) -> list:
        """Create a RemoteCalendar for every configured calendar URL that is whitelisted."""
        if self.calendar_whitelist:
            whitelist = set(self.calendar_whitelist)
        else:
            whitelist = DEFAULT_CALENDAR_WHITELIST

        calendars = []
        # A dedicated loop variable name keeps the archived item's URL intact in
        # the caller's log messages.
        for calendar_url in self.calendar_urls:
            if calendar_url not in whitelist:
                logger.warning(f"Skipping calendar not in whitelist: {calendar_url}")
                continue
            try:
                calendars.append(RemoteCalendar(calendar_url))
            except Exception as e:
                logger.warning(f"Could not set up calendar {calendar_url}: {e}")
        return calendars

    def _sha256_digest(self, file_path: str) -> bytes:
        """Return the raw SHA-256 digest of the file at ``file_path``, read in chunks."""
        digest = hashlib.sha256()
        with open(file_path, "rb") as f:
            while chunk := f.read(self._HASH_CHUNK_SIZE):
                digest.update(chunk)
        return digest.digest()

    def _timestamp_file(self, file_path: str, calendars: list):
        """
        Create the ``.ots`` proof for one file.

        Args:
            file_path: Path of the file to timestamp.
            calendars: Calendar clients to submit the digest to (ignored when
                ``use_calendars`` is false).

        Returns:
            tuple: ``(timestamp_media, timestamp_path)`` -- the Media describing the
            proof file and the path the proof was written to.

        Raises:
            Exception: whatever hashing, serialising or writing the proof raises;
            the caller logs it and moves on to the next file.
        """
        # Imported here so this block stays self-contained; both modules belong to
        # the opentimestamps package the file already depends on.
        from opentimestamps.core.op import OpSHA256
        from opentimestamps.core.serialize import BytesSerializationContext

        logger.debug(f"Creating timestamp for {file_path}")

        file_hash = self._sha256_digest(file_path)
        timestamp = Timestamp(file_hash)
        # A detached timestamp records which hash operation produced the digest,
        # so the constructor needs the operation as well as the timestamp.
        detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)

        if self.use_calendars:
            logger.debug(f"Submitting timestamp to calendar servers for {file_path}")
            for calendar in calendars:
                try:
                    timestamp.merge(calendar.submit(file_hash))
                    logger.debug(f"Successfully submitted to calendar: {calendar.url}")
                except Exception as e:
                    logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")
        else:
            logger.info("Skipping calendar submission as per configuration")

        # Serialise into memory first: if serialisation fails (for instance when no
        # calendar returned an attestation) no empty .ots file is left behind.
        ctx = BytesSerializationContext()
        detached_timestamp.serialize(ctx)
        proof_bytes = ctx.getbytes()

        timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
        with open(timestamp_path, "wb") as f:
            f.write(proof_bytes)

        timestamp_media = Media(filename=timestamp_path)
        timestamp_media.set("source_file", os.path.basename(file_path))
        timestamp_media.set("opentimestamps_version", opentimestamps.__version__)

        if self.verify_timestamps:
            for key, value in self.verify_timestamp(detached_timestamp).items():
                timestamp_media.set(key, value)

        return timestamp_media, timestamp_path

    def verify_timestamp(self, detached_timestamp):
        """
        Summarise the attestations contained in a detached timestamp.

        This only inspects the attestations stored in the proof; it does not
        contact a Bitcoin node or a calendar server.

        Args:
            detached_timestamp: The detached timestamp to inspect.

        Returns:
            dict: ``attestation_count`` (int), ``verified`` (True when at least one
            Bitcoin block header attestation is present) and ``pending`` (True when
            attestations exist but none is a Bitcoin one). When attestations exist,
            ``attestations`` lists one dict per attestation with a ``type`` of
            ``"pending"`` (plus ``uri``), ``"bitcoin"`` (plus ``block_height``) or
            ``"unknown"``.
        """
        attestations = list(detached_timestamp.timestamp.all_attestations())
        result = {"attestation_count": len(attestations)}

        if not attestations:
            result["verified"] = False
            result["pending"] = False
            return result

        attestation_info = []
        for _msg, attestation in attestations:
            if isinstance(attestation, PendingAttestation):
                uri = attestation.uri
                # The URI is normally already text; decode only if bytes were given.
                if isinstance(uri, bytes):
                    uri = uri.decode("utf-8")
                info = {"type": "pending", "uri": uri}
            elif isinstance(attestation, BitcoinBlockHeaderAttestation):
                info = {"type": "bitcoin", "block_height": attestation.height}
            else:
                info = {"type": "unknown"}
            attestation_info.append(info)

        result["attestations"] = attestation_info

        confirmed = any(a.get("type") == "bitcoin" for a in attestation_info)
        result["verified"] = confirmed
        result["pending"] = not confirmed
        return result
|
Ładowanie…
Reference in New Issue