From e7489ac4c41fb28270e8738cc13bcc129234a5b7 Mon Sep 17 00:00:00 2001 From: Patrick Robertson Date: Thu, 13 Mar 2025 14:30:33 +0000 Subject: [PATCH] Tidy up opentimestamps * Simplify * Don't add fake (pending) attestations if the calendar urls all have issues * Remove unnecessary configs * Improve docs on upgrading + verifying --- .../opentimestamps_enricher/__manifest__.py | 79 +++++++++++----- .../opentimestamps_enricher.py | 92 +++++++++---------- .../enrichers/test_opentimestamps_enricher.py | 43 ++------- 3 files changed, 106 insertions(+), 108 deletions(-) diff --git a/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py b/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py index ff038e1..733ff1a 100644 --- a/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py +++ b/src/auto_archiver/modules/opentimestamps_enricher/__manifest__.py @@ -9,11 +9,6 @@ ], }, "configs": { - "use_calendars": { - "default": True, - "help": "Whether to connect to OpenTimestamps calendar servers to create timestamps. If false, creates local timestamp proofs only.", - "type": "bool" - }, "calendar_urls": { "default": [ "https://alice.btc.calendar.opentimestamps.org", @@ -30,34 +25,76 @@ https://opentimestamps.org/#calendars", "help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']", "type": "list" }, - "verify_timestamps": { - "default": True, - "help": "Whether to verify timestamps after creating them.", - "type": "bool" - } }, "description": """ Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time. Uses OpenTimestamps – a service that timestamps data using the Bitcoin blockchain, providing a decentralized - and secure way to prove that data existed at a certain point in time. + and secure way to prove that data existed at a certain point in time. 
A SHA256 hash of the file to be timestamped is used as the token + and sent to each of the 'timestamp calendars' for inclusion in the blockchain. The proof is then saved alongside the original file in a file with + the '.ots' extension. ### Features - - Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain - - Verifies existing timestamp proofs to confirm the time a file existed - - Uses multiple calendar servers to ensure reliability and redundancy + - Creates cryptographic timestamp proofs that link files to the Bitcoin or Litecoin blockchain + - Verifies timestamp proofs have been submitted to the blockchain (note: does not confirm they have been *added*) + - Can use multiple calendar servers to ensure reliability and redundancy - Stores timestamp proofs alongside original files for future verification - ### Notes - - Can work offline to create timestamp proofs that can be upgraded later - - Verification checks if timestamps have been confirmed in the Bitcoin blockchain - - Should run after files have been archived and hashed + ### Timestamp status + An opentimestamp, when submitted to a timestamp server, will have a 'pending' status (Pending Attestation) as it waits to be added + to the blockchain. Once it has been added to the blockchain, it will have a 'confirmed' status (Bitcoin Block Timestamp). + This process typically takes several hours, depending on the calendar server and the current state of the Bitcoin network. As such, + the status of all timestamps added will be 'pending' until they are subsequently confirmed (see 'Upgrading Timestamps' below). - ### Verifying Timestamps Later - If you wish to verify a timestamp (ots) file later, you can install the opentimestamps-client command line tool and use the `ots verify` command. + There are two possible statuses for a timestamp: + - `Pending`: The timestamp has been submitted to the calendar server but has not yet been confirmed in the Bitcoin blockchain.
+ - `Confirmed`: The timestamp has been confirmed in the Bitcoin or Litecoin blockchain. + + ### Upgrading Timestamps + To upgrade a timestamp from 'pending' to 'confirmed', you can use the `ots upgrade` command from the opentimestamps-client package + (install it with `pip install opentimestamps-client`). + Example: `ots upgrade my_file.ots` + + Here is a useful script that could be used to upgrade all timestamps in a directory, which could be run on a cron job: +```{code} bash +find . -name "*.ots" -type f | while read file; do + echo "Upgrading OTS $file" + ots upgrade $file +done +# The result might look like: +# Upgrading OTS ./my_file.ots +# Got 1 attestation(s) from https://alice.btc.calendar.opentimestamps.org +# Success! Timestamp complete +``` + +```{note} Note: this will only upgrade the .ots files, and will not change the status text in any output .html files or any databases where the +metadata is stored (e.g. Google Sheets, CSV database, API database etc.). +``` + + ### Verifying Timestamps + The easiest way to verify a timestamp (ots) file is to install the opentimestamps-client command line tool and use the `ots verify` command.
Example: `ots verify my_file.ots` - Note: if you're using local storage with a filename_generator set to 'static' (a hash) or random, the files will be renamed when they are saved to the + ```{code} bash +$ ots verify my_file.ots +Calendar https://bob.btc.calendar.opentimestamps.org: Pending confirmation in Bitcoin blockchain +Calendar https://finney.calendar.eternitywall.com: Pending confirmation in Bitcoin blockchain +Calendar https://alice.btc.calendar.opentimestamps.org: Timestamped by transaction 12345; waiting for 6 confirmations +``` + + Note: if you're using a storage with `filename_generator` set to `static` or `random`, the files will be renamed when they are saved to the final location meaning you will need to specify the original filename when verifying the timestamp with `ots verify -f original_filename my_file.ots`. + + ### Choosing Calendar Servers + + By default, the OpenTimestamps enricher uses a set of public calendar servers provided by the 'opentimestamps' project. + You can customize the list of calendar servers by providing URLs in the `calendar_urls` configuration option. + + ### Calendar WhiteList + + By default, the opentimestamps package only allows their own calendars to be used (see `DEFAULT_CALENDAR_WHITELIST` in `opentimestamps.calendar`), + if you want to use your own calendars, then you can override this setting in the `calendar_whitelist` configuration option. 
+ + """ } \ No newline at end of file diff --git a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py index cf110a2..d6e8add 100644 --- a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py +++ b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py @@ -5,7 +5,7 @@ from loguru import logger import opentimestamps from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile -from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation +from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation, LitecoinBlockHeaderAttestation from opentimestamps.core.op import OpSHA256 from opentimestamps.core import serialize from auto_archiver.core import Enricher @@ -53,44 +53,36 @@ class OpentimestampsEnricher(Enricher): # Submit to calendar servers submitted_to_calendar = False - if self.use_calendars: - logger.debug(f"Submitting timestamp to calendar servers for {file_path}") - calendars = [] - whitelist = DEFAULT_CALENDAR_WHITELIST - - if self.calendar_whitelist: - whitelist = set(self.calendar_whitelist) - - # Create calendar instances - calendar_urls = [] - for url in self.calendar_urls: - if url in whitelist: - calendars.append(RemoteCalendar(url)) - calendar_urls.append(url) - - # Submit the hash to each calendar - for calendar in calendars: - try: - calendar_timestamp = calendar.submit(file_hash) - timestamp.merge(calendar_timestamp) - logger.debug(f"Successfully submitted to calendar: {calendar.url}") - submitted_to_calendar = True - except Exception as e: - logger.warning(f"Failed to submit to calendar {calendar.url}: {e}") - - # If all calendar submissions failed, add pending attestations - if not submitted_to_calendar and not timestamp.attestations: - logger.info("All 
calendar submissions failed, creating pending attestations") - for url in calendar_urls: - pending = PendingAttestation(url) - timestamp.attestations.add(pending) - else: - logger.info("Skipping calendar submission as per configuration") - - # Add dummy pending attestation for testing when calendars are disabled - for url in self.calendar_urls: - pending = PendingAttestation(url) - timestamp.attestations.add(pending) + + logger.debug(f"Submitting timestamp to calendar servers for {file_path}") + calendars = [] + whitelist = DEFAULT_CALENDAR_WHITELIST + + if self.calendar_whitelist: + whitelist = set(self.calendar_whitelist) + + # Create calendar instances + calendar_urls = [] + for url in self.calendar_urls: + if url in whitelist: + calendars.append(RemoteCalendar(url)) + calendar_urls.append(url) + + # Submit the hash to each calendar + for calendar in calendars: + try: + calendar_timestamp = calendar.submit(file_hash) + timestamp.merge(calendar_timestamp) + logger.debug(f"Successfully submitted to calendar: {calendar.url}") + submitted_to_calendar = True + except Exception as e: + logger.warning(f"Failed to submit to calendar {calendar.url}: {e}") + + # If all calendar submissions failed, add pending attestations + if not submitted_to_calendar and not timestamp.attestations: + logger.error(f"Failed to submit to any calendar for {file_path}. 
**This file will not be timestamped.**") + media.set("opentimestamps", False) + continue # Save the timestamp proof to a file timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots") @@ -110,13 +102,9 @@ class OpentimestampsEnricher(Enricher): timestamp_media.mimetype = "application/vnd.opentimestamps" timestamp_media.set("opentimestamps_version", opentimestamps.__version__) - # Verify the timestamp if needed - if self.verify_timestamps: - verification_info = self.verify_timestamp(detached_timestamp) - for key, value in verification_info.items(): - timestamp_media.set(key, value) - else: - logger.warning(f"Not verifying the timestamp for media file {file_path}") + verification_info = self.verify_timestamp(detached_timestamp) + for key, value in verification_info.items(): + timestamp_media.set(key, value) media.set("opentimestamp_files", [timestamp_media]) timestamp_files.append(timestamp_media.filename) @@ -132,6 +120,7 @@ class OpentimestampsEnricher(Enricher): to_enrich.set("opentimestamps_count", len(timestamp_files)) logger.success(f"{len(timestamp_files)} OpenTimestamps proofs created for {url=}") else: + to_enrich.set("opentimestamped", False) logger.warning(f"No successful timestamps created for {url=}") def verify_timestamp(self, detached_timestamp): @@ -157,11 +146,14 @@ class OpentimestampsEnricher(Enricher): # Process different types of attestations if isinstance(attestation, PendingAttestation): - info["type"] = f"pending" + info["status"] = "pending" info["uri"] = attestation.uri elif isinstance(attestation, BitcoinBlockHeaderAttestation): - info["type"] = "bitcoin" + info["status"] = "confirmed - bitcoin" + info["block_height"] = attestation.height + elif isinstance(attestation, LitecoinBlockHeaderAttestation): + info["status"] = "confirmed - litecoin" info["block_height"] = attestation.height info["last_check"] = datetime.datetime.now().isoformat()[:-7] @@ -171,14 +163,12 @@ class OpentimestampsEnricher(Enricher): 
result["attestations"] = attestation_info # For at least one confirmed attestation - if any(a.get("type") == "bitcoin" for a in attestation_info): + if any("confirmed" in a.get("status") for a in attestation_info): result["verified"] = True else: result["verified"] = False - result["pending"] = True else: result["verified"] = False - result["pending"] = False result["last_updated"] = datetime.datetime.now().isoformat()[:-7] return result \ No newline at end of file diff --git a/tests/enrichers/test_opentimestamps_enricher.py b/tests/enrichers/test_opentimestamps_enricher.py index 391fb06..2cdefdf 100644 --- a/tests/enrichers/test_opentimestamps_enricher.py +++ b/tests/enrichers/test_opentimestamps_enricher.py @@ -109,12 +109,12 @@ def test_verify_timestamp(setup_module, detached_timestamp_file): assert len(verification_info["attestations"]) == 2 # Check attestation types - assertion_types = [a["type"] for a in verification_info["attestations"]] + assertion_types = [a["status"] for a in verification_info["attestations"]] assert "pending" in assertion_types - assert "bitcoin" in assertion_types + assert "confirmed - bitcoin" in assertion_types # Check Bitcoin attestation details - bitcoin_attestation = next(a for a in verification_info["attestations"] if a["type"] == "bitcoin") + bitcoin_attestation = next(a for a in verification_info["attestations"] if a["status"] == "confirmed - bitcoin") assert bitcoin_attestation["block_height"] == 783000 def test_verify_pending_only(setup_module, pending_timestamp_file): @@ -125,10 +125,9 @@ def test_verify_pending_only(setup_module, pending_timestamp_file): assert verification_info["attestation_count"] == 2 assert verification_info["verified"] == False - assert verification_info["pending"] == True # All attestations should be of type "pending" - assert all(a["type"] == "pending" for a in verification_info["attestations"]) + assert all(a["status"] == "pending" for a in verification_info["attestations"]) # Check URIs of pending 
attestations uris = [a["uri"] for a in verification_info["attestations"]] @@ -148,7 +147,7 @@ def test_verify_bitcoin_completed(setup_module, verified_timestamp_file): # Check that the attestation is a Bitcoin attestation attestation = verification_info["attestations"][0] - assert attestation["type"] == "bitcoin" + assert attestation["status"] == "confirmed - bitcoin" assert attestation["block_height"] == 783000 def test_full_enriching(setup_module, sample_file_path, sample_media, mocker): @@ -199,28 +198,6 @@ def test_full_enriching(setup_module, sample_file_path, sample_media, mocker): assert timestamp_media.get("verified") == True assert timestamp_media.get("attestation_count") == 1 -def test_full_enriching_no_calendars(setup_module, sample_file_path, sample_media, mocker): - ots = setup_module("opentimestamps_enricher", {"use_calendars": False}) - - # Create test metadata with sample file - metadata = Metadata().set_url("https://example.com") - sample_media.filename = sample_file_path - metadata.add_media(sample_media) - - # Run enrichment - ots.enrich(metadata) - - # Verify results - assert metadata.get("opentimestamped") == True - assert metadata.get("opentimestamps_count") == 1 - - timestamp_media = metadata.media[0].get("opentimestamp_files")[0] - - # Verify status should be false since we didn't use calendars - assert timestamp_media.get("verified") == False - # We expect 3 pending attestations (one for each calendar URL) - assert timestamp_media.get("attestation_count") == 3 - def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_media, mocker): """Test enrichment when calendar servers return errors""" # Mock the calendar submission to raise an exception @@ -239,14 +216,8 @@ def test_full_enriching_calendar_error(setup_module, sample_file_path, sample_me ots.enrich(metadata) # Verify results - assert metadata.get("opentimestamped") == True - assert metadata.get("opentimestamps_count") == 1 - - # Verify status should be false since 
calendar submissions failed - timestamp_media = metadata.media[0].get("opentimestamp_files")[0] - assert timestamp_media.get("verified") == False - # We expect 3 pending attestations (one for each calendar URL that's enabled by default in __manifest__) - assert timestamp_media.get("attestation_count") == 3 + assert metadata.get("opentimestamped") == False + assert metadata.get("opentimestamps_count") is None def test_no_files_to_stamp(setup_module): """Test enrichment with no files to timestamp"""