Finish off timestamping module

This commit is contained in:
Patrick Robertson
2025-03-12 10:24:57 +00:00
parent 28041d94d9
commit 1423c10363
7 changed files with 130 additions and 63 deletions

View File

@@ -6,7 +6,6 @@
"python": [
"loguru",
"opentimestamps",
"slugify",
],
},
"configs": {
@@ -19,14 +18,16 @@
"default": [
"https://alice.btc.calendar.opentimestamps.org",
"https://bob.btc.calendar.opentimestamps.org",
"https://finney.calendar.eternitywall.com"
"https://finney.calendar.eternitywall.com",
# "https://ots.btc.catallaxy.com/", # ipv4 only
],
"help": "List of OpenTimestamps calendar servers to use for timestamping.",
"help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\
https://opentimestamps.org/#calendars",
"type": "list"
},
"calendar_whitelist": {
"default": [],
"help": "Optional whitelist of calendar servers. If empty, all calendar servers are allowed.",
"help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']",
"type": "list"
},
"verify_timestamps": {
@@ -38,6 +39,9 @@
"description": """
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.
Uses OpenTimestamps, a service that timestamps data using the Bitcoin blockchain, providing a decentralized
and secure way to prove that data existed at a certain point in time.
### Features
- Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain
- Verifies existing timestamp proofs to confirm the time a file existed

View File

@@ -1,36 +1,19 @@
import os
import hashlib
from importlib.metadata import version
from typing import TYPE_CHECKING
from slugify import slugify
from loguru import logger
import opentimestamps
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
from opentimestamps.core.op import OpSHA256
from opentimestamps.core import serialize
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, Media
from auto_archiver.version import __version__
from auto_archiver.utils.misc import calculate_file_hash
class OpentimestampsEnricher(Enricher):
"""
Uses OpenTimestamps to create and verify timestamps for files. OpenTimestamps is a service that
timestamps data using the Bitcoin blockchain, providing a decentralized and secure way to prove
that data existed at a certain point in time.
The enricher hashes files in the archive and creates timestamp proofs that can later be verified.
These proofs are stored alongside the original files and can be used to verify the timestamp
even if the OpenTimestamps calendar servers are unavailable.
"""
def setup(self):
# Initialize any resources needed
pass
def cleanup(self) -> None:
# Clean up any resources used
pass
def enrich(self, to_enrich: Metadata) -> None:
url = to_enrich.get_url()
@@ -38,7 +21,7 @@ class OpentimestampsEnricher(Enricher):
# Get the media files to timestamp
media_files = [m for m in to_enrich.media if m.get("filename") and not m.get("opentimestamps")]
if not media_files:
logger.warning(f"No files found to timestamp in {url=}")
return
@@ -52,21 +35,26 @@ class OpentimestampsEnricher(Enricher):
logger.warning(f"File not found: {file_path}")
continue
# Create timestamp for the file
# Create timestamp for the file - hash is SHA256
# Note: ONLY SHA256 is used/supported here. Opentimestamps supports other hashes, but not SHA3-512
# see opentimestamps.core.op
logger.debug(f"Creating timestamp for {file_path}")
# Hash the file
file_hash = None
with open(file_path, 'rb') as f:
file_bytes = f.read()
file_hash = hashlib.sha256(file_bytes).digest()
file_hash = OpSHA256().hash_fd(f)
if not file_hash:
logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}")
continue
# Create a timestamp with the file hash
timestamp = Timestamp(file_hash)
# Create a detached timestamp file with the timestamp
detached_timestamp = DetachedTimestampFile(timestamp)
# Create a detached timestamp file with the hash operation and timestamp
detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)
# Submit to calendar servers
submitted_to_calendar = False
if self.use_calendars:
logger.debug(f"Submitting timestamp to calendar servers for {file_path}")
calendars = []
@@ -76,9 +64,11 @@ class OpentimestampsEnricher(Enricher):
whitelist = set(self.calendar_whitelist)
# Create calendar instances
calendar_urls = []
for url in self.calendar_urls:
if url in whitelist:
calendars.append(RemoteCalendar(url))
calendar_urls.append(url)
# Submit the hash to each calendar
for calendar in calendars:
@@ -86,15 +76,35 @@ class OpentimestampsEnricher(Enricher):
calendar_timestamp = calendar.submit(file_hash)
timestamp.merge(calendar_timestamp)
logger.debug(f"Successfully submitted to calendar: {calendar.url}")
submitted_to_calendar = True
except Exception as e:
logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")
# If all calendar submissions failed, add pending attestations
if not submitted_to_calendar and not timestamp.attestations:
logger.info("All calendar submissions failed, creating pending attestations")
for url in calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
else:
logger.info("Skipping calendar submission as per configuration")
# Add dummy pending attestation for testing when calendars are disabled
for url in self.calendar_urls:
pending = PendingAttestation(url)
timestamp.attestations.add(pending)
# Save the timestamp proof to a file
timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
with open(timestamp_path, 'wb') as f:
detached_timestamp.serialize(f)
try:
with open(timestamp_path, 'wb') as f:
# Create a serialization context and write to the file
ctx = serialize.BytesSerializationContext()
detached_timestamp.serialize(ctx)
f.write(ctx.getbytes())
except Exception as e:
logger.warning(f"Failed to serialize timestamp file: {e}")
continue
# Create media for the timestamp file
timestamp_media = Media(filename=timestamp_path)
@@ -106,6 +116,8 @@ class OpentimestampsEnricher(Enricher):
verification_info = self.verify_timestamp(detached_timestamp)
for key, value in verification_info.items():
timestamp_media.set(key, value)
else:
logger.warning(f"Not verifying the timestamp for media file {file_path}")
timestamp_files.append(timestamp_media)
@@ -151,7 +163,7 @@ class OpentimestampsEnricher(Enricher):
# Process different types of attestations
if isinstance(attestation, PendingAttestation):
info["type"] = "pending"
info["uri"] = attestation.uri.decode('utf-8')
info["uri"] = attestation.uri
elif isinstance(attestation, BitcoinBlockHeaderAttestation):
info["type"] = "bitcoin"

View File

@@ -30,7 +30,7 @@ class TimestampingEnricher(Enricher):
if not len(hashes):
logger.warning(f"No hashes found in {url=}")
return
tmp_dir = self.tmp_dir
hashes_fn = os.path.join(tmp_dir, "hashes.txt")