Merge pull request #247 from bellingcat/opentimestamps

Opentimestamps Module
This commit is contained in:
Patrick Robertson
2025-03-14 13:41:46 +00:00
committed by GitHub
10 changed files with 879 additions and 342 deletions

View File

@@ -7,7 +7,7 @@ by handling user configuration, validating the steps properties, and implementin
from __future__ import annotations
from dataclasses import dataclass
from typing import List, TYPE_CHECKING
from typing import List, TYPE_CHECKING, Type
import shutil
import ast
import copy
@@ -60,7 +60,7 @@ class ModuleFactory:
HAS_SETUP_PATHS = True
def get_module(self, module_name: str, config: dict) -> BaseModule:
def get_module(self, module_name: str, config: dict) -> Type[BaseModule]:
"""
Gets and sets up a module using the provided config

View File

@@ -19,7 +19,7 @@
},
"filename_generator": {
"default": "static",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled).",
"choices": ["random", "static"],
},
"root_folder_id": {

View File

@@ -13,7 +13,7 @@
},
"filename_generator": {
"default": "static",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled)",
"choices": ["random", "static"],
},
"save_to": {"default": "./local_archive", "help": "folder where to save archived content"},

View File

@@ -0,0 +1,100 @@
{
"name": "OpenTimestamps Enricher",
"type": ["enricher"],
"requires_setup": True,
"dependencies": {
"python": [
"loguru",
"opentimestamps",
],
},
"configs": {
"calendar_urls": {
"default": [
"https://alice.btc.calendar.opentimestamps.org",
"https://bob.btc.calendar.opentimestamps.org",
"https://finney.calendar.eternitywall.com",
# "https://ots.btc.catallaxy.com/", # ipv4 only
],
"help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\
https://opentimestamps.org/#calendars",
"type": "list",
},
"calendar_whitelist": {
"default": [],
"help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']",
"type": "list",
},
},
"description": """
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.
Uses OpenTimestamps a service that timestamps data using the Bitcoin blockchain, providing a decentralized
and secure way to prove that data existed at a certain point in time. A SHA256 hash of the file to be timestamped is used as the token
and sent to each of the 'timestamp calendars' for inclusion in the blockchain. The proof is then saved alongside the original file in a file with
the '.ots' extension.
### Features
- Creates cryptographic timestamp proofs that link files to the Bitcoin
- Verifies timestamp proofs have been submitted to the blockchain (note: does not confirm they have been *added*)
- Can use multiple calendar servers to ensure reliability and redundancy
- Stores timestamp proofs alongside original files for future verification
### Timestamp status
An opentimestamp, when submitted to a timestmap server will have a 'pending' status (Pending Attestation) as it waits to be added
to the blockchain. Once it has been added to the blockchain, it will have a 'confirmed' status (Bitcoin Block Timestamp).
This process typically takes several hours, depending on the calendar server and the current state of the Bitcoin network. As such,
the status of all timestamps added will be 'pending' until they are subsequently confirmed (see 'Upgrading Timestamps' below).
There are two possible statuses for a timestamp:
- `Pending`: The timestamp has been submitted to the calendar server but has not yet been confirmed in the Bitcoin blockchain.
- `Confirmed`: The timestamp has been confirmed in the Bitcoin blockchain.
### Upgrading Timestamps
To upgrade a timestamp from 'pending' to 'confirmed', you can use the `ots upgrade` command from the opentimestamps-client package
(install it with `pip install opentimesptamps-client`).
Example: `ots upgrade my_file.ots`
Here is a useful script that could be used to upgrade all timestamps in a directory, which could be run on a cron job:
```{code} bash
find . -name "*.ots" -type f | while read file; do
echo "Upgrading OTS $file"
ots upgrade $file
done
# The result might look like:
# Upgrading OTS ./my_file.ots
# Got 1 attestation(s) from https://alice.btc.calendar.opentimestamps.org
# Success! Timestamp complete
```
```{note} Note: this will only upgrade the .ots files, and will not change the status text in any output .html files or any databases where the
metadata is stored (e.g. Google Sheets, CSV database, API database etc.).
```
### Verifying Timestamps
The easiest way to verify a timestamp (ots) file is to install the opentimestamps-client command line tool and use the `ots verify` command.
Example: `ots verify my_file.ots`
```{code} bash
$ ots verify my_file.ots
Calendar https://bob.btc.calendar.opentimestamps.org: Pending confirmation in Bitcoin blockchain
Calendar https://finney.calendar.eternitywall.com: Pending confirmation in Bitcoin blockchain
Calendar https://alice.btc.calendar.opentimestamps.org: Timestamped by transaction 12345; waiting for 6 confirmations
```
Note: if you're using a storage with `filename_generator` set to `static` or `random`, the files will be renamed when they are saved to the
final location meaning you will need to specify the original filename when verifying the timestamp with `ots verify -f original_filename my_file.ots`.
### Choosing Calendar Servers
By default, the OpenTimestamps enricher uses a set of public calendar servers provided by the 'opentimestamps' project.
You can customize the list of calendar servers by providing URLs in the `calendar_urls` configuration option.
### Calendar WhiteList
By default, the opentimestamps package only allows their own calendars to be used (see `DEFAULT_CALENDAR_WHITELIST` in `opentimestamps.calendar`),
if you want to use your own calendars, then you can override this setting in the `calendar_whitelist` configuration option.
""",
}

View File

@@ -0,0 +1,172 @@
import os
from loguru import logger
import opentimestamps
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
from opentimestamps.core.op import OpSHA256
from opentimestamps.core import serialize
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, Media
from auto_archiver.utils.misc import get_current_timestamp
class OpentimestampsEnricher(Enricher):
def enrich(self, to_enrich: Metadata) -> None:
url = to_enrich.get_url()
logger.debug(f"OpenTimestamps timestamping files for {url=}")
# Get the media files to timestamp
media_files = [m for m in to_enrich.media if m.filename and not m.get("opentimestamps")]
if not media_files:
logger.warning(f"No files found to timestamp in {url=}")
return
timestamp_files = []
for media in media_files:
try:
# Get the file path from the media
file_path = media.filename
if not os.path.exists(file_path):
logger.warning(f"File not found: {file_path}")
continue
# Create timestamp for the file - hash is SHA256
# Note: hash is hard-coded to SHA256 and does not use hash_enricher to set it.
# SHA256 is the recommended hash, ref: https://github.com/bellingcat/auto-archiver/pull/247#discussion_r1992433181
logger.debug(f"Creating timestamp for {file_path}")
file_hash = None
with open(file_path, "rb") as f:
file_hash = OpSHA256().hash_fd(f)
if not file_hash:
logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}")
continue
# Create a timestamp with the file hash
timestamp = Timestamp(file_hash)
# Create a detached timestamp file with the hash operation and timestamp
detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)
# Submit to calendar servers
submitted_to_calendar = False
logger.debug(f"Submitting timestamp to calendar servers for {file_path}")
calendars = []
whitelist = DEFAULT_CALENDAR_WHITELIST
if self.calendar_whitelist:
whitelist = set(self.calendar_whitelist)
# Create calendar instances
calendar_urls = []
for url in self.calendar_urls:
if url in whitelist:
calendars.append(RemoteCalendar(url))
calendar_urls.append(url)
# Submit the hash to each calendar
for calendar in calendars:
try:
calendar_timestamp = calendar.submit(file_hash)
timestamp.merge(calendar_timestamp)
logger.debug(f"Successfully submitted to calendar: {calendar.url}")
submitted_to_calendar = True
except Exception as e:
logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")
# If all calendar submissions failed, add pending attestations
if not submitted_to_calendar and not timestamp.attestations:
logger.error(
f"Failed to submit to any calendar for {file_path}. **This file will not be timestamped.**"
)
media.set("opentimestamps", False)
continue
# Save the timestamp proof to a file
timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
try:
with open(timestamp_path, "wb") as f:
# Create a serialization context and write to the file
ctx = serialize.BytesSerializationContext()
detached_timestamp.serialize(ctx)
f.write(ctx.getbytes())
except Exception as e:
logger.warning(f"Failed to serialize timestamp file: {e}")
continue
# Create media for the timestamp file
timestamp_media = Media(filename=timestamp_path)
# explicitly set the mimetype, normally .ots files are 'application/vnd.oasis.opendocument.spreadsheet-template'
timestamp_media.mimetype = "application/vnd.opentimestamps"
timestamp_media.set("opentimestamps_version", opentimestamps.__version__)
verification_info = self.verify_timestamp(detached_timestamp)
for key, value in verification_info.items():
timestamp_media.set(key, value)
media.set("opentimestamp_files", [timestamp_media])
timestamp_files.append(timestamp_media.filename)
# Update the original media to indicate it's been timestamped
media.set("opentimestamps", True)
except Exception as e:
logger.warning(f"Error while timestamping {media.filename}: {e}")
# Add timestamp files to the metadata
if timestamp_files:
to_enrich.set("opentimestamped", True)
to_enrich.set("opentimestamps_count", len(timestamp_files))
logger.success(f"{len(timestamp_files)} OpenTimestamps proofs created for {url=}")
else:
to_enrich.set("opentimestamped", False)
logger.warning(f"No successful timestamps created for {url=}")
def verify_timestamp(self, detached_timestamp):
"""
Verify a timestamp and extract verification information.
Args:
detached_timestamp: The detached timestamp to verify.
Returns:
dict: Information about the verification result.
"""
result = {}
# Check if we have attestations
attestations = list(detached_timestamp.timestamp.all_attestations())
result["attestation_count"] = len(attestations)
if attestations:
attestation_info = []
for msg, attestation in attestations:
info = {}
# Process different types of attestations
if isinstance(attestation, PendingAttestation):
info["status"] = "pending"
info["uri"] = attestation.uri
elif isinstance(attestation, BitcoinBlockHeaderAttestation):
info["status"] = "confirmed"
info["block_height"] = attestation.height
info["last_check"] = get_current_timestamp()
attestation_info.append(info)
result["attestations"] = attestation_info
# For at least one confirmed attestation
if any("confirmed" in a.get("status") for a in attestation_info):
result["verified"] = True
else:
result["verified"] = False
else:
result["verified"] = False
result["last_updated"] = get_current_timestamp()
return result

View File

@@ -13,7 +13,7 @@
},
"filename_generator": {
"default": "static",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled).",
"choices": ["random", "static"],
},
"bucket": {"default": None, "help": "S3 bucket name"},