mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-13 05:38:29 +03:00
Merge pull request #247 from bellingcat/opentimestamps
Opentimestamps Module
This commit is contained in:
@@ -7,7 +7,7 @@ by handling user configuration, validating the steps properties, and implementin
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, TYPE_CHECKING
|
||||
from typing import List, TYPE_CHECKING, Type
|
||||
import shutil
|
||||
import ast
|
||||
import copy
|
||||
@@ -60,7 +60,7 @@ class ModuleFactory:
|
||||
|
||||
HAS_SETUP_PATHS = True
|
||||
|
||||
def get_module(self, module_name: str, config: dict) -> BaseModule:
|
||||
def get_module(self, module_name: str, config: dict) -> Type[BaseModule]:
|
||||
"""
|
||||
Gets and sets up a module using the provided config
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
},
|
||||
"filename_generator": {
|
||||
"default": "static",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled).",
|
||||
"choices": ["random", "static"],
|
||||
},
|
||||
"root_folder_id": {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
},
|
||||
"filename_generator": {
|
||||
"default": "static",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled)",
|
||||
"choices": ["random", "static"],
|
||||
},
|
||||
"save_to": {"default": "./local_archive", "help": "folder where to save archived content"},
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
{
|
||||
"name": "OpenTimestamps Enricher",
|
||||
"type": ["enricher"],
|
||||
"requires_setup": True,
|
||||
"dependencies": {
|
||||
"python": [
|
||||
"loguru",
|
||||
"opentimestamps",
|
||||
],
|
||||
},
|
||||
"configs": {
|
||||
"calendar_urls": {
|
||||
"default": [
|
||||
"https://alice.btc.calendar.opentimestamps.org",
|
||||
"https://bob.btc.calendar.opentimestamps.org",
|
||||
"https://finney.calendar.eternitywall.com",
|
||||
# "https://ots.btc.catallaxy.com/", # ipv4 only
|
||||
],
|
||||
"help": "List of OpenTimestamps calendar servers to use for timestamping. See here for a list of calendars maintained by opentimestamps:\
|
||||
https://opentimestamps.org/#calendars",
|
||||
"type": "list",
|
||||
},
|
||||
"calendar_whitelist": {
|
||||
"default": [],
|
||||
"help": "Optional whitelist of calendar servers. Override this if you are using your own calendar servers. e.g. ['https://mycalendar.com']",
|
||||
"type": "list",
|
||||
},
|
||||
},
|
||||
"description": """
|
||||
Creates OpenTimestamps proofs for archived files, providing blockchain-backed evidence of file existence at a specific time.
|
||||
|
||||
Uses OpenTimestamps – a service that timestamps data using the Bitcoin blockchain, providing a decentralized
|
||||
and secure way to prove that data existed at a certain point in time. A SHA256 hash of the file to be timestamped is used as the token
|
||||
and sent to each of the 'timestamp calendars' for inclusion in the blockchain. The proof is then saved alongside the original file in a file with
|
||||
the '.ots' extension.
|
||||
|
||||
### Features
|
||||
- Creates cryptographic timestamp proofs that link files to the Bitcoin blockchain
|
||||
- Verifies timestamp proofs have been submitted to the blockchain (note: does not confirm they have been *added*)
|
||||
- Can use multiple calendar servers to ensure reliability and redundancy
|
||||
- Stores timestamp proofs alongside original files for future verification
|
||||
|
||||
### Timestamp status
|
||||
An opentimestamp, when submitted to a timestamp server, will have a 'pending' status (Pending Attestation) as it waits to be added
|
||||
to the blockchain. Once it has been added to the blockchain, it will have a 'confirmed' status (Bitcoin Block Timestamp).
|
||||
This process typically takes several hours, depending on the calendar server and the current state of the Bitcoin network. As such,
|
||||
the status of all timestamps added will be 'pending' until they are subsequently confirmed (see 'Upgrading Timestamps' below).
|
||||
|
||||
There are two possible statuses for a timestamp:
|
||||
- `Pending`: The timestamp has been submitted to the calendar server but has not yet been confirmed in the Bitcoin blockchain.
|
||||
- `Confirmed`: The timestamp has been confirmed in the Bitcoin blockchain.
|
||||
|
||||
### Upgrading Timestamps
|
||||
To upgrade a timestamp from 'pending' to 'confirmed', you can use the `ots upgrade` command from the opentimestamps-client package
|
||||
(install it with `pip install opentimestamps-client`).
|
||||
Example: `ots upgrade my_file.ots`
|
||||
|
||||
Here is a useful script that could be used to upgrade all timestamps in a directory, which could be run on a cron job:
|
||||
```{code} bash
|
||||
find . -name "*.ots" -type f | while read file; do
|
||||
echo "Upgrading OTS $file"
|
||||
ots upgrade $file
|
||||
done
|
||||
# The result might look like:
|
||||
# Upgrading OTS ./my_file.ots
|
||||
# Got 1 attestation(s) from https://alice.btc.calendar.opentimestamps.org
|
||||
# Success! Timestamp complete
|
||||
```
|
||||
|
||||
```{note} Note: this will only upgrade the .ots files, and will not change the status text in any output .html files or any databases where the
|
||||
metadata is stored (e.g. Google Sheets, CSV database, API database etc.).
|
||||
```
|
||||
|
||||
### Verifying Timestamps
|
||||
The easiest way to verify a timestamp (ots) file is to install the opentimestamps-client command line tool and use the `ots verify` command.
|
||||
Example: `ots verify my_file.ots`
|
||||
|
||||
```{code} bash
|
||||
$ ots verify my_file.ots
|
||||
Calendar https://bob.btc.calendar.opentimestamps.org: Pending confirmation in Bitcoin blockchain
|
||||
Calendar https://finney.calendar.eternitywall.com: Pending confirmation in Bitcoin blockchain
|
||||
Calendar https://alice.btc.calendar.opentimestamps.org: Timestamped by transaction 12345; waiting for 6 confirmations
|
||||
```
|
||||
|
||||
Note: if you're using a storage with `filename_generator` set to `static` or `random`, the files will be renamed when they are saved to the
|
||||
final location meaning you will need to specify the original filename when verifying the timestamp with `ots verify -f original_filename my_file.ots`.
|
||||
|
||||
### Choosing Calendar Servers
|
||||
|
||||
By default, the OpenTimestamps enricher uses a set of public calendar servers provided by the 'opentimestamps' project.
|
||||
You can customize the list of calendar servers by providing URLs in the `calendar_urls` configuration option.
|
||||
|
||||
### Calendar WhiteList
|
||||
|
||||
By default, the opentimestamps package only allows their own calendars to be used (see `DEFAULT_CALENDAR_WHITELIST` in `opentimestamps.calendar`),
|
||||
if you want to use your own calendars, then you can override this setting in the `calendar_whitelist` configuration option.
|
||||
|
||||
|
||||
""",
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
import os
|
||||
|
||||
from loguru import logger
|
||||
import opentimestamps
|
||||
from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
|
||||
from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
|
||||
from opentimestamps.core.notary import PendingAttestation, BitcoinBlockHeaderAttestation
|
||||
from opentimestamps.core.op import OpSHA256
|
||||
from opentimestamps.core import serialize
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.utils.misc import get_current_timestamp
|
||||
|
||||
|
||||
class OpentimestampsEnricher(Enricher):
    """Enricher that creates an OpenTimestamps proof (``.ots`` file) per media file.

    The methods read ``self.calendar_urls``, ``self.calendar_whitelist`` and
    ``self.tmp_dir``; these are presumably populated from the module
    configuration by the framework (not visible here — confirm against the
    base class).
    """

    def enrich(self, to_enrich: Metadata) -> None:
        """Timestamp every not-yet-timestamped media file of ``to_enrich``.

        For each media item that has a filename and no truthy
        ``opentimestamps`` property, the file is hashed with SHA256, the hash
        is submitted to every configured calendar that passes the whitelist,
        and the resulting proof is written to ``<tmp_dir>/<basename>.ots``.
        The proof is attached to the media under ``opentimestamp_files`` and
        the media is flagged with ``opentimestamps`` True/False.

        On the metadata itself, ``opentimestamped`` is set to True/False and,
        when at least one proof was created, ``opentimestamps_count`` is set.

        Args:
            to_enrich: The metadata object whose media should be timestamped.
        """
        url = to_enrich.get_url()
        logger.debug(f"OpenTimestamps timestamping files for {url=}")

        # Get the media files to timestamp
        media_files = [m for m in to_enrich.media if m.filename and not m.get("opentimestamps")]
        if not media_files:
            logger.warning(f"No files found to timestamp in {url=}")
            return

        timestamp_files = []
        for media in media_files:
            try:
                # Get the file path from the media
                file_path = media.filename
                if not os.path.exists(file_path):
                    logger.warning(f"File not found: {file_path}")
                    continue

                # Create timestamp for the file - hash is SHA256
                # Note: hash is hard-coded to SHA256 and does not use hash_enricher to set it.
                # SHA256 is the recommended hash, ref: https://github.com/bellingcat/auto-archiver/pull/247#discussion_r1992433181
                logger.debug(f"Creating timestamp for {file_path}")
                file_hash = None
                with open(file_path, "rb") as f:
                    file_hash = OpSHA256().hash_fd(f)

                if not file_hash:
                    logger.warning(f"Failed to hash file for timestamping, skipping: {file_path}")
                    continue

                # Create a timestamp with the file hash
                timestamp = Timestamp(file_hash)

                # Create a detached timestamp file with the hash operation and timestamp
                detached_timestamp = DetachedTimestampFile(OpSHA256(), timestamp)

                # Submit to calendar servers
                submitted_to_calendar = False

                logger.debug(f"Submitting timestamp to calendar servers for {file_path}")

                # A user-supplied whitelist replaces the library default entirely.
                whitelist = DEFAULT_CALENDAR_WHITELIST
                if self.calendar_whitelist:
                    whitelist = set(self.calendar_whitelist)

                # Create calendar instances for whitelisted URLs only.
                # The loop variable must not be called `url`: that name holds the
                # archived item's URL and is used in the summary logs below.
                calendars = [
                    RemoteCalendar(calendar_url)
                    for calendar_url in self.calendar_urls
                    if calendar_url in whitelist
                ]

                # Submit the hash to each calendar; a failing calendar is logged
                # and skipped so the remaining calendars still get a chance.
                for calendar in calendars:
                    try:
                        calendar_timestamp = calendar.submit(file_hash)
                        timestamp.merge(calendar_timestamp)
                        logger.debug(f"Successfully submitted to calendar: {calendar.url}")
                        submitted_to_calendar = True
                    except Exception as e:
                        logger.warning(f"Failed to submit to calendar {calendar.url}: {e}")

                # If every calendar submission failed, there is nothing to prove:
                # flag the media as not timestamped and move on to the next file.
                if not submitted_to_calendar and not timestamp.attestations:
                    logger.error(
                        f"Failed to submit to any calendar for {file_path}. **This file will not be timestamped.**"
                    )
                    media.set("opentimestamps", False)
                    continue

                # Save the timestamp proof to a file
                timestamp_path = os.path.join(self.tmp_dir, f"{os.path.basename(file_path)}.ots")
                try:
                    with open(timestamp_path, "wb") as f:
                        # Create a serialization context and write to the file
                        ctx = serialize.BytesSerializationContext()
                        detached_timestamp.serialize(ctx)
                        f.write(ctx.getbytes())
                except Exception as e:
                    logger.warning(f"Failed to serialize timestamp file: {e}")
                    continue

                # Create media for the timestamp file
                timestamp_media = Media(filename=timestamp_path)
                # explicitly set the mimetype, normally .ots files are 'application/vnd.oasis.opendocument.spreadsheet-template'
                timestamp_media.mimetype = "application/vnd.opentimestamps"
                timestamp_media.set("opentimestamps_version", opentimestamps.__version__)

                verification_info = self.verify_timestamp(detached_timestamp)
                for key, value in verification_info.items():
                    timestamp_media.set(key, value)

                media.set("opentimestamp_files", [timestamp_media])
                timestamp_files.append(timestamp_media.filename)
                # Update the original media to indicate it's been timestamped
                media.set("opentimestamps", True)

            except Exception as e:
                logger.warning(f"Error while timestamping {media.filename}: {e}")

        # Add timestamp files to the metadata
        if timestamp_files:
            to_enrich.set("opentimestamped", True)
            to_enrich.set("opentimestamps_count", len(timestamp_files))
            logger.success(f"{len(timestamp_files)} OpenTimestamps proofs created for {url=}")
        else:
            to_enrich.set("opentimestamped", False)
            logger.warning(f"No successful timestamps created for {url=}")

    def verify_timestamp(self, detached_timestamp):
        """
        Verify a timestamp and extract verification information.

        Args:
            detached_timestamp: The detached timestamp to verify.

        Returns:
            dict: Information about the verification result. Always contains
            ``attestation_count`` and ``verified``. When attestations exist it
            also contains ``attestations``, a list with one dict per
            attestation: pending ones carry ``status="pending"`` and ``uri``,
            Bitcoin block header ones carry ``status="confirmed"`` and
            ``block_height``, and any other attestation type carries only
            ``last_check``. When there are no attestations, ``last_updated``
            is set instead. ``verified`` is True only if at least one
            attestation is confirmed.
        """
        result = {}

        # Check if we have attestations
        attestations = list(detached_timestamp.timestamp.all_attestations())
        result["attestation_count"] = len(attestations)

        if attestations:
            attestation_info = []
            for _msg, attestation in attestations:
                info = {}

                # Process different types of attestations
                if isinstance(attestation, PendingAttestation):
                    info["status"] = "pending"
                    info["uri"] = attestation.uri

                elif isinstance(attestation, BitcoinBlockHeaderAttestation):
                    info["status"] = "confirmed"
                    info["block_height"] = attestation.height

                info["last_check"] = get_current_timestamp()

                attestation_info.append(info)

            result["attestations"] = attestation_info

            # Verified means at least one confirmed attestation. Compare with
            # `==` rather than `in`: attestations of other types have no
            # "status" key, and `"confirmed" in None` would raise TypeError.
            result["verified"] = any(a.get("status") == "confirmed" for a in attestation_info)
        else:
            result["verified"] = False
            result["last_updated"] = get_current_timestamp()

        return result
|
||||
@@ -13,7 +13,7 @@
|
||||
},
|
||||
"filename_generator": {
|
||||
"default": "static",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
|
||||
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled).",
|
||||
"choices": ["random", "static"],
|
||||
},
|
||||
"bucket": {"default": None, "help": "S3 bucket name"},
|
||||
|
||||
Reference in New Issue
Block a user