mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
experimental feature for one-click deployment
This commit is contained in:
163
deploy/generate_config.py
Normal file
163
deploy/generate_config.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generates orchestration.yaml from environment variables.
|
||||
|
||||
This script bridges Railway's env-var-based configuration with
|
||||
auto-archiver's YAML-based configuration system. It runs at container
|
||||
startup before the web UI server starts.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
CONFIG_PATH = Path("/app/secrets/orchestration.yaml")
|
||||
SECRETS_DIR = Path("/app/secrets")
|
||||
|
||||
|
||||
def build_config() -> dict:
|
||||
"""Build an orchestration config dict from environment variables."""
|
||||
|
||||
# -- Base config: always present ------------------------------------
|
||||
config = {
|
||||
"steps": {
|
||||
"feeders": ["cli_feeder"],
|
||||
"extractors": ["generic_extractor"],
|
||||
"enrichers": ["hash_enricher"],
|
||||
"databases": ["console_db"],
|
||||
"storages": ["local_storage"],
|
||||
"formatters": ["html_formatter"],
|
||||
},
|
||||
"logging": {
|
||||
"level": os.environ.get("LOG_LEVEL", "INFO"),
|
||||
},
|
||||
"local_storage": {
|
||||
"save_to": "/app/local_archive",
|
||||
"path_generator": "flat",
|
||||
"filename_generator": "static",
|
||||
},
|
||||
"generic_extractor": {
|
||||
"subtitles": os.environ.get("SUBTITLES", "false").lower() == "true",
|
||||
"comments": False,
|
||||
"livestreams": False,
|
||||
"live_from_start": False,
|
||||
"end_means_success": True,
|
||||
"allow_playlist": False,
|
||||
},
|
||||
"hash_enricher": {
|
||||
"algorithm": "SHA-256",
|
||||
},
|
||||
"html_formatter": {
|
||||
"detect_thumbnails": True,
|
||||
},
|
||||
"authentication": {},
|
||||
}
|
||||
|
||||
# -- Google Sheets feeder (optional) --------------------------------
|
||||
gsheet_url = os.environ.get("GSHEET_URL", "")
|
||||
if gsheet_url:
|
||||
config["steps"]["feeders"].append("gsheet_feeder")
|
||||
config["steps"]["databases"].append("gsheet_db")
|
||||
config["gsheet_feeder"] = {
|
||||
"sheet": gsheet_url,
|
||||
"header": 1,
|
||||
"service_account": str(SECRETS_DIR / "service_account.json"),
|
||||
"use_sheet_names_in_stored_paths": False,
|
||||
"columns": {
|
||||
"url": "link",
|
||||
"status": "archive status",
|
||||
"folder": "destination folder",
|
||||
"archive": "archive location",
|
||||
"date": "archive date",
|
||||
"thumbnail": "thumbnail",
|
||||
"timestamp": "upload timestamp",
|
||||
"title": "upload title",
|
||||
"text": "textual content",
|
||||
"screenshot": "screenshot",
|
||||
"hash": "hash",
|
||||
"pdq_hash": "perceptual hashes",
|
||||
},
|
||||
}
|
||||
|
||||
# -- Google service account JSON (optional) -------------------------
|
||||
sa_json = os.environ.get("GOOGLE_SERVICE_ACCOUNT_JSON", "")
|
||||
if sa_json:
|
||||
SECRETS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
sa_path = SECRETS_DIR / "service_account.json"
|
||||
sa_path.write_text(sa_json)
|
||||
print(f"[deploy] Wrote Google service account to {sa_path}")
|
||||
|
||||
# -- S3 storage (optional) ------------------------------------------
|
||||
s3_bucket = os.environ.get("S3_BUCKET", "")
|
||||
if s3_bucket:
|
||||
config["steps"]["storages"].append("s3_storage")
|
||||
config["s3_storage"] = {
|
||||
"bucket": s3_bucket,
|
||||
"region": os.environ.get("S3_REGION", "us-east-1"),
|
||||
"key": os.environ.get("S3_KEY", ""),
|
||||
"secret": os.environ.get("S3_SECRET", ""),
|
||||
"endpoint_url": os.environ.get("S3_ENDPOINT", "https://s3.{region}.amazonaws.com"),
|
||||
"cdn_url": os.environ.get(
|
||||
"S3_CDN_URL",
|
||||
"https://{bucket}.s3.{region}.amazonaws.com/{key}",
|
||||
),
|
||||
"private": os.environ.get("S3_PRIVATE", "false").lower() == "true",
|
||||
"random_no_duplicate": True,
|
||||
"key_path": "random",
|
||||
}
|
||||
|
||||
# -- Telegram extractor (optional) ----------------------------------
|
||||
tg_api_id = os.environ.get("TELEGRAM_API_ID", "")
|
||||
tg_api_hash = os.environ.get("TELEGRAM_API_HASH", "")
|
||||
if tg_api_id and tg_api_hash:
|
||||
config["steps"]["extractors"].append("telegram_extractor")
|
||||
config["telegram_extractor"] = {
|
||||
"api_id": tg_api_id,
|
||||
"api_hash": tg_api_hash,
|
||||
}
|
||||
bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
|
||||
if bot_token:
|
||||
config["telegram_extractor"]["bot_token"] = bot_token
|
||||
|
||||
# -- Screenshot enricher (optional) ---------------------------------
|
||||
if os.environ.get("ENABLE_SCREENSHOTS", "").lower() == "true":
|
||||
config["steps"]["enrichers"].append("screenshot_enricher")
|
||||
config["screenshot_enricher"] = {
|
||||
"width": 1280,
|
||||
"height": 7200,
|
||||
"save_to_pdf": True,
|
||||
}
|
||||
|
||||
# -- Thumbnail enricher (optional) ----------------------------------
|
||||
if os.environ.get("ENABLE_THUMBNAILS", "").lower() == "true":
|
||||
config["steps"]["enrichers"].append("thumbnail_enricher")
|
||||
config["thumbnail_enricher"] = {
|
||||
"thumbnails_per_minute": 60,
|
||||
"max_thumbnails": 16,
|
||||
}
|
||||
|
||||
# -- CSV database (optional) ----------------------------------------
|
||||
if os.environ.get("ENABLE_CSV_DB", "").lower() == "true":
|
||||
config["steps"]["databases"].append("csv_db")
|
||||
config["csv_db"] = {
|
||||
"csv_file": "/app/local_archive/db.csv",
|
||||
}
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def main():
|
||||
config = build_config()
|
||||
|
||||
CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(CONFIG_PATH, "w") as f:
|
||||
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
|
||||
print(f"[deploy] Generated config at {CONFIG_PATH}")
|
||||
print(f"[deploy] Active steps: {config['steps']}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user