Files
auto-archiver/deploy/generate_config.py
2026-03-12 11:47:20 +00:00

164 lines
5.7 KiB
Python

#!/usr/bin/env python3
"""
Generates orchestration.yaml from environment variables.
This script bridges Railway's env-var-based configuration with
auto-archiver's YAML-based configuration system. It runs at container
startup before the web UI server starts.
"""
import os
from pathlib import Path
import yaml
# Directory that receives the generated config and the credential files.
SECRETS_DIR = Path("/app/secrets")
# Full path of the generated orchestration YAML file (lives in SECRETS_DIR).
CONFIG_PATH = SECRETS_DIR / "orchestration.yaml"
# Values (compared case-insensitively, surrounding whitespace ignored) that
# switch an opt-in feature flag on.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})

# File name, inside SECRETS_DIR, of the Google service-account credentials.
_SERVICE_ACCOUNT_FILENAME = "service_account.json"


def _env_flag(name: str) -> bool:
    """Return True when environment variable *name* holds a truthy value.

    Unset or empty variables are False. Whitespace around the value is
    ignored so that values pasted with a trailing newline still count.
    """
    return os.environ.get(name, "").strip().lower() in _TRUTHY_ENV_VALUES


def _base_config() -> dict:
    """Return a fresh copy of the always-present part of the config."""
    return {
        "steps": {
            "feeders": ["cli_feeder"],
            "extractors": ["generic_extractor"],
            "enrichers": ["hash_enricher"],
            "databases": ["console_db"],
            "storages": ["local_storage"],
            "formatters": ["html_formatter"],
        },
        "logging": {
            "level": os.environ.get("LOG_LEVEL", "INFO"),
        },
        "local_storage": {
            "save_to": "/app/local_archive",
            "path_generator": "flat",
            "filename_generator": "static",
        },
        "generic_extractor": {
            "subtitles": _env_flag("SUBTITLES"),
            "comments": False,
            "livestreams": False,
            "live_from_start": False,
            "end_means_success": True,
            "allow_playlist": False,
        },
        "hash_enricher": {
            "algorithm": "SHA-256",
        },
        "html_formatter": {
            "detect_thumbnails": True,
        },
        "authentication": {},
    }


def _add_gsheet_feeder(config: dict, sheet_url: str) -> None:
    """Enable the Google Sheets feeder/database steps for *sheet_url*."""
    config["steps"]["feeders"].append("gsheet_feeder")
    config["steps"]["databases"].append("gsheet_db")
    config["gsheet_feeder"] = {
        "sheet": sheet_url,
        "header": 1,
        "service_account": str(SECRETS_DIR / _SERVICE_ACCOUNT_FILENAME),
        "use_sheet_names_in_stored_paths": False,
        "columns": {
            "url": "link",
            "status": "archive status",
            "folder": "destination folder",
            "archive": "archive location",
            "date": "archive date",
            "thumbnail": "thumbnail",
            "timestamp": "upload timestamp",
            "title": "upload title",
            "text": "textual content",
            "screenshot": "screenshot",
            "hash": "hash",
            "pdq_hash": "perceptual hashes",
        },
    }


def _write_service_account(sa_json: str) -> None:
    """Write the service-account JSON into SECRETS_DIR, readable by owner only."""
    SECRETS_DIR.mkdir(parents=True, exist_ok=True)
    sa_path = SECRETS_DIR / _SERVICE_ACCOUNT_FILENAME
    # Tighten permissions *before* the credentials land on disk. This is
    # best-effort: a filesystem that rejects chmod must not abort startup.
    try:
        sa_path.touch(mode=0o600, exist_ok=True)
        sa_path.chmod(0o600)
    except OSError as err:
        print(f"[deploy] Warning: could not restrict permissions on {sa_path}: {err}")
    sa_path.write_text(sa_json, encoding="utf-8")
    print(f"[deploy] Wrote Google service account to {sa_path}")


def _add_s3_storage(config: dict, bucket: str) -> None:
    """Enable the S3 storage step for *bucket* using the S3_* variables."""
    config["steps"]["storages"].append("s3_storage")
    config["s3_storage"] = {
        "bucket": bucket,
        "region": os.environ.get("S3_REGION", "us-east-1"),
        "key": os.environ.get("S3_KEY", ""),
        "secret": os.environ.get("S3_SECRET", ""),
        # The "{region}" / "{bucket}" / "{key}" placeholders are emitted
        # verbatim (these are plain strings, not f-strings); presumably the
        # consumer of the config expands them -- confirm against s3_storage.
        "endpoint_url": os.environ.get("S3_ENDPOINT", "https://s3.{region}.amazonaws.com"),
        "cdn_url": os.environ.get(
            "S3_CDN_URL",
            "https://{bucket}.s3.{region}.amazonaws.com/{key}",
        ),
        "private": _env_flag("S3_PRIVATE"),
        "random_no_duplicate": True,
        "key_path": "random",
    }


def _add_telegram_extractor(config: dict, api_id: str, api_hash: str) -> None:
    """Enable the Telegram extractor step with the given API credentials."""
    config["steps"]["extractors"].append("telegram_extractor")
    config["telegram_extractor"] = {
        "api_id": api_id,
        "api_hash": api_hash,
    }
    bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
    if bot_token:
        config["telegram_extractor"]["bot_token"] = bot_token


def build_config() -> dict:
    """Build an orchestration config dict from environment variables.

    The base steps are always present. Optional steps are switched on by:

    * ``GSHEET_URL`` -- Google Sheets feeder and database.
    * ``GOOGLE_SERVICE_ACCOUNT_JSON`` -- written to the secrets directory
      as a side effect of this call.
    * ``S3_BUCKET`` (plus the other ``S3_*`` variables) -- S3 storage.
    * ``TELEGRAM_API_ID`` and ``TELEGRAM_API_HASH`` (both required;
      ``TELEGRAM_BOT_TOKEN`` optional) -- Telegram extractor.
    * ``ENABLE_SCREENSHOTS``, ``ENABLE_THUMBNAILS``, ``ENABLE_CSV_DB`` --
      boolean flags, as are ``SUBTITLES`` and ``S3_PRIVATE``.

    Boolean flags accept ``true``/``1``/``yes``/``on`` in any letter case,
    ignoring surrounding whitespace; anything else is False.

    Returns:
        A new dict, in insertion order suitable for dumping to YAML.
    """
    # -- Base config: always present ------------------------------------
    config = _base_config()

    # -- Google Sheets feeder (optional) --------------------------------
    gsheet_url = os.environ.get("GSHEET_URL", "")
    if gsheet_url:
        _add_gsheet_feeder(config, gsheet_url)

    # -- Google service account JSON (optional) -------------------------
    sa_json = os.environ.get("GOOGLE_SERVICE_ACCOUNT_JSON", "")
    if sa_json:
        _write_service_account(sa_json)
    elif gsheet_url and not (SECRETS_DIR / _SERVICE_ACCOUNT_FILENAME).exists():
        # The feeder config points at a credentials file that is not there;
        # say so now instead of leaving it to surface later.
        print(
            "[deploy] Warning: GSHEET_URL is set but GOOGLE_SERVICE_ACCOUNT_JSON "
            f"is not and {SECRETS_DIR / _SERVICE_ACCOUNT_FILENAME} does not exist"
        )

    # -- S3 storage (optional) ------------------------------------------
    s3_bucket = os.environ.get("S3_BUCKET", "")
    if s3_bucket:
        _add_s3_storage(config, s3_bucket)

    # -- Telegram extractor (optional) ----------------------------------
    tg_api_id = os.environ.get("TELEGRAM_API_ID", "")
    tg_api_hash = os.environ.get("TELEGRAM_API_HASH", "")
    if tg_api_id and tg_api_hash:
        _add_telegram_extractor(config, tg_api_id, tg_api_hash)
    elif tg_api_id or tg_api_hash:
        print(
            "[deploy] Warning: TELEGRAM_API_ID and TELEGRAM_API_HASH must both "
            "be set; Telegram extractor not enabled"
        )

    # -- Screenshot enricher (optional) ---------------------------------
    if _env_flag("ENABLE_SCREENSHOTS"):
        config["steps"]["enrichers"].append("screenshot_enricher")
        config["screenshot_enricher"] = {
            "width": 1280,
            "height": 7200,
            "save_to_pdf": True,
        }

    # -- Thumbnail enricher (optional) ----------------------------------
    if _env_flag("ENABLE_THUMBNAILS"):
        config["steps"]["enrichers"].append("thumbnail_enricher")
        config["thumbnail_enricher"] = {
            "thumbnails_per_minute": 60,
            "max_thumbnails": 16,
        }

    # -- CSV database (optional) ----------------------------------------
    if _env_flag("ENABLE_CSV_DB"):
        config["steps"]["databases"].append("csv_db")
        config["csv_db"] = {
            "csv_file": "/app/local_archive/db.csv",
        }

    return config
def main():
    """Generate the orchestration config and write it to CONFIG_PATH as YAML.

    Creates the parent directory of CONFIG_PATH when it is missing, dumps the
    dict returned by build_config() in block style with insertion order kept,
    and prints the output path and the active steps.
    """
    generated = build_config()

    target_dir = CONFIG_PATH.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    with CONFIG_PATH.open("w") as stream:
        yaml.dump(
            generated,
            stream,
            default_flow_style=False,
            sort_keys=False,
        )

    active_steps = generated["steps"]
    print(f"[deploy] Generated config at {CONFIG_PATH}")
    print(f"[deploy] Active steps: {active_steps}")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()