mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
experimental feature for one-click deployment
This commit is contained in:
354
deploy/tests/test_generate_config.py
Normal file
354
deploy/tests/test_generate_config.py
Normal file
@@ -0,0 +1,354 @@
|
||||
"""Tests for deploy/generate_config.py – config generation from env vars."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import yaml
|
||||
|
||||
from deploy.generate_config import build_config, main
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _env(**overrides):
    """Return a copy of the host environment with deploy settings reset.

    Every variable named in ``deploy_vars`` is left out of the copy of
    ``os.environ`` so that a value set on the host cannot leak into a test.
    The ``overrides`` are then applied on top.

    Args:
        **overrides: Environment variable names mapped to the values the
            calling test wants to be set.

    Returns:
        A new ``dict``; ``os.environ`` itself is not modified.
    """
    # A frozenset gives constant-time membership checks in the filter below,
    # instead of scanning a list once per host environment variable.
    deploy_vars = frozenset(
        {
            "LOG_LEVEL",
            "SUBTITLES",
            "GSHEET_URL",
            "GOOGLE_SERVICE_ACCOUNT_JSON",
            "S3_BUCKET",
            "S3_KEY",
            "S3_SECRET",
            "S3_REGION",
            "S3_ENDPOINT",
            "S3_CDN_URL",
            "S3_PRIVATE",
            "TELEGRAM_API_ID",
            "TELEGRAM_API_HASH",
            "TELEGRAM_BOT_TOKEN",
            "ENABLE_SCREENSHOTS",
            "ENABLE_THUMBNAILS",
            "ENABLE_CSV_DB",
        }
    )
    clean = {k: v for k, v in os.environ.items() if k not in deploy_vars}
    clean.update(overrides)
    return clean
|
||||
|
||||
|
||||
# ── Base config (no optional env vars) ────────────────────────────────
|
||||
|
||||
|
||||
class TestBaseConfig:
    """Checks on the config that build_config returns for a bare environment."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_base_steps(self):
        """Each step category lists exactly one default module."""
        expected_steps = (
            ("feeders", ["cli_feeder"]),
            ("extractors", ["generic_extractor"]),
            ("enrichers", ["hash_enricher"]),
            ("databases", ["console_db"]),
            ("storages", ["local_storage"]),
            ("formatters", ["html_formatter"]),
        )
        steps = self._build()["steps"]
        for category, modules in expected_steps:
            assert steps[category] == modules

    def test_base_has_required_module_configs(self):
        """The default modules each have a top-level config section."""
        config = self._build()
        for section in ("local_storage", "generic_extractor", "hash_enricher", "html_formatter"):
            assert section in config

    def test_default_log_level_is_info(self):
        """Without LOG_LEVEL the logging level is INFO."""
        assert self._build()["logging"]["level"] == "INFO"

    def test_custom_log_level(self):
        """LOG_LEVEL is passed through to the logging section."""
        assert self._build(LOG_LEVEL="DEBUG")["logging"]["level"] == "DEBUG"

    def test_authentication_present_and_empty(self):
        """The authentication section exists and is an empty mapping."""
        assert self._build()["authentication"] == {}

    def test_local_storage_defaults(self):
        """local_storage carries the expected default settings."""
        local_storage = self._build()["local_storage"]
        assert local_storage["save_to"] == "/app/local_archive"
        assert local_storage["path_generator"] == "flat"
        assert local_storage["filename_generator"] == "static"

    def test_subtitles_default_false(self):
        """Subtitles are off when SUBTITLES is not set."""
        assert self._build()["generic_extractor"]["subtitles"] is False

    def test_subtitles_enabled(self):
        """SUBTITLES=true turns subtitles on."""
        assert self._build(SUBTITLES="true")["generic_extractor"]["subtitles"] is True

    def test_subtitles_case_insensitive(self):
        """A capitalised "True" is accepted as well."""
        assert self._build(SUBTITLES="True")["generic_extractor"]["subtitles"] is True

    def test_no_optional_modules_present(self):
        """No optional module gets a config section in a bare environment."""
        config = self._build()
        optional_sections = (
            "gsheet_feeder",
            "s3_storage",
            "telegram_extractor",
            "screenshot_enricher",
            "thumbnail_enricher",
            "csv_db",
        )
        for section in optional_sections:
            assert section not in config

    def test_config_is_valid_yaml(self):
        """Dumping the config to YAML and loading it back yields an equal dict."""
        config = self._build()
        assert yaml.safe_load(yaml.dump(config)) == config
|
||||
|
||||
|
||||
# ── Google Sheets ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestGSheetConfig:
    """Checks on the config produced when the Google Sheets env vars are set."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_gsheet_adds_feeder_and_db(self):
        """GSHEET_URL registers both the gsheet feeder and the gsheet database."""
        steps = self._build(GSHEET_URL="https://docs.google.com/spreadsheets/d/abc")["steps"]
        assert "gsheet_feeder" in steps["feeders"]
        assert "gsheet_db" in steps["databases"]

    def test_gsheet_feeder_config(self):
        """The gsheet_feeder section carries the sheet URL and column names."""
        sheet_url = "https://docs.google.com/spreadsheets/d/abc123"
        feeder = self._build(GSHEET_URL=sheet_url)["gsheet_feeder"]
        assert feeder["sheet"] == sheet_url
        assert feeder["header"] == 1
        assert "service_account" in feeder
        columns = feeder["columns"]
        assert columns["url"] == "link"
        assert columns["status"] == "archive status"

    def test_gsheet_preserves_cli_feeder(self):
        """Adding the gsheet feeder does not remove cli_feeder."""
        feeders = self._build(GSHEET_URL="https://example.com/sheet")["steps"]["feeders"]
        assert "cli_feeder" in feeders

    def test_service_account_json_written(self, tmp_path):
        """GOOGLE_SERVICE_ACCOUNT_JSON ends up in service_account.json under SECRETS_DIR."""
        payload = json.dumps({"type": "service_account", "project_id": "test"})
        secrets_dir = tmp_path / "secrets"
        # Same patch order as a single combined ``with``: environment first,
        # then the module-level SECRETS_DIR.
        with patch.dict(os.environ, _env(GOOGLE_SERVICE_ACCOUNT_JSON=payload), clear=True):
            with patch("deploy.generate_config.SECRETS_DIR", secrets_dir):
                build_config()
        written = secrets_dir / "service_account.json"
        assert written.exists()
        assert json.loads(written.read_text())["project_id"] == "test"
|
||||
|
||||
|
||||
# ── S3 storage ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestS3Config:
    """Checks on the config produced when the S3_* env vars are set."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_s3_adds_storage(self):
        """S3_BUCKET adds s3_storage and keeps local_storage."""
        storages = self._build(S3_BUCKET="my-bucket")["steps"]["storages"]
        assert "s3_storage" in storages
        assert "local_storage" in storages  # local still there

    def test_s3_config_values(self):
        """Bucket, credentials and region are copied into s3_storage."""
        config = self._build(
            S3_BUCKET="my-bucket",
            S3_KEY="AKID",
            S3_SECRET="shhh",
            S3_REGION="eu-west-1",
        )
        storage = config["s3_storage"]
        assert storage["bucket"] == "my-bucket"
        assert storage["key"] == "AKID"
        assert storage["secret"] == "shhh"
        assert storage["region"] == "eu-west-1"
        assert storage["private"] is False
        assert storage["random_no_duplicate"] is True

    def test_s3_defaults(self):
        """With only a bucket set, region and endpoint fall back to defaults."""
        storage = self._build(S3_BUCKET="b")["s3_storage"]
        assert storage["region"] == "us-east-1"
        assert "{region}" in storage["endpoint_url"]

    def test_s3_private_flag(self):
        """S3_PRIVATE=true sets the private flag."""
        config = self._build(S3_BUCKET="b", S3_PRIVATE="true")
        assert config["s3_storage"]["private"] is True

    def test_s3_custom_endpoint(self):
        """S3_ENDPOINT is used verbatim as endpoint_url."""
        custom_endpoint = "https://nyc3.digitaloceanspaces.com"
        config = self._build(S3_BUCKET="b", S3_ENDPOINT=custom_endpoint)
        assert config["s3_storage"]["endpoint_url"] == custom_endpoint
|
||||
|
||||
|
||||
# ── Telegram ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTelegramConfig:
    """Checks on the config produced when the TELEGRAM_* env vars are set."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_telegram_added_when_both_set(self):
        """API id and hash together enable and configure the extractor."""
        config = self._build(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")
        assert "telegram_extractor" in config["steps"]["extractors"]
        telegram = config["telegram_extractor"]
        assert telegram["api_id"] == "12345"
        assert telegram["api_hash"] == "abc"

    def test_telegram_not_added_if_only_id(self):
        """An API id without a hash does not enable the extractor."""
        extractors = self._build(TELEGRAM_API_ID="12345")["steps"]["extractors"]
        assert "telegram_extractor" not in extractors

    def test_telegram_not_added_if_only_hash(self):
        """An API hash without an id does not enable the extractor."""
        extractors = self._build(TELEGRAM_API_HASH="abc")["steps"]["extractors"]
        assert "telegram_extractor" not in extractors

    def test_telegram_bot_token_optional(self):
        """A supplied bot token is copied into the extractor config."""
        config = self._build(
            TELEGRAM_API_ID="12345",
            TELEGRAM_API_HASH="abc",
            TELEGRAM_BOT_TOKEN="bot:tok",
        )
        assert config["telegram_extractor"]["bot_token"] == "bot:tok"

    def test_telegram_no_bot_token(self):
        """Without a bot token the key is absent from the extractor config."""
        config = self._build(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")
        assert "bot_token" not in config["telegram_extractor"]
|
||||
|
||||
|
||||
# ── Optional enrichers / databases ────────────────────────────────────
|
||||
|
||||
|
||||
class TestOptionalModules:
    """Checks on the ENABLE_* switches for optional enrichers and databases."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_screenshots_disabled_by_default(self):
        """screenshot_enricher is not listed in a bare environment."""
        enrichers = self._build()["steps"]["enrichers"]
        assert "screenshot_enricher" not in enrichers

    def test_screenshots_enabled(self):
        """ENABLE_SCREENSHOTS=true lists and configures the enricher."""
        config = self._build(ENABLE_SCREENSHOTS="true")
        assert "screenshot_enricher" in config["steps"]["enrichers"]
        assert config["screenshot_enricher"]["width"] == 1280

    def test_thumbnails_enabled(self):
        """ENABLE_THUMBNAILS=true lists and configures the enricher."""
        config = self._build(ENABLE_THUMBNAILS="true")
        assert "thumbnail_enricher" in config["steps"]["enrichers"]
        assert config["thumbnail_enricher"]["max_thumbnails"] == 16

    def test_csv_db_enabled(self):
        """ENABLE_CSV_DB=true lists csv_db and sets its output file."""
        config = self._build(ENABLE_CSV_DB="true")
        assert "csv_db" in config["steps"]["databases"]
        assert config["csv_db"]["csv_file"] == "/app/local_archive/db.csv"

    def test_case_insensitive_boolean(self):
        """An upper-case "TRUE" enables the module as well."""
        enrichers = self._build(ENABLE_SCREENSHOTS="TRUE")["steps"]["enrichers"]
        assert "screenshot_enricher" in enrichers
|
||||
|
||||
|
||||
# ── Combined / full config ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCombinedConfig:
    """Checks on the config produced when several optional features are enabled together."""

    @staticmethod
    def _build(**overrides):
        """Call build_config while os.environ is replaced by _env(**overrides)."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_all_optional_modules_together(self):
        """Every optional module is listed and configured when all are enabled."""
        config = self._build(
            GSHEET_URL="https://example.com/sheet",
            S3_BUCKET="bucket",
            S3_KEY="key",
            S3_SECRET="secret",
            TELEGRAM_API_ID="123",
            TELEGRAM_API_HASH="abc",
            TELEGRAM_BOT_TOKEN="tok",
            ENABLE_SCREENSHOTS="true",
            ENABLE_THUMBNAILS="true",
            ENABLE_CSV_DB="true",
        )

        # (step category, module expected in that category's list)
        expected_members = (
            ("feeders", "gsheet_feeder"),
            ("extractors", "telegram_extractor"),
            ("enrichers", "screenshot_enricher"),
            ("enrichers", "thumbnail_enricher"),
            ("databases", "csv_db"),
            ("databases", "gsheet_db"),
            ("storages", "s3_storage"),
            ("storages", "local_storage"),
        )
        steps = config["steps"]
        for category, module in expected_members:
            assert module in steps[category]

        # All module configs present
        module_sections = (
            "gsheet_feeder",
            "s3_storage",
            "telegram_extractor",
            "screenshot_enricher",
            "thumbnail_enricher",
            "csv_db",
        )
        for key in module_sections:
            assert key in config, f"{key} config missing"

    def test_full_config_valid_yaml(self):
        """A config with several features enabled survives a YAML round trip."""
        config = self._build(
            GSHEET_URL="https://example.com/sheet",
            S3_BUCKET="bucket",
            TELEGRAM_API_ID="123",
            TELEGRAM_API_HASH="abc",
            ENABLE_SCREENSHOTS="true",
            ENABLE_CSV_DB="true",
        )
        assert yaml.safe_load(yaml.dump(config)) == config
|
||||
|
||||
|
||||
# ── main() writes file ───────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestMainFunction:
    """Checks that main() writes the generated config to CONFIG_PATH."""

    @staticmethod
    def _run_main(target):
        """Run main() with a clean environment and CONFIG_PATH pointed at target."""
        with patch.dict(os.environ, _env(), clear=True):
            with patch("deploy.generate_config.CONFIG_PATH", target):
                main()

    def test_main_writes_config_file(self, tmp_path):
        """main() creates a YAML file whose content matches the base config."""
        target = tmp_path / "orchestration.yaml"
        self._run_main(target)
        assert target.exists()
        written = yaml.safe_load(target.read_text())
        assert written["steps"]["feeders"] == ["cli_feeder"]

    def test_main_creates_parent_dirs(self, tmp_path):
        """main() creates missing parent directories of CONFIG_PATH."""
        target = tmp_path / "nested" / "dir" / "orchestration.yaml"
        self._run_main(target)
        assert target.exists()
|
||||
Reference in New Issue
Block a user