mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-07 19:08:30 +03:00
355 lines
14 KiB
Python
355 lines
14 KiB
Python
"""Tests for deploy/generate_config.py – config generation from env vars."""
|
||
|
||
import json
|
||
import os
|
||
from unittest.mock import patch
|
||
|
||
import yaml
|
||
|
||
from deploy.generate_config import build_config, main
|
||
|
||
|
||
# ── Helpers ───────────────────────────────────────────────────────────
|
||
|
||
|
||
# Environment variables these tests treat as deploy-relevant. They are stripped
# from the host environment so a developer's shell cannot leak values into a test.
_DEPLOY_VARS = frozenset(
    {
        "LOG_LEVEL",
        "SUBTITLES",
        "GSHEET_URL",
        "GOOGLE_SERVICE_ACCOUNT_JSON",
        "S3_BUCKET",
        "S3_KEY",
        "S3_SECRET",
        "S3_REGION",
        "S3_ENDPOINT",
        "S3_CDN_URL",
        "S3_PRIVATE",
        "TELEGRAM_API_ID",
        "TELEGRAM_API_HASH",
        "TELEGRAM_BOT_TOKEN",
        "ENABLE_SCREENSHOTS",
        "ENABLE_THUMBNAILS",
        "ENABLE_CSV_DB",
    }
)


def _env(**overrides):
    """Return an environment dict that is safe to pass to ``patch.dict``.

    The result is a copy of the current ``os.environ`` with every
    deploy-relevant variable removed, then updated with *overrides*.
    Unrelated host variables are kept; ``os.environ`` itself is not modified.

    Args:
        **overrides: Variable names and values to set in the returned dict.
            They are applied last, so they may re-introduce deploy variables
            or shadow unrelated host variables.

    Returns:
        dict: A new mapping of variable name to value.
    """
    clean = {name: value for name, value in os.environ.items() if name not in _DEPLOY_VARS}
    clean.update(overrides)
    return clean
|
||
|
||
|
||
# ── Base config (no optional env vars) ────────────────────────────────
|
||
|
||
|
||
class TestBaseConfig:
    """Checks on the config build_config produces when optional variables are unset."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_base_steps(self):
        """Every pipeline step lists exactly its single default module."""
        expected = {
            "feeders": ["cli_feeder"],
            "extractors": ["generic_extractor"],
            "enrichers": ["hash_enricher"],
            "databases": ["console_db"],
            "storages": ["local_storage"],
            "formatters": ["html_formatter"],
        }
        steps = self._build()["steps"]
        for step_name, modules in expected.items():
            assert steps[step_name] == modules

    def test_base_has_required_module_configs(self):
        """The default modules each have a top-level config section."""
        cfg = self._build()
        for module in ("local_storage", "generic_extractor", "hash_enricher", "html_formatter"):
            assert module in cfg

    def test_default_log_level_is_info(self):
        """Without LOG_LEVEL the logging level is INFO."""
        logging_cfg = self._build()["logging"]
        assert logging_cfg["level"] == "INFO"

    def test_custom_log_level(self):
        """LOG_LEVEL is copied into the logging section."""
        logging_cfg = self._build(LOG_LEVEL="DEBUG")["logging"]
        assert logging_cfg["level"] == "DEBUG"

    def test_authentication_present_and_empty(self):
        """The authentication section exists and is an empty mapping."""
        cfg = self._build()
        assert cfg["authentication"] == {}

    def test_local_storage_defaults(self):
        """local_storage uses the expected save path and generators."""
        local = self._build()["local_storage"]
        assert local["save_to"] == "/app/local_archive"
        assert local["path_generator"] == "flat"
        assert local["filename_generator"] == "static"

    def test_subtitles_default_false(self):
        """Without SUBTITLES the extractor's subtitles flag is the bool False."""
        extractor = self._build()["generic_extractor"]
        assert extractor["subtitles"] is False

    def test_subtitles_enabled(self):
        """SUBTITLES="true" turns the subtitles flag into the bool True."""
        extractor = self._build(SUBTITLES="true")["generic_extractor"]
        assert extractor["subtitles"] is True

    def test_subtitles_case_insensitive(self):
        """A capitalised "True" is accepted for SUBTITLES as well."""
        extractor = self._build(SUBTITLES="True")["generic_extractor"]
        assert extractor["subtitles"] is True

    def test_no_optional_modules_present(self):
        """No optional module gets a config section when its variables are absent."""
        cfg = self._build()
        optional_modules = (
            "gsheet_feeder",
            "s3_storage",
            "telegram_extractor",
            "screenshot_enricher",
            "thumbnail_enricher",
            "csv_db",
        )
        for module in optional_modules:
            assert module not in cfg

    def test_config_is_valid_yaml(self):
        """Dumping the config to YAML and loading it back yields an equal dict."""
        cfg = self._build()
        assert yaml.safe_load(yaml.dump(cfg)) == cfg
|
||
|
||
|
||
# ── Google Sheets ─────────────────────────────────────────────────────
|
||
|
||
|
||
class TestGSheetConfig:
    """Checks on the Google Sheets parts of the generated config."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_gsheet_adds_feeder_and_db(self):
        """GSHEET_URL registers both the gsheet feeder and the gsheet database."""
        steps = self._build(GSHEET_URL="https://docs.google.com/spreadsheets/d/abc")["steps"]
        assert "gsheet_feeder" in steps["feeders"]
        assert "gsheet_db" in steps["databases"]

    def test_gsheet_feeder_config(self):
        """The gsheet_feeder section carries the sheet URL, header row and column names."""
        url = "https://docs.google.com/spreadsheets/d/abc123"
        feeder = self._build(GSHEET_URL=url)["gsheet_feeder"]
        assert feeder["sheet"] == url
        assert feeder["header"] == 1
        assert "service_account" in feeder
        columns = feeder["columns"]
        assert columns["url"] == "link"
        assert columns["status"] == "archive status"

    def test_gsheet_preserves_cli_feeder(self):
        """Adding the gsheet feeder does not remove cli_feeder."""
        feeders = self._build(GSHEET_URL="https://example.com/sheet")["steps"]["feeders"]
        assert "cli_feeder" in feeders

    def test_service_account_json_written(self, tmp_path):
        """GOOGLE_SERVICE_ACCOUNT_JSON is written to service_account.json under SECRETS_DIR."""
        secrets_dir = tmp_path / "secrets"
        sa_data = json.dumps({"type": "service_account", "project_id": "test"})
        env_patch = patch.dict(os.environ, _env(GOOGLE_SERVICE_ACCOUNT_JSON=sa_data), clear=True)
        # Redirect the secrets directory so the file lands inside pytest's tmp_path.
        dir_patch = patch("deploy.generate_config.SECRETS_DIR", secrets_dir)
        with env_patch, dir_patch:
            build_config()
        sa_path = secrets_dir / "service_account.json"
        assert sa_path.exists()
        written = json.loads(sa_path.read_text())
        assert written["project_id"] == "test"
|
||
|
||
|
||
# ── S3 storage ────────────────────────────────────────────────────────
|
||
|
||
|
||
class TestS3Config:
    """Checks on the S3 storage parts of the generated config."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_s3_adds_storage(self):
        """S3_BUCKET adds s3_storage alongside local_storage."""
        storages = self._build(S3_BUCKET="my-bucket")["steps"]["storages"]
        assert "s3_storage" in storages
        assert "local_storage" in storages  # local still there

    def test_s3_config_values(self):
        """Bucket, credentials and region are copied into the s3_storage section."""
        s3 = self._build(
            S3_BUCKET="my-bucket",
            S3_KEY="AKID",
            S3_SECRET="shhh",
            S3_REGION="eu-west-1",
        )["s3_storage"]
        expected_strings = {
            "bucket": "my-bucket",
            "key": "AKID",
            "secret": "shhh",
            "region": "eu-west-1",
        }
        for field, value in expected_strings.items():
            assert s3[field] == value
        # The boolean flags are checked by identity, so they must be real bools.
        assert s3["private"] is False
        assert s3["random_no_duplicate"] is True

    def test_s3_defaults(self):
        """With only a bucket set, region is us-east-1 and the endpoint keeps its {region} placeholder."""
        s3 = self._build(S3_BUCKET="b")["s3_storage"]
        assert s3["region"] == "us-east-1"
        assert "{region}" in s3["endpoint_url"]

    def test_s3_private_flag(self):
        """S3_PRIVATE="true" sets the private flag to the bool True."""
        s3 = self._build(S3_BUCKET="b", S3_PRIVATE="true")["s3_storage"]
        assert s3["private"] is True

    def test_s3_custom_endpoint(self):
        """S3_ENDPOINT is used verbatim as endpoint_url."""
        endpoint = "https://nyc3.digitaloceanspaces.com"
        s3 = self._build(S3_BUCKET="b", S3_ENDPOINT=endpoint)["s3_storage"]
        assert s3["endpoint_url"] == endpoint
|
||
|
||
|
||
# ── Telegram ──────────────────────────────────────────────────────────
|
||
|
||
|
||
class TestTelegramConfig:
    """Checks on the Telegram extractor parts of the generated config."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_telegram_added_when_both_set(self):
        """API id and hash together enable and configure the extractor."""
        cfg = self._build(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")
        assert "telegram_extractor" in cfg["steps"]["extractors"]
        telegram = cfg["telegram_extractor"]
        assert telegram["api_id"] == "12345"
        assert telegram["api_hash"] == "abc"

    def test_telegram_not_added_if_only_id(self):
        """An API id without a hash does not enable the extractor."""
        extractors = self._build(TELEGRAM_API_ID="12345")["steps"]["extractors"]
        assert "telegram_extractor" not in extractors

    def test_telegram_not_added_if_only_hash(self):
        """An API hash without an id does not enable the extractor."""
        extractors = self._build(TELEGRAM_API_HASH="abc")["steps"]["extractors"]
        assert "telegram_extractor" not in extractors

    def test_telegram_bot_token_optional(self):
        """TELEGRAM_BOT_TOKEN, when given, is stored as bot_token."""
        telegram = self._build(
            TELEGRAM_API_ID="12345",
            TELEGRAM_API_HASH="abc",
            TELEGRAM_BOT_TOKEN="bot:tok",
        )["telegram_extractor"]
        assert telegram["bot_token"] == "bot:tok"

    def test_telegram_no_bot_token(self):
        """Without TELEGRAM_BOT_TOKEN the bot_token key is absent."""
        telegram = self._build(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")["telegram_extractor"]
        assert "bot_token" not in telegram
|
||
|
||
|
||
# ── Optional enrichers / databases ────────────────────────────────────
|
||
|
||
|
||
class TestOptionalModules:
    """Checks on the modules switched on by the ENABLE_* variables."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_screenshots_disabled_by_default(self):
        """screenshot_enricher is not among the enrichers by default."""
        enrichers = self._build()["steps"]["enrichers"]
        assert "screenshot_enricher" not in enrichers

    def test_screenshots_enabled(self):
        """ENABLE_SCREENSHOTS adds the enricher with a width of 1280."""
        cfg = self._build(ENABLE_SCREENSHOTS="true")
        assert "screenshot_enricher" in cfg["steps"]["enrichers"]
        assert cfg["screenshot_enricher"]["width"] == 1280

    def test_thumbnails_enabled(self):
        """ENABLE_THUMBNAILS adds the enricher with max_thumbnails of 16."""
        cfg = self._build(ENABLE_THUMBNAILS="true")
        assert "thumbnail_enricher" in cfg["steps"]["enrichers"]
        assert cfg["thumbnail_enricher"]["max_thumbnails"] == 16

    def test_csv_db_enabled(self):
        """ENABLE_CSV_DB adds csv_db writing to /app/local_archive/db.csv."""
        cfg = self._build(ENABLE_CSV_DB="true")
        assert "csv_db" in cfg["steps"]["databases"]
        assert cfg["csv_db"]["csv_file"] == "/app/local_archive/db.csv"

    def test_case_insensitive_boolean(self):
        """An upper-case "TRUE" enables screenshots too."""
        enrichers = self._build(ENABLE_SCREENSHOTS="TRUE")["steps"]["enrichers"]
        assert "screenshot_enricher" in enrichers
|
||
|
||
|
||
# ── Combined / full config ────────────────────────────────────────────
|
||
|
||
|
||
class TestCombinedConfig:
    """Checks on configs built with several optional features enabled at once."""

    @staticmethod
    def _build(**overrides):
        """Call build_config with os.environ replaced by ``_env(**overrides)``."""
        with patch.dict(os.environ, _env(**overrides), clear=True):
            return build_config()

    def test_all_optional_modules_together(self):
        """With every optional variable set, all modules appear in steps and in the config."""
        cfg = self._build(
            GSHEET_URL="https://example.com/sheet",
            S3_BUCKET="bucket",
            S3_KEY="key",
            S3_SECRET="secret",
            TELEGRAM_API_ID="123",
            TELEGRAM_API_HASH="abc",
            TELEGRAM_BOT_TOKEN="tok",
            ENABLE_SCREENSHOTS="true",
            ENABLE_THUMBNAILS="true",
            ENABLE_CSV_DB="true",
        )

        # Modules that must be listed under each pipeline step.
        expected_in_steps = {
            "feeders": ["gsheet_feeder"],
            "extractors": ["telegram_extractor"],
            "enrichers": ["screenshot_enricher", "thumbnail_enricher"],
            "databases": ["csv_db", "gsheet_db"],
            "storages": ["s3_storage", "local_storage"],
        }
        steps = cfg["steps"]
        for step_name, modules in expected_in_steps.items():
            for module in modules:
                assert module in steps[step_name]

        # All module configs present
        module_sections = (
            "gsheet_feeder",
            "s3_storage",
            "telegram_extractor",
            "screenshot_enricher",
            "thumbnail_enricher",
            "csv_db",
        )
        for key in module_sections:
            assert key in cfg, f"{key} config missing"

    def test_full_config_valid_yaml(self):
        """A config with several optional modules survives a YAML dump and reload unchanged."""
        cfg = self._build(
            GSHEET_URL="https://example.com/sheet",
            S3_BUCKET="bucket",
            TELEGRAM_API_ID="123",
            TELEGRAM_API_HASH="abc",
            ENABLE_SCREENSHOTS="true",
            ENABLE_CSV_DB="true",
        )
        assert yaml.safe_load(yaml.dump(cfg)) == cfg
|
||
|
||
|
||
# ── main() writes file ───────────────────────────────────────────────
|
||
|
||
|
||
class TestMainFunction:
    """Checks that main() writes the generated config to CONFIG_PATH."""

    @staticmethod
    def _run_main(config_path):
        """Run main() with a cleaned environment and CONFIG_PATH pointed at *config_path*."""
        env_patch = patch.dict(os.environ, _env(), clear=True)
        path_patch = patch("deploy.generate_config.CONFIG_PATH", config_path)
        with env_patch, path_patch:
            main()

    def test_main_writes_config_file(self, tmp_path):
        """main() creates the file, and its YAML content holds the default feeder list."""
        config_path = tmp_path / "orchestration.yaml"
        self._run_main(config_path)
        assert config_path.exists()
        written = yaml.safe_load(config_path.read_text())
        assert written["steps"]["feeders"] == ["cli_feeder"]

    def test_main_creates_parent_dirs(self, tmp_path):
        """main() succeeds when the target's parent directories do not exist yet."""
        config_path = tmp_path / "nested" / "dir" / "orchestration.yaml"
        self._run_main(config_path)
        assert config_path.exists()
|