Files
auto-archiver/deploy/tests/test_generate_config.py
2026-03-12 11:47:20 +00:00

355 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for deploy/generate_config.py config generation from env vars."""
import json
import os
from unittest.mock import patch
import yaml
from deploy.generate_config import build_config, main
# ── Helpers ───────────────────────────────────────────────────────────
def _env(**overrides):
"""Return a clean env dict with only the given overrides (no leak from host)."""
# Clear all deploy-relevant env vars, then apply overrides
deploy_vars = [
"LOG_LEVEL",
"SUBTITLES",
"GSHEET_URL",
"GOOGLE_SERVICE_ACCOUNT_JSON",
"S3_BUCKET",
"S3_KEY",
"S3_SECRET",
"S3_REGION",
"S3_ENDPOINT",
"S3_CDN_URL",
"S3_PRIVATE",
"TELEGRAM_API_ID",
"TELEGRAM_API_HASH",
"TELEGRAM_BOT_TOKEN",
"ENABLE_SCREENSHOTS",
"ENABLE_THUMBNAILS",
"ENABLE_CSV_DB",
]
clean = {k: v for k, v in os.environ.items() if k not in deploy_vars}
clean.update(overrides)
return clean
# ── Base config (no optional env vars) ────────────────────────────────
class TestBaseConfig:
"""When no optional env vars are set, build_config returns a minimal working config."""
def test_base_steps(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
steps = cfg["steps"]
assert steps["feeders"] == ["cli_feeder"]
assert steps["extractors"] == ["generic_extractor"]
assert steps["enrichers"] == ["hash_enricher"]
assert steps["databases"] == ["console_db"]
assert steps["storages"] == ["local_storage"]
assert steps["formatters"] == ["html_formatter"]
def test_base_has_required_module_configs(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert "local_storage" in cfg
assert "generic_extractor" in cfg
assert "hash_enricher" in cfg
assert "html_formatter" in cfg
def test_default_log_level_is_info(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert cfg["logging"]["level"] == "INFO"
def test_custom_log_level(self):
with patch.dict(os.environ, _env(LOG_LEVEL="DEBUG"), clear=True):
cfg = build_config()
assert cfg["logging"]["level"] == "DEBUG"
def test_authentication_present_and_empty(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert cfg["authentication"] == {}
def test_local_storage_defaults(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
ls = cfg["local_storage"]
assert ls["save_to"] == "/app/local_archive"
assert ls["path_generator"] == "flat"
assert ls["filename_generator"] == "static"
def test_subtitles_default_false(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert cfg["generic_extractor"]["subtitles"] is False
def test_subtitles_enabled(self):
with patch.dict(os.environ, _env(SUBTITLES="true"), clear=True):
cfg = build_config()
assert cfg["generic_extractor"]["subtitles"] is True
def test_subtitles_case_insensitive(self):
with patch.dict(os.environ, _env(SUBTITLES="True"), clear=True):
cfg = build_config()
assert cfg["generic_extractor"]["subtitles"] is True
def test_no_optional_modules_present(self):
"""Ensure optional modules don't appear when their env vars are absent."""
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert "gsheet_feeder" not in cfg
assert "s3_storage" not in cfg
assert "telegram_extractor" not in cfg
assert "screenshot_enricher" not in cfg
assert "thumbnail_enricher" not in cfg
assert "csv_db" not in cfg
def test_config_is_valid_yaml(self):
"""The output dict should round-trip through YAML cleanly."""
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
dumped = yaml.dump(cfg)
reloaded = yaml.safe_load(dumped)
assert reloaded == cfg
# ── Google Sheets ─────────────────────────────────────────────────────
class TestGSheetConfig:
def test_gsheet_adds_feeder_and_db(self):
with patch.dict(os.environ, _env(GSHEET_URL="https://docs.google.com/spreadsheets/d/abc"), clear=True):
cfg = build_config()
assert "gsheet_feeder" in cfg["steps"]["feeders"]
assert "gsheet_db" in cfg["steps"]["databases"]
def test_gsheet_feeder_config(self):
url = "https://docs.google.com/spreadsheets/d/abc123"
with patch.dict(os.environ, _env(GSHEET_URL=url), clear=True):
cfg = build_config()
gf = cfg["gsheet_feeder"]
assert gf["sheet"] == url
assert gf["header"] == 1
assert "service_account" in gf
assert gf["columns"]["url"] == "link"
assert gf["columns"]["status"] == "archive status"
def test_gsheet_preserves_cli_feeder(self):
"""cli_feeder should still be present even when gsheet is added."""
with patch.dict(os.environ, _env(GSHEET_URL="https://example.com/sheet"), clear=True):
cfg = build_config()
assert "cli_feeder" in cfg["steps"]["feeders"]
def test_service_account_json_written(self, tmp_path):
"""When GOOGLE_SERVICE_ACCOUNT_JSON is set, it writes the file."""
sa_data = json.dumps({"type": "service_account", "project_id": "test"})
secrets_dir = tmp_path / "secrets"
with (
patch.dict(os.environ, _env(GOOGLE_SERVICE_ACCOUNT_JSON=sa_data), clear=True),
patch("deploy.generate_config.SECRETS_DIR", secrets_dir),
):
build_config()
sa_path = secrets_dir / "service_account.json"
assert sa_path.exists()
assert json.loads(sa_path.read_text())["project_id"] == "test"
# ── S3 storage ────────────────────────────────────────────────────────
class TestS3Config:
def test_s3_adds_storage(self):
with patch.dict(os.environ, _env(S3_BUCKET="my-bucket"), clear=True):
cfg = build_config()
assert "s3_storage" in cfg["steps"]["storages"]
assert "local_storage" in cfg["steps"]["storages"] # local still there
def test_s3_config_values(self):
env = _env(
S3_BUCKET="my-bucket",
S3_KEY="AKID",
S3_SECRET="shhh",
S3_REGION="eu-west-1",
)
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
s3 = cfg["s3_storage"]
assert s3["bucket"] == "my-bucket"
assert s3["key"] == "AKID"
assert s3["secret"] == "shhh"
assert s3["region"] == "eu-west-1"
assert s3["private"] is False
assert s3["random_no_duplicate"] is True
def test_s3_defaults(self):
with patch.dict(os.environ, _env(S3_BUCKET="b"), clear=True):
cfg = build_config()
s3 = cfg["s3_storage"]
assert s3["region"] == "us-east-1"
assert "{region}" in s3["endpoint_url"]
def test_s3_private_flag(self):
with patch.dict(os.environ, _env(S3_BUCKET="b", S3_PRIVATE="true"), clear=True):
cfg = build_config()
assert cfg["s3_storage"]["private"] is True
def test_s3_custom_endpoint(self):
endpoint = "https://nyc3.digitaloceanspaces.com"
with patch.dict(os.environ, _env(S3_BUCKET="b", S3_ENDPOINT=endpoint), clear=True):
cfg = build_config()
assert cfg["s3_storage"]["endpoint_url"] == endpoint
# ── Telegram ──────────────────────────────────────────────────────────
class TestTelegramConfig:
def test_telegram_added_when_both_set(self):
env = _env(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
assert "telegram_extractor" in cfg["steps"]["extractors"]
assert cfg["telegram_extractor"]["api_id"] == "12345"
assert cfg["telegram_extractor"]["api_hash"] == "abc"
def test_telegram_not_added_if_only_id(self):
with patch.dict(os.environ, _env(TELEGRAM_API_ID="12345"), clear=True):
cfg = build_config()
assert "telegram_extractor" not in cfg["steps"]["extractors"]
def test_telegram_not_added_if_only_hash(self):
with patch.dict(os.environ, _env(TELEGRAM_API_HASH="abc"), clear=True):
cfg = build_config()
assert "telegram_extractor" not in cfg["steps"]["extractors"]
def test_telegram_bot_token_optional(self):
env = _env(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc", TELEGRAM_BOT_TOKEN="bot:tok")
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
assert cfg["telegram_extractor"]["bot_token"] == "bot:tok"
def test_telegram_no_bot_token(self):
env = _env(TELEGRAM_API_ID="12345", TELEGRAM_API_HASH="abc")
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
assert "bot_token" not in cfg["telegram_extractor"]
# ── Optional enrichers / databases ────────────────────────────────────
class TestOptionalModules:
def test_screenshots_disabled_by_default(self):
with patch.dict(os.environ, _env(), clear=True):
cfg = build_config()
assert "screenshot_enricher" not in cfg["steps"]["enrichers"]
def test_screenshots_enabled(self):
with patch.dict(os.environ, _env(ENABLE_SCREENSHOTS="true"), clear=True):
cfg = build_config()
assert "screenshot_enricher" in cfg["steps"]["enrichers"]
assert cfg["screenshot_enricher"]["width"] == 1280
def test_thumbnails_enabled(self):
with patch.dict(os.environ, _env(ENABLE_THUMBNAILS="true"), clear=True):
cfg = build_config()
assert "thumbnail_enricher" in cfg["steps"]["enrichers"]
assert cfg["thumbnail_enricher"]["max_thumbnails"] == 16
def test_csv_db_enabled(self):
with patch.dict(os.environ, _env(ENABLE_CSV_DB="true"), clear=True):
cfg = build_config()
assert "csv_db" in cfg["steps"]["databases"]
assert cfg["csv_db"]["csv_file"] == "/app/local_archive/db.csv"
def test_case_insensitive_boolean(self):
with patch.dict(os.environ, _env(ENABLE_SCREENSHOTS="TRUE"), clear=True):
cfg = build_config()
assert "screenshot_enricher" in cfg["steps"]["enrichers"]
# ── Combined / full config ────────────────────────────────────────────
class TestCombinedConfig:
def test_all_optional_modules_together(self):
"""Enable everything at once and verify no conflicts."""
env = _env(
GSHEET_URL="https://example.com/sheet",
S3_BUCKET="bucket",
S3_KEY="key",
S3_SECRET="secret",
TELEGRAM_API_ID="123",
TELEGRAM_API_HASH="abc",
TELEGRAM_BOT_TOKEN="tok",
ENABLE_SCREENSHOTS="true",
ENABLE_THUMBNAILS="true",
ENABLE_CSV_DB="true",
)
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
steps = cfg["steps"]
assert "gsheet_feeder" in steps["feeders"]
assert "telegram_extractor" in steps["extractors"]
assert "screenshot_enricher" in steps["enrichers"]
assert "thumbnail_enricher" in steps["enrichers"]
assert "csv_db" in steps["databases"]
assert "gsheet_db" in steps["databases"]
assert "s3_storage" in steps["storages"]
assert "local_storage" in steps["storages"]
# All module configs present
for key in [
"gsheet_feeder",
"s3_storage",
"telegram_extractor",
"screenshot_enricher",
"thumbnail_enricher",
"csv_db",
]:
assert key in cfg, f"{key} config missing"
def test_full_config_valid_yaml(self):
env = _env(
GSHEET_URL="https://example.com/sheet",
S3_BUCKET="bucket",
TELEGRAM_API_ID="123",
TELEGRAM_API_HASH="abc",
ENABLE_SCREENSHOTS="true",
ENABLE_CSV_DB="true",
)
with patch.dict(os.environ, env, clear=True):
cfg = build_config()
dumped = yaml.dump(cfg)
reloaded = yaml.safe_load(dumped)
assert reloaded == cfg
# ── main() writes file ───────────────────────────────────────────────
class TestMainFunction:
def test_main_writes_config_file(self, tmp_path):
config_path = tmp_path / "orchestration.yaml"
with patch.dict(os.environ, _env(), clear=True), patch("deploy.generate_config.CONFIG_PATH", config_path):
main()
assert config_path.exists()
cfg = yaml.safe_load(config_path.read_text())
assert cfg["steps"]["feeders"] == ["cli_feeder"]
def test_main_creates_parent_dirs(self, tmp_path):
config_path = tmp_path / "nested" / "dir" / "orchestration.yaml"
with patch.dict(os.environ, _env(), clear=True), patch("deploy.generate_config.CONFIG_PATH", config_path):
main()
assert config_path.exists()