Various fixes for issues with new architecture (#208)

* Add formatters to the TOC - fixes #204
* Add 'steps' settings to the example YAML in the docs. Fixes #206
* Improved docs on authentication architecture
* Fix setting modules on the command line - they now override any module settings in the orchestration as opposed to appending
* Fix tests for gsheet-feeder: add a test service_account.json (note: not real keys in there)
* Rename the command line entrypoint to _command_line_run. Also: make it clear that code implementation should not call this. Make sure the command line entry returns (we don't want a generator)
* Fix unit tests to use new code-entry points
* Version bump
* Move iterating of generator up to __main__
* Breakpoint
* two minor fixes
* Fix unit tests + add new '__main__' entry point implementation test
* Skip youtube tests if running on CI. Should still run them locally
* Fix full implementation run on GH actions
* Fix skipif test for GH Actions CI
* Add skipifs for truth - it blocks GH:

---------

Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>
2026-06-08 03:18:28 +03:00 · 2025-02-18 19:10:09 +00:00
parent 6d43bc7d4d
commit 3c543a3a6a
18 changed files with 314 additions and 84 deletions
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -75,18 +75,36 @@ def test_help(orchestrator, basic_parser, capsys):
        orchestrator.show_help(args)

    assert exit_error.value.code == 0
-    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in capsys.readouterr().out
+
+    logs = capsys.readouterr().out
+    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs
+
+    # basic config options
+    assert "--version" in logs
+
+    # setting modules options
+    assert "--feeders" in logs
+    assert "--extractors" in logs
+
+    # authentication options
+    assert "--authentication" in logs
+
+    # logging options
+    assert "--logging.level" in logs
+
+    # individual module configs
+    assert "--gsheet_feeder.sheet_id" in logs


 def test_add_custom_modules_path(orchestrator, test_args):
-    orchestrator.run(test_args)
+    orchestrator.setup_config(test_args)
    
    import auto_archiver
    assert "tests/data/test_modules/" in auto_archiver.modules.__path__

 def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):

-    orchestrator.run(test_args +  # we still need to load the real path to get the example_module 
+    orchestrator.setup_config(test_args +  # we still need to load the real path to get the example_module 
                          ["--module_paths", "tests/data/invalid_test_modules/"])

    assert caplog.records[0].message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."
@@ -97,7 +115,7 @@ def test_check_required_values(orchestrator, caplog, test_args):
    test_args = test_args[:-2]

    with pytest.raises(SystemExit) as exit_error:
-        orchestrator.run(test_args)
+        config = orchestrator.setup_config(test_args)

    assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"

@@ -111,24 +129,50 @@ def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    store_yaml(test_yaml, tmp_file)

    # run the orchestrator
-    orchestrator.run(["--config", tmp_file, "--module_paths", TEST_MODULES])
-    assert orchestrator.config is not None
+    config = orchestrator.setup_config(["--config", tmp_file, "--module_paths", TEST_MODULES])
+    assert config is not None

 def test_load_authentication_string(orchestrator, test_args):

-    orchestrator.run(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
-    assert orchestrator.config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
+    config = orchestrator.setup_config(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
+    assert config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}

 def test_load_authentication_string_concat_site(orchestrator, test_args):
    
-    orchestrator.run(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
-    assert orchestrator.config['authentication'] == {"x.com": {"api_key": "my_key"},
+    config = orchestrator.setup_config(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
+    assert config['authentication'] == {"x.com": {"api_key": "my_key"},
                                                     "twitter.com": {"api_key": "my_key"}}

 def test_load_invalid_authentication_string(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
-        orchestrator.run(test_args + ["--authentication", "{\''invalid_json"])
+        orchestrator.setup_config(test_args + ["--authentication", "{\''invalid_json"])

 def test_load_authentication_invalid_dict(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
-        orchestrator.run(test_args + ["--authentication", "[true, false]"])
+        orchestrator.setup_config(test_args + ["--authentication", "[true, false]"])
+
+def test_load_modules_from_commandline(orchestrator, test_args):
+    args = test_args + ["--feeders", "example_module", "--extractors", "example_module", "--databases", "example_module", "--enrichers", "example_module", "--formatters", "example_module"]
+
+    orchestrator.setup(args)
+
+    assert len(orchestrator.feeders) == 1
+    assert len(orchestrator.extractors) == 1
+    assert len(orchestrator.databases) == 1
+    assert len(orchestrator.enrichers) == 1
+    assert len(orchestrator.formatters) == 1
+
+    assert orchestrator.feeders[0].name == "example_module"
+    assert orchestrator.extractors[0].name == "example_module"
+    assert orchestrator.databases[0].name == "example_module"
+    assert orchestrator.enrichers[0].name == "example_module"
+    assert orchestrator.formatters[0].name == "example_module"
+
+def test_load_settings_for_module_from_commandline(orchestrator, test_args):
+    args = test_args + ["--feeders", "gsheet_feeder", "--gsheet_feeder.sheet_id", "123", "--gsheet_feeder.service_account", "tests/data/test_service_account.json"]
+
+    orchestrator.setup(args)
+
+    assert len(orchestrator.feeders) == 1
+    assert orchestrator.feeders[0].name == "gsheet_feeder"
+    assert orchestrator.config['gsheet_feeder']['sheet_id'] == "123"