Various fixes for issues with new architecture (#208)

* Add formatters to the TOC - fixes #204

* Add 'steps' settings to the example YAML in the docs. Fixes #206

* Improved docs on authentication architecture

* Fix setting modules on the command line - they now override any module settings in the orchestration as opposed to appending

* Fix tests for gsheet-feeder: add a test service_account.json (note: not real keys in there)

* Rename the command line entrypoint to _command_line_run

Also: make it clear that code implementations should not call this.
Make sure the command line entry point returns (we don't want a generator).

* Fix unit tests to use new code entry points

* Version bump

* Move iterating of generator up to __main__

* Breakpoint

* two minor fixes

* Fix unit tests + add new '__main__' entry point implementation test

* Skip youtube tests if running on CI. Should still run them locally

* Fix full implementation run on GH actions

* Fix skipif test for GH Actions CI

* Add skipifs for truth - it blocks GH

---------

Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>
This commit is contained in:
Patrick Robertson
2025-02-18 19:10:09 +00:00
committed by GitHub
parent 6d43bc7d4d
commit 3c543a3a6a
18 changed files with 314 additions and 84 deletions

View File

@@ -75,18 +75,36 @@ def test_help(orchestrator, basic_parser, capsys):
orchestrator.show_help(args)
assert exit_error.value.code == 0
assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in capsys.readouterr().out
logs = capsys.readouterr().out
assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs
# basic config options
assert "--version" in logs
# setting modules options
assert "--feeders" in logs
assert "--extractors" in logs
# authentication options
assert "--authentication" in logs
# logging options
assert "--logging.level" in logs
# individual module configs
assert "--gsheet_feeder.sheet_id" in logs
def test_add_custom_modules_path(orchestrator, test_args):
# Checks that "tests/data/test_modules/" is present on
# auto_archiver.modules.__path__ after the orchestrator has processed test_args.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run(...)` and
# `orchestrator.setup_config(...)` lines below look like the removed/added pair
# of a single change rather than two consecutive calls; confirm against the
# real test file.
orchestrator.run(test_args)
orchestrator.setup_config(test_args)
# Imported inside the test so the package's module path is read after setup.
import auto_archiver
assert "tests/data/test_modules/" in auto_archiver.modules.__path__
def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
# Checks that passing a non-existent directory via --module_paths produces a
# "does not exist. Skipping..." message as the first captured log record.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run(` and
# `orchestrator.setup_config(` lines below look like the removed/added pair of
# a single change (which is why the parentheses do not balance here); confirm
# against the real test file.
orchestrator.run(test_args + # we still need to load the real path to get the example_module
orchestrator.setup_config(test_args + # we still need to load the real path to get the example_module
["--module_paths", "tests/data/invalid_test_modules/"])
assert caplog.records[0].message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."
@@ -97,7 +115,7 @@ def test_check_required_values(orchestrator, caplog, test_args):
test_args = test_args[:-2]
with pytest.raises(SystemExit) as exit_error:
orchestrator.run(test_args)
config = orchestrator.setup_config(test_args)
assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"
@@ -111,24 +129,50 @@ def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
store_yaml(test_yaml, tmp_file)
# run the orchestrator
orchestrator.run(["--config", tmp_file, "--module_paths", TEST_MODULES])
assert orchestrator.config is not None
config = orchestrator.setup_config(["--config", tmp_file, "--module_paths", TEST_MODULES])
assert config is not None
def test_load_authentication_string(orchestrator, test_args):
# Checks that a JSON object passed as a string to --authentication ends up as
# the equivalent dict under the 'authentication' config key.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run` /
# `orchestrator.config[...]` lines and the `setup_config` / `config[...]` lines
# look like the removed and added versions of the same two statements; confirm
# against the real test file.
orchestrator.run(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
assert orchestrator.config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
config = orchestrator.setup_config(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
assert config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
def test_load_authentication_string_concat_site(orchestrator, test_args):
# Checks that a comma-separated site key ("x.com,twitter.com") in the
# --authentication JSON is expanded into one entry per site, each carrying the
# same credentials dict.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run` /
# `orchestrator.config[...]` lines and the `setup_config` / `config[...]` lines
# look like the removed and added versions of the same statements; confirm
# against the real test file.
orchestrator.run(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
assert orchestrator.config['authentication'] == {"x.com": {"api_key": "my_key"},
config = orchestrator.setup_config(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
assert config['authentication'] == {"x.com": {"api_key": "my_key"},
"twitter.com": {"api_key": "my_key"}}
def test_load_invalid_authentication_string(orchestrator, test_args):
# Checks that an --authentication value that is not valid JSON raises
# ArgumentTypeError.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run(...)` and
# `orchestrator.setup_config(...)` lines look like the removed/added pair of a
# single change; confirm against the real test file.
with pytest.raises(ArgumentTypeError):
orchestrator.run(test_args + ["--authentication", "{\''invalid_json"])
orchestrator.setup_config(test_args + ["--authentication", "{\''invalid_json"])
def test_load_authentication_invalid_dict(orchestrator, test_args):
# Checks that an --authentication value that is valid JSON but a list
# ("[true, false]") instead of an object raises ArgumentTypeError.
# NOTE(review): this is a flattened diff view -- the `orchestrator.run(...)` and
# `orchestrator.setup_config(...)` lines look like the removed/added pair of a
# single change; confirm against the real test file.
with pytest.raises(ArgumentTypeError):
orchestrator.run(test_args + ["--authentication", "[true, false]"])
orchestrator.setup_config(test_args + ["--authentication", "[true, false]"])
def test_load_modules_from_commandline(orchestrator, test_args):
    """Check that modules named on the command line are the ones that get loaded.

    Appends a ``--<module type> example_module`` pair to ``test_args`` for each
    of the five module-type flags, calls ``orchestrator.setup`` with the
    result, and asserts that every corresponding list on the orchestrator
    holds exactly one module whose ``name`` is ``example_module``.
    """
    module_types = ("feeders", "extractors", "databases", "enrichers", "formatters")

    module_flags = []
    for module_type in module_types:
        module_flags += [f"--{module_type}", "example_module"]
    args = test_args + module_flags

    orchestrator.setup(args)

    # Exactly one module per type: the command-line value must not be added on
    # top of any module list that was already configured.
    for module_type in module_types:
        assert len(getattr(orchestrator, module_type)) == 1

    for module_type in module_types:
        assert getattr(orchestrator, module_type)[0].name == "example_module"
def test_load_settings_for_module_from_commandline(orchestrator, test_args):
    """Check that per-module settings can be supplied on the command line.

    Selects ``gsheet_feeder`` as the only feeder, passes its ``sheet_id`` and
    ``service_account`` through dotted ``--gsheet_feeder.*`` flags, and asserts
    that the feeder is loaded and that the sheet id is stored in the
    orchestrator config.
    """
    args = test_args + [
        "--feeders", "gsheet_feeder",
        "--gsheet_feeder.sheet_id", "123",
        "--gsheet_feeder.service_account", "tests/data/test_service_account.json",
    ]

    orchestrator.setup(args)

    assert len(orchestrator.feeders) == 1
    assert orchestrator.feeders[0].name == "gsheet_feeder"
    # The value is compared as the string "123", exactly as passed on the command line.
    assert orchestrator.config['gsheet_feeder']['sheet_id'] == "123"