mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
Merge branch 'main' into tests/add_module_tests
This commit is contained in:
14
tests/data/test_service_account.json
Normal file
14
tests/data/test_service_account.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "some-project-id",
|
||||
"private_key_id": "some-private-key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDPlcaFJgt7HzoC\n4z0b18PzI2R5c892mLnNwRO8DOKid5INt6z5RAWKDPdnIyHjRBx74qNZl6768pia\nztQNgnud7mKcmvOvGrpUbFx2BdAw8xTyAlRVMalOBhUS9RKvjP5WgSwR5EKwfvzy\nrGioC6ml/segz5EchSaIzgASwB17ir0w6IrymBxUeNelfzCGJpCRhqG5nG+eEjct\nUYU0QIyihRD1Lq0f3Z3D0xfTLLZ630iFBj/Wr0BCJHkl6hdVuGhnyn4S98sMX1Bd\ntaJF/lWi4jdt7SoXD3+FWv66kHPpFfINMpReuB9u0ogfYkORgiRBOMhYBkGGQjUG\nOnBTxEc3AgMBAAECgf9bKiK8DdSz0ALzQbRLhgj2B9485jHI49wjgINOyceZ23uS\nQYXaO+DFLcgLqBkVSGanuHMpU0+qCpeM0v9yXSTIW8RguWMnFd8ID/yLRktxfQa1\n1FAQh+NlF4/gnuUoM8N/FYSy6R5grfaxwU8Qfg66IQXUB52OezSVu5lxNO4G5Rwv\nJ2e/+XYBUv/H26BnQSmjFCzbJkdbtrOeThpaLwLexKcollvoHKGyus0jpWg4C9Ez\n9EJaE+on4nd+cM1Vd+dWaHXoZ9Db9IvxPBqFJE8fynap7RDBeZK678OuCvQntrp4\nrTsE9hW8073Jhl/LbhfbDC0lhFR0JUHygVGE01ECgYEA+g+ddpGGY90yhhM76bTr\nkU6WwislMmfS0WDdLPemNgzLwCtkC2vsQgzg/egxqkVF5dJ9upiFhVgpYxY7ap9U\nSGFemb6T1ASl/1yeNhd0yc4PZFsJ29k+kNgSIlJYm9KDCIMqS1wPoXvFQhbMitOf\n/gLCPugxl67c+qg6nfuODTkCgYEA1IPngESOJnV8oa2WReWrO6+u6xb/OhqdmBzI\n5yq1z3f5gb98XESZR/rCH2vAOmHIJPn3XdZHsznOuxhZwGr1oztiRIurLmBlxQoL\n7tq0jDOUVSD2yeyQwKt5LaBH94P598FiauGxXM4raREWKtcNBGoOX1u1+kEBsoL4\ntf10Z+8CgYEA3QFkB+ECR8y91KW3NAzEjj5JG/8J9wyv1IGpuQ5/hhG1Gni/CSEv\nRAkh6QaIrpZe+ooYuQwIJhwPKBYEGW4MDZSRCYzYFnCtTY5L/j6o55sJG4cipX3R\nwC5XiKIC0mUxjhpvDP+miPBdHNYNnT0AkH1btEF/YzIW+Coq9GnZ2HECgYAOOpax\ne+WYpZ0mphy9qVcBtA2eJ/gGx+ltWeAJuk5aCcpm6Y9GDkHFFAETYX+JaSqhbysk\n2UgLs/8nf8XioEa6GyvFMyTPAh1OSBHseDBGgt2XpZFgi7pVbCW87FJlPCzsbcJN\nLbdWY2d8rWwyihuRBBjaQaW5j8ixTxuf88xreQKBgQCST4Fr8C5CkpakTA+KOost\nLOlziUBm0534mTg7dTcOE1H1+gxtqpXlXcJylpGz1lUXRlHCIutN5iPJcN5cxFES\nsP7wBd7BhficsMKDiWPm9XbP2zXVZu0ldUxA1mONMsS1P4p7i3Dh4uzrRDmSkTUL\njUpppYDumg3oM7wSJ6sTQA==\n-----END PRIVATE KEY-----",
|
||||
"client_email": "some-email",
|
||||
"client_id": "some-client-email",
|
||||
"auth_uri": "https://example.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.example.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.example.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.example.com/robot/v1/metadata/x509/some-email",
|
||||
"universe_domain": "example.com"
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import pytest
|
||||
from auto_archiver.modules.generic_extractor.generic_extractor import GenericExtractor
|
||||
from .test_extractor_base import TestExtractorBase
|
||||
|
||||
CI=os.getenv("GITHUB_ACTIONS", '') == 'true'
|
||||
class TestGenericExtractor(TestExtractorBase):
|
||||
"""Tests Generic Extractor
|
||||
"""
|
||||
@@ -77,10 +78,11 @@ class TestGenericExtractor(TestExtractorBase):
|
||||
result = self.extractor.download(item)
|
||||
assert not result
|
||||
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Currently no way to authenticate when on CI. Youtube (yt-dlp) doesn't support logging in with username/password.")
|
||||
@pytest.mark.download
|
||||
def test_youtube_download(self, make_item):
|
||||
# url https://www.youtube.com/watch?v=5qap5aO4i9A
|
||||
|
||||
item = make_item("https://www.youtube.com/watch?v=J---aiyznGQ")
|
||||
result = self.extractor.download(item)
|
||||
assert result.get_url() == "https://www.youtube.com/watch?v=J---aiyznGQ"
|
||||
@@ -114,6 +116,7 @@ class TestGenericExtractor(TestExtractorBase):
|
||||
result = self.extractor.download(item)
|
||||
assert result is not False
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
|
||||
@pytest.mark.download
|
||||
def test_truthsocial_download_video(self, make_item):
|
||||
item = make_item("https://truthsocial.com/@DaynaTrueman/posts/110602446619561579")
|
||||
@@ -121,18 +124,21 @@ class TestGenericExtractor(TestExtractorBase):
|
||||
assert len(result.media) == 1
|
||||
assert result is not False
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
@pytest.mark.download
def test_truthsocial_download_no_media(self, make_item):
    """Download a Truth Social post and check the extractor does not return False.

    The test name indicates this post has no media attached; only the
    non-False result of ``download`` is asserted here.
    """
    post_url = "https://truthsocial.com/@bbcnewa/posts/109598702184774628"
    outcome = self.extractor.download(make_item(post_url))
    assert outcome is not False
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
@pytest.mark.download
def test_truthsocial_download_poll(self, make_item):
    """Download a Truth Social post and check the extractor does not return False.

    The test name indicates this post contains a poll; only the non-False
    result of ``download`` is asserted here.
    """
    post_url = "https://truthsocial.com/@CNN_US/posts/113724326568555098"
    outcome = self.extractor.download(make_item(post_url))
    assert outcome is not False
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
|
||||
@pytest.mark.download
|
||||
def test_truthsocial_download_single_image(self, make_item):
|
||||
item = make_item("https://truthsocial.com/@mariabartiromo/posts/113861116433335006")
|
||||
@@ -140,6 +146,7 @@ class TestGenericExtractor(TestExtractorBase):
|
||||
assert len(result.media) == 1
|
||||
assert result is not False
|
||||
|
||||
@pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
|
||||
@pytest.mark.download
|
||||
def test_truthsocial_download_multiple_images(self, make_item):
|
||||
item = make_item("https://truthsocial.com/@trrth/posts/113861302149349135")
|
||||
|
||||
@@ -34,7 +34,7 @@ class TestTwitterApiExtractor(TestExtractorBase):
|
||||
|
||||
@pytest.mark.download
|
||||
def test_sanitize_url_download(self):
|
||||
assert "https://t.co/yl3oOJatFp" == self.extractor.sanitize_url("https://www.bellingcat.com/category/resources/")
|
||||
assert "https://www.bellingcat.com/category/resources/" == self.extractor.sanitize_url("https://t.co/yl3oOJatFp")
|
||||
|
||||
@pytest.mark.parametrize("url, exptected_username, exptected_tweetid", [
|
||||
("https://twitter.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
|
||||
|
||||
@@ -60,3 +60,15 @@ def test_run_auto_archiver_empty_file(caplog, autoarchiver, orchestration_file):
|
||||
|
||||
# should treat an empty file as if there is no file at all
|
||||
assert " No URLs provided. Please provide at least one URL via the com" in caplog.text
|
||||
|
||||
def test_call_autoarchiver_main(caplog, monkeypatch, tmp_path):
    """Call the package's ``main`` with no CLI arguments and expect it to exit.

    ``sys.argv`` is replaced with just the program name, ``main()`` must raise
    ``SystemExit``, and the captured log must contain the "No URLs provided"
    message.
    """
    from auto_archiver.__main__ import main as cli_main

    # Switch the working directory to a temporary one so that the user's
    # real config file is not used.
    monkeypatch.chdir(tmp_path)

    expected_message = "No URLs provided. Please provide at least one"

    with monkeypatch.context() as patched, pytest.raises(SystemExit):
        patched.setattr(sys, "argv", ["auto-archiver"])
        cli_main()

    assert expected_message in caplog.text
|
||||
@@ -75,18 +75,36 @@ def test_help(orchestrator, basic_parser, capsys):
|
||||
orchestrator.show_help(args)
|
||||
|
||||
assert exit_error.value.code == 0
|
||||
assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in capsys.readouterr().out
|
||||
|
||||
logs = capsys.readouterr().out
|
||||
assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs
|
||||
|
||||
# basic config options
|
||||
assert "--version" in logs
|
||||
|
||||
# setting modules options
|
||||
assert "--feeders" in logs
|
||||
assert "--extractors" in logs
|
||||
|
||||
# authentication options
|
||||
assert "--authentication" in logs
|
||||
|
||||
# logging options
|
||||
assert "--logging.level" in logs
|
||||
|
||||
# individual module configs
|
||||
assert "--gsheet_feeder.sheet_id" in logs
|
||||
|
||||
|
||||
def test_add_custom_modules_path(orchestrator, test_args):
|
||||
orchestrator.run(test_args)
|
||||
orchestrator.setup_config(test_args)
|
||||
|
||||
import auto_archiver
|
||||
assert "tests/data/test_modules/" in auto_archiver.modules.__path__
|
||||
|
||||
def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
|
||||
|
||||
orchestrator.run(test_args + # we still need to load the real path to get the example_module
|
||||
orchestrator.setup_config(test_args + # we still need to load the real path to get the example_module
|
||||
["--module_paths", "tests/data/invalid_test_modules/"])
|
||||
|
||||
assert caplog.records[0].message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."
|
||||
@@ -97,7 +115,7 @@ def test_check_required_values(orchestrator, caplog, test_args):
|
||||
test_args = test_args[:-2]
|
||||
|
||||
with pytest.raises(SystemExit) as exit_error:
|
||||
orchestrator.run(test_args)
|
||||
config = orchestrator.setup_config(test_args)
|
||||
|
||||
assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"
|
||||
|
||||
@@ -111,24 +129,50 @@ def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
|
||||
store_yaml(test_yaml, tmp_file)
|
||||
|
||||
# run the orchestrator
|
||||
orchestrator.run(["--config", tmp_file, "--module_paths", TEST_MODULES])
|
||||
assert orchestrator.config is not None
|
||||
config = orchestrator.setup_config(["--config", tmp_file, "--module_paths", TEST_MODULES])
|
||||
assert config is not None
|
||||
|
||||
def test_load_authentication_string(orchestrator, test_args):
|
||||
|
||||
orchestrator.run(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
|
||||
assert orchestrator.config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
|
||||
config = orchestrator.setup_config(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
|
||||
assert config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
|
||||
|
||||
def test_load_authentication_string_concat_site(orchestrator, test_args):
|
||||
|
||||
orchestrator.run(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
|
||||
assert orchestrator.config['authentication'] == {"x.com": {"api_key": "my_key"},
|
||||
config = orchestrator.setup_config(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
|
||||
assert config['authentication'] == {"x.com": {"api_key": "my_key"},
|
||||
"twitter.com": {"api_key": "my_key"}}
|
||||
|
||||
def test_load_invalid_authentication_string(orchestrator, test_args):
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
orchestrator.run(test_args + ["--authentication", "{\''invalid_json"])
|
||||
orchestrator.setup_config(test_args + ["--authentication", "{\''invalid_json"])
|
||||
|
||||
def test_load_authentication_invalid_dict(orchestrator, test_args):
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
orchestrator.run(test_args + ["--authentication", "[true, false]"])
|
||||
orchestrator.setup_config(test_args + ["--authentication", "[true, false]"])
|
||||
|
||||
def test_load_modules_from_commandline(orchestrator, test_args):
    """Select ``example_module`` for every step type via CLI flags.

    After ``orchestrator.setup`` runs, each of the feeders, extractors,
    databases, enrichers and formatters lists must contain exactly one
    entry, and that entry's ``name`` must be ``example_module``.
    """
    module_flags = [
        "--feeders", "example_module",
        "--extractors", "example_module",
        "--databases", "example_module",
        "--enrichers", "example_module",
        "--formatters", "example_module",
    ]
    orchestrator.setup(test_args + module_flags)

    step_attrs = ("feeders", "extractors", "databases", "enrichers", "formatters")

    # First confirm every step list has a single entry...
    for attr in step_attrs:
        assert len(getattr(orchestrator, attr)) == 1

    # ...then confirm that entry is the module requested on the command line.
    for attr in step_attrs:
        assert getattr(orchestrator, attr)[0].name == "example_module"
|
||||
|
||||
def test_load_settings_for_module_from_commandline(orchestrator, test_args):
    """Pass per-module settings for ``gsheet_feeder`` on the command line.

    After ``orchestrator.setup`` runs, exactly one feeder named
    ``gsheet_feeder`` must be present and the ``sheet_id`` given on the
    command line must appear in ``orchestrator.config``.
    """
    gsheet_flags = [
        "--feeders", "gsheet_feeder",
        "--gsheet_feeder.sheet_id", "123",
        "--gsheet_feeder.service_account", "tests/data/test_service_account.json",
    ]
    orchestrator.setup(test_args + gsheet_flags)

    assert len(orchestrator.feeders) == 1
    assert orchestrator.feeders[0].name == "gsheet_feeder"

    gsheet_settings = orchestrator.config['gsheet_feeder']
    assert gsheet_settings['sheet_id'] == "123"
|
||||
Reference in New Issue
Block a user