mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Merge main
This commit is contained in:
@@ -1,15 +1,12 @@
|
||||
from datetime import datetime
|
||||
from typing import Type
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.modules.instagram_api_extractor.instagram_api_extractor import InstagramAPIExtractor
|
||||
from .test_extractor_base import TestExtractorBase
|
||||
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_user_response():
|
||||
return {
|
||||
@@ -115,74 +112,74 @@ class TestInstagramAPIExtractor(TestExtractorBase):
|
||||
# test gets text (metadata title)
|
||||
pass
|
||||
|
||||
def test_download_profile_basic(self, metadata, mock_user_response):
|
||||
def test_download_profile_basic(self, metadata, mock_user_response, mocker):
|
||||
"""Test basic profile download without full_profile"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_from_url') as mock_download:
|
||||
# Mock API responses
|
||||
mock_call.return_value = mock_user_response
|
||||
mock_download.return_value = "profile.jpg"
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_download = mocker.patch.object(self.extractor, 'download_from_url')
|
||||
# Mock API responses
|
||||
mock_call.return_value = mock_user_response
|
||||
mock_download.return_value = "profile.jpg"
|
||||
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.status == "insta profile: success"
|
||||
assert result.get_title() == "Test User"
|
||||
assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
|
||||
# Verify profile picture download
|
||||
mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
|
||||
mock_download.assert_called_once_with("http://example.com/profile.jpg")
|
||||
assert len(result.media) == 1
|
||||
assert result.media[0].filename == "profile.jpg"
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.status == "insta profile: success"
|
||||
assert result.get_title() == "Test User"
|
||||
assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
|
||||
# Verify profile picture download
|
||||
mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
|
||||
mock_download.assert_called_once_with("http://example.com/profile.jpg")
|
||||
assert len(result.media) == 1
|
||||
assert result.media[0].filename == "profile.jpg"
|
||||
|
||||
def test_download_profile_full(self, metadata, mock_user_response, mock_story_response):
|
||||
def test_download_profile_full(self, metadata, mock_user_response, mock_story_response, mocker):
|
||||
"""Test full profile download with stories/posts"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_all_posts') as mock_posts, \
|
||||
patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
|
||||
patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
|
||||
patch.object(self.extractor, '_download_stories_reusable') as mock_stories:
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
|
||||
mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
|
||||
mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
|
||||
mock_stories = mocker.patch.object(self.extractor, '_download_stories_reusable')
|
||||
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
mock_story_response
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_stories.return_value = mock_story_response
|
||||
mock_posts.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
mock_story_response
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_stories.return_value = mock_story_response
|
||||
mock_posts.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.get("#stories") == len(mock_story_response)
|
||||
mock_posts.assert_called_once_with(result, "123")
|
||||
assert "errors" not in result.metadata
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.get("#stories") == len(mock_story_response)
|
||||
mock_posts.assert_called_once_with(result, "123")
|
||||
assert "errors" not in result.metadata
|
||||
|
||||
def test_download_profile_not_found(self, metadata):
|
||||
def test_download_profile_not_found(self, metadata, mocker):
|
||||
"""Test profile not found error"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call:
|
||||
mock_call.return_value = {"user": None}
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
self.extractor.download_profile(metadata, "invalid_user")
|
||||
assert "User invalid_user not found" in str(exc_info.value)
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_call.return_value = {"user": None}
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
self.extractor.download_profile(metadata, "invalid_user")
|
||||
assert "User invalid_user not found" in str(exc_info.value)
|
||||
|
||||
def test_download_profile_error_handling(self, metadata, mock_user_response):
|
||||
def test_download_profile_error_handling(self, metadata, mock_user_response, mocker):
|
||||
"""Test error handling in full profile mode"""
|
||||
with (patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
|
||||
patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
|
||||
patch.object(self.extractor, '_download_stories_reusable') as stories_tagged, \
|
||||
patch.object(self.extractor, 'download_all_posts') as mock_posts
|
||||
):
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
Exception("Stories API failed"),
|
||||
Exception("Posts API failed")
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
stories_tagged.return_value = None
|
||||
mock_posts.return_value = None
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
|
||||
mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
|
||||
stories_tagged = mocker.patch.object(self.extractor, '_download_stories_reusable')
|
||||
mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
|
||||
|
||||
assert result.is_success()
|
||||
assert "Error downloading stories for test_user" in result.metadata["errors"]
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
Exception("Stories API failed"),
|
||||
Exception("Posts API failed")
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
stories_tagged.return_value = None
|
||||
mock_posts.return_value = None
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
|
||||
assert result.is_success()
|
||||
assert "Error downloading stories for test_user" in result.metadata["errors"]
|
||||
@@ -1,94 +1,108 @@
|
||||
import os
|
||||
from typing import Type
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.core.extractor import Extractor
|
||||
from auto_archiver.modules.instagram_tbot_extractor import InstagramTbotExtractor
|
||||
from tests.extractors.test_extractor_base import TestExtractorBase
|
||||
|
||||
TESTFILES = os.path.join(os.path.dirname(__file__), "testfiles")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def session_file(tmpdir):
|
||||
"""Fixture to create a test session file."""
|
||||
session_file = os.path.join(tmpdir, "test_session.session")
|
||||
with open(session_file, "w") as f:
|
||||
f.write("mock_session_data")
|
||||
return session_file.replace(".session", "")
|
||||
def patch_extractor_methods(request, setup_module, mocker):
|
||||
mocker.patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None)
|
||||
mocker.patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None)
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def patch_extractor_methods(request, setup_module):
|
||||
with patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None), \
|
||||
patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None):
|
||||
if hasattr(request, 'cls') and hasattr(request.cls, 'config'):
|
||||
request.cls.extractor = setup_module("instagram_tbot_extractor", request.cls.config)
|
||||
|
||||
yield
|
||||
|
||||
@pytest.fixture
|
||||
def metadata_sample():
|
||||
m = Metadata()
|
||||
m.set_title("Test Title")
|
||||
m.set_timestamp("2021-01-01T00:00:00Z")
|
||||
m.set_timestamp("2021-01-01T00:00:00")
|
||||
m.set_url("https://www.instagram.com/p/1234567890")
|
||||
return m
|
||||
|
||||
|
||||
class TestInstagramTbotExtractor:
|
||||
@pytest.fixture
|
||||
def mock_telegram_client(mocker):
|
||||
"""Fixture to mock TelegramClient interactions."""
|
||||
mock_client = mocker.patch("auto_archiver.modules.instagram_tbot_extractor.client")
|
||||
instance = mocker.MagicMock()
|
||||
mock_client.return_value = instance
|
||||
return instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def extractor(setup_module, patch_extractor_methods, mocker):
|
||||
extractor_module = "instagram_tbot_extractor"
|
||||
extractor: InstagramTbotExtractor
|
||||
config = {
|
||||
"api_id": 12345,
|
||||
"api_hash": "test_api_hash",
|
||||
"session_file": "test_session",
|
||||
"timeout": 4
|
||||
}
|
||||
extractor = setup_module(extractor_module, config)
|
||||
extractor.client = mocker.MagicMock()
|
||||
extractor.session_file = "test_session"
|
||||
return extractor
|
||||
|
||||
|
||||
def test_non_instagram_url(extractor, metadata_sample):
|
||||
metadata_sample.set_url("https://www.youtube.com")
|
||||
assert extractor.download(metadata_sample) is False
|
||||
|
||||
|
||||
def test_download_success(extractor, metadata_sample, mocker):
|
||||
mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
|
||||
mocker.patch.object(extractor, "_process_messages", return_value="Sample Instagram post caption")
|
||||
result = extractor.download(metadata_sample)
|
||||
assert result.is_success()
|
||||
assert result.status == "insta-via-bot: success"
|
||||
assert result.metadata.get("title") == "Sample Instagram post caption"
|
||||
|
||||
|
||||
def test_download_invalid(extractor, metadata_sample, mocker):
|
||||
mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
|
||||
mocker.patch.object(extractor, "_process_messages", return_value="You must enter a URL to a post")
|
||||
assert extractor.download(metadata_sample) is False
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Requires authentication.")
|
||||
class TestInstagramTbotExtractorReal(TestExtractorBase):
|
||||
# To run these tests set the TELEGRAM_API_ID and TELEGRAM_API_HASH environment variables, and ensure the session file exists.
|
||||
# Note these are true at this point in time, but changes to source media could be reason for failure.
|
||||
extractor_module = "instagram_tbot_extractor"
|
||||
extractor: InstagramTbotExtractor
|
||||
config = {
|
||||
"api_id": os.environ.get("TELEGRAM_API_ID"),
|
||||
"api_hash": os.environ.get("TELEGRAM_API_HASH"),
|
||||
"session_file": "secrets/anon-insta",
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
def mock_telegram_client(self):
|
||||
"""Fixture to mock TelegramClient interactions."""
|
||||
with patch("auto_archiver.modules.instagram_tbot_extractor._initialize_telegram_client") as mock_client:
|
||||
instance = MagicMock()
|
||||
mock_client.return_value = instance
|
||||
yield instance
|
||||
|
||||
def test_extractor_is_initialized(self):
|
||||
assert self.extractor is not None
|
||||
|
||||
|
||||
@patch("time.sleep")
|
||||
@pytest.mark.parametrize("url, expected_status, bot_responses", [
|
||||
("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou")]),
|
||||
("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol")]),
|
||||
# todo tbot not working for stories :(
|
||||
("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, [MagicMock(id=101, media=None, message="Media not found or unavailable")]),
|
||||
("https://www.youtube.com/watch?v=ymCMy8OffHM", False, []),
|
||||
("https://www.instagram.com/p/INVALID", False, [MagicMock(id=101, media=None, message="You must enter a URL to a post")]),
|
||||
@pytest.mark.parametrize("url, expected_status, message, len_media", [
|
||||
("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success",
|
||||
"Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou",
|
||||
6),
|
||||
("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success",
|
||||
"Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol",
|
||||
3),
|
||||
# instagram tbot not working (potentially intermittently?) for stories - replace with a live story to retest
|
||||
# ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, "Media not found or unavailable"),
|
||||
# Seems to be working intermittently for highlights
|
||||
# ("https://www.instagram.com/stories/highlights/17868810693068139/", "insta-via-bot: success", None, 50),
|
||||
# Marking invalid url as success
|
||||
("https://www.instagram.com/p/INVALID", "insta-via-bot: success", "Media not found or unavailable", 0),
|
||||
("https://www.youtube.com/watch?v=ymCMy8OffHM", False, None, 0),
|
||||
])
|
||||
def test_download(self, mock_sleep, url, expected_status, bot_responses, metadata_sample):
|
||||
def test_download(self, url, expected_status, message, len_media, metadata_sample):
|
||||
"""Test the `download()` method with various Instagram URLs."""
|
||||
metadata_sample.set_url(url)
|
||||
self.extractor.client = MagicMock()
|
||||
|
||||
result = self.extractor.download(metadata_sample)
|
||||
pass
|
||||
# TODO fully mock or use as authenticated test
|
||||
# if expected_status:
|
||||
# assert result.is_success()
|
||||
# assert result.status == expected_status
|
||||
# assert result.metadata.get("title") in [msg.message[:128] for msg in bot_responses if msg.message]
|
||||
# else:
|
||||
# assert result is False
|
||||
|
||||
|
||||
|
||||
|
||||
# Test story
|
||||
# Test expired story
|
||||
# Test requires login/ access (?)
|
||||
# Test post
|
||||
# Test multiple images?
|
||||
if expected_status:
|
||||
assert result.is_success()
|
||||
assert result.status == expected_status
|
||||
assert result.metadata.get("title") == message
|
||||
assert len(result.media) == len_media
|
||||
else:
|
||||
assert result is False
|
||||
|
||||
Reference in New Issue
Block a user