diff --git a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
index 81d2bf6..f63f2c1 100644
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -88,6 +88,9 @@ class InstagramTbotExtractor(Extractor):
 
         if message:
             result.set_content(message).set_title(message[:128])
+        elif result.is_empty():
+            logger.debug(f"No media found for link {url=} for {self.name}: {message}")
+            return False
         return result.success("insta-via-bot")
 
     def _send_url_to_bot(self, url: str):
@@ -104,13 +107,14 @@ class InstagramTbotExtractor(Extractor):
         message = ""
         time.sleep(3)
         # media is added before text by the bot so it can be used as a stop-logic mechanism
-        while attempts < max(self.timeout - 3, 3) and (not message or not len(seen_media)):
+        while attempts < max(self.timeout - 3, 15) and (not message or not len(seen_media)):
             attempts += 1
             time.sleep(1)
             for post in self.client.iter_messages(chat, min_id=since_id):
                 since_id = max(since_id, post.id)
                 # Skip known filler message:
-                if post.message == "The bot receives information through https://hikerapi.com/p/hJqpppqi":
+                # `in` raises TypeError on None (the old `==` did not), so fall back to ""
+                if "The bot receives information through https://hikerapi.com/" in (post.message or ""):
                     continue
                 if post.media and post.id not in seen_media:
                     filename_dest = os.path.join(tmp_dir, f"{chat.id}_{post.id}")
diff --git a/tests/extractors/test_instagram_tbot_extractor.py b/tests/extractors/test_instagram_tbot_extractor.py
index 47a4bec..7652038 100644
--- a/tests/extractors/test_instagram_tbot_extractor.py
+++ b/tests/extractors/test_instagram_tbot_extractor.py
@@ -68,6 +68,12 @@ def test_download_invalid(extractor, metadata_sample, mocker):
     assert extractor.download(metadata_sample) is False
 
 
+def test_fails_with_empty_response(extractor, metadata_sample, mocker):
+    mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
+    mocker.patch.object(extractor, "_process_messages", return_value="")
+    assert extractor.download(metadata_sample) is False
+
+
 @pytest.mark.skip(reason="Requires authentication.")
 class TestInstagramTbotExtractorReal(TestExtractorBase):
     # To run these tests set the TELEGRAM_API_ID and TELEGRAM_API_HASH environment variables, and ensure the session file exists.