diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py index 5f9a6b5..d7fff39 100644 --- a/src/auto_archiver/archivers/twitter_archiver.py +++ b/src/auto_archiver/archivers/twitter_archiver.py @@ -108,9 +108,11 @@ class TwitterArchiver(Archiver): tweet = tie._extract_status(tweet_id) result = Metadata() try: + if not tweet.get("user") or not tweet.get("created_at"): + raise ValueError(f"Error retreiving post with id {tweet_id}. Are you sure it exists?") timestamp = datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y") - except Exception as ex: - logger.warning(f"Failed to get timestamp: {type(ex).__name__} occurred. args: {ex.args}") + except (ValueError, KeyError) as ex: + logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}") return False result\ diff --git a/tests/archivers/test_twitter_archiver.py b/tests/archivers/test_twitter_archiver.py index e7e015c..a4aaa22 100644 --- a/tests/archivers/test_twitter_archiver.py +++ b/tests/archivers/test_twitter_archiver.py @@ -38,7 +38,6 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): test_url = "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w" self.assertEqual(test_url, self.archiver.sanitize_url(test_url)) - def test_get_username_tweet_id_from_url(self): # test valid twitter URL @@ -70,8 +69,18 @@ class TestTwitterArchiver(TestArchiverBase, unittest.TestCase): "As 2024 comes to a close, here’s some examples of what Bellingcat investigated per month in our 10th year! 🧵", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc) ) - breakpoint() + + def test_download_nonexistend_tweet(self): + # this tweet does not exist + url = "https://x.com/Bellingcat/status/17197025860711058" + response = self.archiver.download(self.create_item(url)) + self.assertFalse(response) + def test_download_malformed_tweetid(self): + # this tweet does not exist + url = "https://x.com/Bellingcat/status/1719702586071100058" + response = self.archiver.download(self.create_item(url)) + self.assertFalse(response) def test_download_media_with_images(self): # url https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w