From add83c96500171ad61f5808d82ee4bdab6b7969a Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Tue, 7 Jan 2025 19:40:19 +0100
Subject: [PATCH] Remove snscrape from twitter_archiver

1. The snscrape Twitter scraper no longer works (ref: https://github.com/JustAnotherArchivist/snscrape/issues/1045)
2. Depending on snscrape limits Python to versions <3.12
---
 .../archivers/twitter_archiver.py             | 42 +------------------
 1 file changed, 1 insertion(+), 41 deletions(-)

diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py
index 6735488..1a356ca 100644
--- a/src/auto_archiver/archivers/twitter_archiver.py
+++ b/src/auto_archiver/archivers/twitter_archiver.py
@@ -2,7 +2,6 @@ import re, requests, mimetypes, json
 from typing import Union
 from datetime import datetime
 from loguru import logger
-from snscrape.modules.twitter import TwitterTweetScraper, Video, Gif, Photo
 from yt_dlp import YoutubeDL
 from yt_dlp.extractor.twitter import TwitterIE
 from slugify import slugify
@@ -49,7 +48,7 @@ class TwitterArchiver(Archiver):
         username, tweet_id = self.get_username_tweet_id(url)
         if not username: return False
 
-        strategies = [self.download_yt_dlp, self.download_snscrape, self.download_syndication]
+        strategies = [self.download_yt_dlp, self.download_syndication]
         for strategy in strategies:
             logger.debug(f"Trying {strategy.__name__} for {url=}")
             try:
@@ -61,45 +60,6 @@ class TwitterArchiver(Archiver):
         logger.warning(f"No free strategy worked for {url}")
         return False
 
-        
-    def download_snscrape(self, item: Metadata, url: str, tweet_id: str) -> Union[Metadata|bool]:
-        scr = TwitterTweetScraper(tweet_id)
-        try:
-            tweet = next(scr.get_items())
-        except Exception as ex:
-            logger.warning(f"SNSCRAPE FAILED, can't get tweet: {type(ex).__name__} occurred. args: {ex.args}")
-            return False
-        
-        result = Metadata()
-        result.set_title(tweet.content).set_content(tweet.json()).set_timestamp(tweet.date)
-        if tweet.media is None:
-            logger.debug(f'No media found, archiving tweet text only')
-            return result
-
-        for i, tweet_media in enumerate(tweet.media):
-            media = Media(filename="")
-            mimetype = ""
-            if type(tweet_media) == Video:
-                variant = max(
-                    [v for v in tweet_media.variants if v.bitrate], key=lambda v: v.bitrate)
-                media.set("src", variant.url).set("duration", tweet_media.duration)
-                mimetype = variant.contentType
-            elif type(tweet_media) == Gif:
-                variant = tweet_media.variants[0]
-                media.set("src", variant.url)
-                mimetype = variant.contentType
-            elif type(tweet_media) == Photo:
-                media.set("src", UrlUtil.twitter_best_quality_url(tweet_media.fullUrl))
-                mimetype = "image/jpeg"
-            else:
-                logger.warning(f"Could not get media URL of {tweet_media}")
-                continue
-            ext = mimetypes.guess_extension(mimetype)
-            media.filename = self.download_from_url(media.get("src"), f'{slugify(url)}_{i}{ext}')
-            result.add_media(media)
-
-        return result.success("twitter-snscrape")
-
     def download_syndication(self, item: Metadata, url: str, tweet_id: str) -> Union[Metadata|bool]:
         """
         Hack alternative working again.