Ruff format with defaults.

2026-06-12 13:18:28 +03:00 · 2025-03-10 18:44:54 +00:00
parent cbb0414e5f
commit 85abe1837a
155 changed files with 2539 additions and 1908 deletions
--- a/src/auto_archiver/modules/generic_extractor/init.py
+++ b/src/auto_archiver/modules/generic_extractor/init.py
@@ -1 +1 @@
-from .generic_extractor import GenericExtractor
+from .generic_extractor import GenericExtractor
--- a/src/auto_archiver/modules/generic_extractor/bluesky.py
+++ b/src/auto_archiver/modules/generic_extractor/bluesky.py
@@ -4,15 +4,16 @@ from auto_archiver.core.extractor import Extractor
 from auto_archiver.core.metadata import Metadata, Media
 from .dropin import GenericDropin, InfoExtractor

-class Bluesky(GenericDropin):

+class Bluesky(GenericDropin):
    def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
        result = Metadata()
        result.set_url(url)
        result.set_title(post["record"]["text"])
        result.set_timestamp(post["record"]["createdAt"])
        for k, v in self._get_post_data(post).items():
-            if v: result.set(k, v)
+            if v:
+                result.set(k, v)

        # download if embeds present (1 video XOR >=1 images)
        for media in self._download_bsky_embeds(post, archiver):
@@ -23,12 +24,12 @@ class Bluesky(GenericDropin):

    def extract_post(self, url: str, ie_instance: InfoExtractor) -> dict:
        # TODO: If/when this PR (https://github.com/yt-dlp/yt-dlp/pull/12098) is merged on ytdlp, remove the comments and delete the code below
-        handle, video_id = ie_instance._match_valid_url(url).group('handle', 'id')
+        handle, video_id = ie_instance._match_valid_url(url).group("handle", "id")
        return ie_instance._extract_post(handle=handle, post_id=video_id)

    def _download_bsky_embeds(self, post: dict, archiver: Extractor) -> list[Media]:
        """
-        Iterates over image(s) or video in a Bluesky post and downloads them        
+        Iterates over image(s) or video in a Bluesky post and downloads them
        """
        media = []
        embed = post.get("record", {}).get("embed", {})
@@ -37,16 +38,15 @@ class Bluesky(GenericDropin):

        media_url = "https://bsky.social/xrpc/com.atproto.sync.getBlob?cid={}&did={}"
        for image_media in image_medias:
-            url = media_url.format(image_media['image']['ref']['$link'], post['author']['did'])
+            url = media_url.format(image_media["image"]["ref"]["$link"], post["author"]["did"])
            image_media = archiver.download_from_url(url)
            media.append(Media(image_media))
        for video_media in video_medias:
-            url = media_url.format(video_media['ref']['$link'], post['author']['did'])
+            url = media_url.format(video_media["ref"]["$link"], post["author"]["did"])
            video_media = archiver.download_from_url(url)
            media.append(Media(video_media))
        return media

-
    def _get_post_data(self, post: dict) -> dict:
        """
        Extracts relevant information returned by the .getPostThread api call (excluding text/created_at): author, mentions, tags, links.
@@ -74,4 +74,4 @@ class Bluesky(GenericDropin):
            res["tags"] = tags
        if links:
            res["links"] = links
-        return res
+        return res
--- a/src/auto_archiver/modules/generic_extractor/dropin.py
+++ b/src/auto_archiver/modules/generic_extractor/dropin.py
@@ -2,11 +2,12 @@ from yt_dlp.extractor.common import InfoExtractor
 from auto_archiver.core.metadata import Metadata
 from auto_archiver.core.extractor import Extractor

+
 class GenericDropin:
    """Base class for dropins for the generic extractor.
-    
+
    In many instances, an extractor will exist in ytdlp, but it will only process videos.
-    Dropins can be created and used to make use of the already-written private code of a 
+    Dropins can be created and used to make use of the already-written private code of a
    specific extractor from ytdlp.

    The dropin should be able to handle the following methods:
@@ -28,21 +29,19 @@ class GenericDropin:
        This method should return the post data from the url.
        """
        raise NotImplementedError("This method should be implemented in the subclass")
-    

    def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
        """
        This method should create a Metadata object from the post data.
        """
        raise NotImplementedError("This method should be implemented in the subclass")
-    

    def skip_ytdlp_download(self, url: str, ie_instance: InfoExtractor):
        """
        This method should return True if you want to skip the ytdlp download method.
        """
        return False
-    
+
    def keys_to_clean(self, video_data: dict, info_extractor: InfoExtractor):
        """
        This method should return a list of strings (keys) to clean from the video_data dict.
@@ -50,9 +49,9 @@ class GenericDropin:
        E.g. ["uploader", "uploader_id", "tiktok_specific_field"]
        """
        return []
-    
+
    def download_additional_media(self, video_data: dict, info_extractor: InfoExtractor, metadata: Metadata):
        """
        This method should download any additional media from the post.
        """
-        return metadata
+        return metadata
--- a/src/auto_archiver/modules/generic_extractor/facebook.py
+++ b/src/auto_archiver/modules/generic_extractor/facebook.py
@@ -3,16 +3,15 @@ from .dropin import GenericDropin

 class Facebook(GenericDropin):
    def extract_post(self, url: str, ie_instance):
-        video_id = ie_instance._match_valid_url(url).group('id')
-        ie_instance._download_webpage(
-            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
-        webpage = ie_instance._download_webpage(url, ie_instance._match_valid_url(url).group('id'))
+        video_id = ie_instance._match_valid_url(url).group("id")
+        ie_instance._download_webpage(url.replace("://m.facebook.com/", "://www.facebook.com/"), video_id)
+        webpage = ie_instance._download_webpage(url, ie_instance._match_valid_url(url).group("id"))

        # TODO: fix once https://github.com/yt-dlp/yt-dlp/pull/12275 is merged
        post_data = ie_instance._extract_metadata(webpage)
        return post_data
-    
+
    def create_metadata(self, post: dict, ie_instance, archiver, url):
        metadata = archiver.create_metadata(url)
-        metadata.set_title(post.get('title')).set_content(post.get('description')).set_post_data(post)
-        return metadata
+        metadata.set_title(post.get("title")).set_content(post.get("description")).set_post_data(post)
+        return metadata
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -12,6 +12,7 @@ from loguru import logger
 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core import Metadata, Media

+
 class GenericExtractor(Extractor):
    _dropins = {}

@@ -19,14 +20,14 @@ class GenericExtractor(Extractor):
        # check for file .ytdlp-update in the secrets folder
        if self.ytdlp_update_interval < 0:
            return
-        
-        use_secrets = os.path.exists('secrets')
-        path = os.path.join('secrets' if use_secrets else '', '.ytdlp-update')
+
+        use_secrets = os.path.exists("secrets")
+        path = os.path.join("secrets" if use_secrets else "", ".ytdlp-update")
        next_update_check = None
        if os.path.exists(path):
            with open(path, "r") as f:
                next_update_check = datetime.datetime.fromisoformat(f.read())
-        
+
        if not next_update_check or next_update_check < datetime.datetime.now():
            self.update_ytdlp()

@@ -36,8 +37,11 @@ class GenericExtractor(Extractor):

    def update_ytdlp(self):
        logger.info("Checking and updating yt-dlp...")
-        logger.info(f"Tip: change the 'ytdlp_update_interval' setting to control how often yt-dlp is updated. Set to -1 to disable or 0 to enable on every run. Current setting: {self.ytdlp_update_interval}")
+        logger.info(
+            f"Tip: change the 'ytdlp_update_interval' setting to control how often yt-dlp is updated. Set to -1 to disable or 0 to enable on every run. Current setting: {self.ytdlp_update_interval}"
+        )
        from importlib.metadata import version as get_version
+
        old_version = get_version("yt-dlp")
        try:
            # try and update with pip (this works inside poetry environment and in a normal virtualenv)
@@ -59,15 +63,17 @@ class GenericExtractor(Extractor):
        for info_extractor in yt_dlp.YoutubeDL()._ies.values():
            if info_extractor.suitable(url) and info_extractor.working():
                yield info_extractor
-        
+
    def suitable(self, url: str) -> bool:
        """
        Checks for valid URLs out of all ytdlp extractors.
        Returns False for the GenericIE, which as labelled by yt-dlp: 'Generic downloader that works on some sites'
        """
        return any(self.suitable_extractors(url))
-    
-    def download_additional_media(self, video_data: dict, info_extractor: InfoExtractor, metadata: Metadata) -> Metadata:
+
+    def download_additional_media(
+        self, video_data: dict, info_extractor: InfoExtractor, metadata: Metadata
+    ) -> Metadata:
        """
        Downloads additional media like images, comments, subtitles, etc.

@@ -76,7 +82,7 @@ class GenericExtractor(Extractor):

        # Just get the main thumbnail. More thumbnails are available in
        # video_data['thumbnails'] should they be required
-        thumbnail_url = video_data.get('thumbnail')
+        thumbnail_url = video_data.get("thumbnail")
        if thumbnail_url:
            try:
                cover_image_path = self.download_from_url(thumbnail_url)
@@ -99,15 +105,65 @@ class GenericExtractor(Extractor):
        Clean up the ytdlp generic video data to make it more readable and remove unnecessary keys that ytdlp adds
        """

-        base_keys = ['formats', 'thumbnail', 'display_id', 'epoch', 'requested_downloads',
-                     'duration_string', 'thumbnails', 'http_headers', 'webpage_url_basename', 'webpage_url_domain',
-                     'extractor', 'extractor_key', 'playlist', 'playlist_index', 'duration_string', 'protocol', 'requested_subtitles',
-                     'format_id', 'acodec', 'vcodec', 'ext', 'epoch', '_has_drm', 'filesize', 'audio_ext', 'video_ext', 'vbr', 'abr',
-                     'resolution', 'dynamic_range', 'aspect_ratio', 'cookies', 'format', 'quality', 'preference', 'artists',
-                     'channel_id', 'subtitles', 'tbr', 'url', 'original_url', 'automatic_captions', 'playable_in_embed', 'live_status',
-                     '_format_sort_fields', 'chapters', 'requested_formats', 'format_note',
-                     'audio_channels', 'asr', 'fps', 'was_live', 'is_live', 'heatmap', 'age_limit', 'stretched_ratio']
-        
+        base_keys = [
+            "formats",
+            "thumbnail",
+            "display_id",
+            "epoch",
+            "requested_downloads",
+            "duration_string",
+            "thumbnails",
+            "http_headers",
+            "webpage_url_basename",
+            "webpage_url_domain",
+            "extractor",
+            "extractor_key",
+            "playlist",
+            "playlist_index",
+            "duration_string",
+            "protocol",
+            "requested_subtitles",
+            "format_id",
+            "acodec",
+            "vcodec",
+            "ext",
+            "epoch",
+            "_has_drm",
+            "filesize",
+            "audio_ext",
+            "video_ext",
+            "vbr",
+            "abr",
+            "resolution",
+            "dynamic_range",
+            "aspect_ratio",
+            "cookies",
+            "format",
+            "quality",
+            "preference",
+            "artists",
+            "channel_id",
+            "subtitles",
+            "tbr",
+            "url",
+            "original_url",
+            "automatic_captions",
+            "playable_in_embed",
+            "live_status",
+            "_format_sort_fields",
+            "chapters",
+            "requested_formats",
+            "format_note",
+            "audio_channels",
+            "asr",
+            "fps",
+            "was_live",
+            "is_live",
+            "heatmap",
+            "age_limit",
+            "stretched_ratio",
+        ]
+
        dropin = self.dropin_for_name(info_extractor.ie_key())
        if dropin:
            try:
@@ -116,8 +172,8 @@ class GenericExtractor(Extractor):
                pass

        return base_keys
-    
-    def add_metadata(self, video_data: dict, info_extractor: InfoExtractor, url:str, result: Metadata) -> Metadata:
+
+    def add_metadata(self, video_data: dict, info_extractor: InfoExtractor, url: str, result: Metadata) -> Metadata:
        """
        Creates a Metadata object from the given video_data
        """
@@ -126,29 +182,36 @@ class GenericExtractor(Extractor):
        result = self.download_additional_media(video_data, info_extractor, result)

        # keep both 'title' and 'fulltitle', but prefer 'title', falling back to 'fulltitle' if it doesn't exist
-        result.set_title(video_data.pop('title', video_data.pop('fulltitle', "")))
+        result.set_title(video_data.pop("title", video_data.pop("fulltitle", "")))
        result.set_url(url)
-        if "description" in video_data: result.set_content(video_data["description"])
+        if "description" in video_data:
+            result.set_content(video_data["description"])
        # extract comments if enabled
        if self.comments:
-            result.set("comments", [{
-                "text": c["text"],
-                "author": c["author"], 
-                "timestamp": datetime.datetime.fromtimestamp(c.get("timestamp"), tz = datetime.timezone.utc)
-            } for c in video_data.get("comments", [])])
+            result.set(
+                "comments",
+                [
+                    {
+                        "text": c["text"],
+                        "author": c["author"],
+                        "timestamp": datetime.datetime.fromtimestamp(c.get("timestamp"), tz=datetime.timezone.utc),
+                    }
+                    for c in video_data.get("comments", [])
+                ],
+            )

        # then add the common metadata
        if timestamp := video_data.pop("timestamp", None):
-            timestamp = datetime.datetime.fromtimestamp(timestamp, tz = datetime.timezone.utc).isoformat()
+            timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
            result.set_timestamp(timestamp)
        if upload_date := video_data.pop("upload_date", None):
-            upload_date = datetime.datetime.strptime(upload_date, '%Y%m%d').replace(tzinfo=datetime.timezone.utc)
+            upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
            result.set("upload_date", upload_date)
-        
+
        # then clean away any keys we don't want
        for clean_key in self.keys_to_clean(info_extractor, video_data):
            video_data.pop(clean_key, None)
-        
+
        # then add the rest of the video data
        for k, v in video_data.items():
            if v:
@@ -169,22 +232,24 @@ class GenericExtractor(Extractor):
            logger.debug(f"""Could not find valid dropin for {info_extractor.IE_NAME}.
                     Why not try creating your own, and make sure it has a valid function called 'create_metadata'. Learn more: https://auto-archiver.readthedocs.io/en/latest/user_guidelines.html#""")
            return False
-        
+
        post_data = dropin.extract_post(url, ie_instance)
        return dropin.create_metadata(post_data, ie_instance, self, url)

-    def get_metadata_for_video(self, data: dict, info_extractor: Type[InfoExtractor], url: str, ydl: yt_dlp.YoutubeDL) -> Metadata:
-
+    def get_metadata_for_video(
+        self, data: dict, info_extractor: Type[InfoExtractor], url: str, ydl: yt_dlp.YoutubeDL
+    ) -> Metadata:
        # this time download
-        ydl.params['getcomments'] = self.comments
-        #TODO: for playlist or long lists of videos, how to download one at a time so they can be stored before the next one is downloaded?
+        ydl.params["getcomments"] = self.comments
+        # TODO: for playlist or long lists of videos, how to download one at a time so they can be stored before the next one is downloaded?
        data = ydl.extract_info(url, ie_key=info_extractor.ie_key(), download=True)
        if "entries" in data:
            entries = data.get("entries", [])
            if not len(entries):
-                logger.warning('YoutubeDLArchiver could not find any video')
+                logger.warning("YoutubeDLArchiver could not find any video")
                return False
-        else: entries = [data]
+        else:
+            entries = [data]

        result = Metadata()

@@ -192,17 +257,18 @@ class GenericExtractor(Extractor):
            try:
                filename = ydl.prepare_filename(entry)
                if not os.path.exists(filename):
-                    filename = filename.split('.')[0] + '.mkv'
+                    filename = filename.split(".")[0] + ".mkv"

                new_media = Media(filename)
                for x in ["duration", "original_url", "fulltitle", "description", "upload_date"]:
-                    if x in entry: new_media.set(x, entry[x])
+                    if x in entry:
+                        new_media.set(x, entry[x])

                # read text from subtitles if enabled
                if self.subtitles:
-                    for lang, val in (data.get('requested_subtitles') or {}).items():
-                        try:    
-                            subs = pysubs2.load(val.get('filepath'), encoding="utf-8")
+                    for lang, val in (data.get("requested_subtitles") or {}).items():
+                        try:
+                            subs = pysubs2.load(val.get("filepath"), encoding="utf-8")
                            text = " ".join([line.text for line in subs])
                            new_media.set(f"subtitles_{lang}", text)
                        except Exception as e:
@@ -212,8 +278,8 @@ class GenericExtractor(Extractor):
                logger.error(f"Error processing entry {entry}: {e}")

        return self.add_metadata(data, info_extractor, url, result)
-    
-    def dropin_for_name(self, dropin_name: str, additional_paths = [], package=__package__) -> Type[InfoExtractor]:
+
+    def dropin_for_name(self, dropin_name: str, additional_paths=[], package=__package__) -> Type[InfoExtractor]:
        dropin_name = dropin_name.lower()

        if dropin_name == "generic":
@@ -221,6 +287,7 @@ class GenericExtractor(Extractor):
            return None

        dropin_class_name = dropin_name.title()
+
        def _load_dropin(dropin):
            dropin_class = getattr(dropin, dropin_class_name)()
            return self._dropins.setdefault(dropin_name, dropin_class)
@@ -244,7 +311,7 @@ class GenericExtractor(Extractor):
                return _load_dropin(dropin)
            except (FileNotFoundError, ModuleNotFoundError):
                pass
-        
+
        # fallback to loading the dropins within auto-archiver
        try:
            return _load_dropin(importlib.import_module(f".{dropin_name}", package=package))
@@ -256,12 +323,12 @@ class GenericExtractor(Extractor):
    def download_for_extractor(self, info_extractor: InfoExtractor, url: str, ydl: yt_dlp.YoutubeDL) -> Metadata:
        """
        Tries to download the given url using the specified extractor
-        
+
        It first tries to use ytdlp directly to download the video. If the post is not a video, it will then try to
        use the extractor's _extract_post method to get the post metadata if possible.
        """
        # when getting info without download, we also don't need the comments
-        ydl.params['getcomments'] = False
+        ydl.params["getcomments"] = False
        result = False

        dropin_submodule = self.dropin_for_name(info_extractor.ie_key())
@@ -272,7 +339,7 @@ class GenericExtractor(Extractor):

            # don't download since it can be a live stream
            data = ydl.extract_info(url, ie_key=info_extractor.ie_key(), download=False)
-            if data.get('is_live', False) and not self.livestreams:
+            if data.get("is_live", False) and not self.livestreams:
                logger.warning("Livestream detected, skipping due to 'livestreams' configuration setting")
                return False
            # it's a valid video, that the youtubdedl can download out of the box
@@ -283,16 +350,21 @@ class GenericExtractor(Extractor):
                # don't clutter the logs with issues about the 'generic' extractor not having a dropin
                return False

-            logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead')
+            logger.debug(
+                f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead'
+            )
            try:
                result = self.get_metadata_for_post(info_extractor, url, ydl)
            except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
-                logger.error(f'Error downloading metadata for post: {post_e}')
+                logger.error(f"Error downloading metadata for post: {post_e}")
                return False
            except Exception as generic_e:
-                logger.debug(f'Attempt to extract using ytdlp extractor "{info_extractor.IE_NAME}" failed:  \n  {repr(generic_e)}', exc_info=True)
+                logger.debug(
+                    f'Attempt to extract using ytdlp extractor "{info_extractor.IE_NAME}" failed:  \n  {repr(generic_e)}',
+                    exc_info=True,
+                )
                return False
-        
+
        if result:
            extractor_name = "yt-dlp"
            if info_extractor:
@@ -308,43 +380,49 @@ class GenericExtractor(Extractor):
    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()

-        #TODO: this is a temporary hack until this issue is closed: https://github.com/yt-dlp/yt-dlp/issues/11025
+        # TODO: this is a temporary hack until this issue is closed: https://github.com/yt-dlp/yt-dlp/issues/11025
        if url.startswith("https://ya.ru"):
            url = url.replace("https://ya.ru", "https://yandex.ru")
            item.set("replaced_url", url)

+        ydl_options = {
+            "outtmpl": os.path.join(self.tmp_dir, f"%(id)s.%(ext)s"),
+            "quiet": False,
+            "noplaylist": not self.allow_playlist,
+            "writesubtitles": self.subtitles,
+            "writeautomaticsub": self.subtitles,
+            "live_from_start": self.live_from_start,
+            "proxy": self.proxy,
+            "max_downloads": self.max_downloads,
+            "playlistend": self.max_downloads,
+        }

-        ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'), 
-                       'quiet': False, 'noplaylist': not self.allow_playlist ,
-                       'writesubtitles': self.subtitles,'writeautomaticsub': self.subtitles,
-                       "live_from_start": self.live_from_start, "proxy": self.proxy,
-                       "max_downloads": self.max_downloads, "playlistend": self.max_downloads}
-        
        # set up auth
        auth = self.auth_for_site(url, extract_cookies=False)

        # order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file
        if auth:
-            if 'username' in auth and 'password' in auth:
-                logger.debug(f'Using provided auth username and password for {url}')
-                ydl_options['username'] = auth['username']
-                ydl_options['password'] = auth['password']
-            elif 'cookie' in auth:
-                logger.debug(f'Using provided auth cookie for {url}')
-                yt_dlp.utils.std_headers['cookie'] = auth['cookie']
-            elif 'cookies_from_browser' in auth:
-                logger.debug(f'Using extracted cookies from browser {auth["cookies_from_browser"]} for {url}')
-                ydl_options['cookiesfrombrowser'] = auth['cookies_from_browser']
-            elif 'cookies_file' in auth:
-                logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}')
-                ydl_options['cookiefile'] = auth['cookies_file']
+            if "username" in auth and "password" in auth:
+                logger.debug(f"Using provided auth username and password for {url}")
+                ydl_options["username"] = auth["username"]
+                ydl_options["password"] = auth["password"]
+            elif "cookie" in auth:
+                logger.debug(f"Using provided auth cookie for {url}")
+                yt_dlp.utils.std_headers["cookie"] = auth["cookie"]
+            elif "cookies_from_browser" in auth:
+                logger.debug(f"Using extracted cookies from browser {auth['cookies_from_browser']} for {url}")
+                ydl_options["cookiesfrombrowser"] = auth["cookies_from_browser"]
+            elif "cookies_file" in auth:
+                logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}")
+                ydl_options["cookiefile"] = auth["cookies_file"]

-        ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
+        ydl = yt_dlp.YoutubeDL(
+            ydl_options
+        )  # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"

        for info_extractor in self.suitable_extractors(url):
            result = self.download_for_extractor(info_extractor, url, ydl)
            if result:
                return result

-
        return False
--- a/src/auto_archiver/modules/generic_extractor/truth.py
+++ b/src/auto_archiver/modules/generic_extractor/truth.py
@@ -9,11 +9,11 @@ from dateutil.parser import parse as parse_dt

 from .dropin import GenericDropin

-class Truth(GenericDropin):

+class Truth(GenericDropin):
    def extract_post(self, url, ie_instance: InfoExtractor) -> dict:
        video_id = ie_instance._match_id(url)
-        truthsocial_url = f'https://truthsocial.com/api/v1/statuses/{video_id}'
+        truthsocial_url = f"https://truthsocial.com/api/v1/statuses/{video_id}"
        return ie_instance._download_json(truthsocial_url, video_id)

    def skip_ytdlp_download(self, url, ie_instance: Type[InfoExtractor]) -> bool:
@@ -22,31 +22,42 @@ class Truth(GenericDropin):
    def create_metadata(self, post: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
        """
        Creates metadata from a truth social post
-        
+
        Only used for posts that contain no media. ytdlp.TruthIE extractor can handle posts with media
-        
+
        Format is:
-        
+
        {'id': '109598702184774628', 'created_at': '2022-12-29T19:51:18.161Z', 'in_reply_to_id': None, 'quote_id': None, 'in_reply_to_account_id': None, 'sensitive': False, 'spoiler_text': '', 'visibility': 'public', 'language': 'en', 'uri': 'https://truthsocial.com/@bbcnewa/109598702184774628', 'url': 'https://truthsocial.com/@bbcnewa/109598702184774628', 'content': '<p>Pele, regarded by many as football\'s greatest ever player, has died in Brazil at the age of 82. <a href="https://www.bbc.com/sport/football/42751517" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://www.</span><span class="ellipsis">bbc.com/sport/football/4275151</span><span class="invisible">7</span></a></p>', 'account': {'id': '107905163010312793', 'username': 'bbcnewa', 'acct': 'bbcnewa', 'display_name': 'BBC News', 'locked': False, 'bot': False, 'discoverable': True, 'group': False, 'created_at': '2022-03-05T17:42:01.159Z', 'note': '<p>News, features and analysis by the BBC</p>', 'url': 'https://truthsocial.com/@bbcnewa', 'avatar': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/avatars/107/905/163/010/312/793/original/e7c07550dc22c23a.jpeg', 'avatar_static': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/avatars/107/905/163/010/312/793/original/e7c07550dc22c23a.jpeg', 'header': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/headers/107/905/163/010/312/793/original/a00eeec2b57206c7.jpeg', 'header_static': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/accounts/headers/107/905/163/010/312/793/original/a00eeec2b57206c7.jpeg', 'followers_count': 1131, 'following_count': 3, 'statuses_count': 9, 'last_status_at': '2024-11-12', 'verified': False, 'location': '', 'website': 'https://www.bbc.com/news', 'unauth_visibility': True, 'chats_onboarded': True, 'feeds_onboarded': True, 'accepting_messages': False, 'show_nonmember_group_statuses': None, 'emojis': [], 'fields': [], 'tv_onboarded': True, 'tv_account': False}, 'media_attachments': [], 'mentions': [], 'tags': [], 'card': None, 'group': None, 'quote': None, 'in_reply_to': None, 'reblog': None, 'sponsored': False, 'replies_count': 1, 'reblogs_count': 0, 'favourites_count': 2, 'favourited': False, 'reblogged': False, 'muted': False, 'pinned': False, 'bookmarked': False, 'poll': None, 'emojis': []}
        """

        result = Metadata()
        result.set_url(url)
-        timestamp = post['created_at'] # format is 2022-12-29T19:51:18.161Z
+        timestamp = post["created_at"]  # format is 2022-12-29T19:51:18.161Z
        result.set_timestamp(parse_dt(timestamp))
-        result.set('description', post['content'])
-        result.set('author', post['account']['username'])
+        result.set("description", post["content"])
+        result.set("author", post["account"]["username"])

-        for key in ['replies_count', 'reblogs_count', 'favourites_count', ('account', 'followers_count'), ('account', 'following_count'), ('account', 'statuses_count'), ('account', 'display_name'), 'language', 'in_reply_to_account', 'replies_count']:
+        for key in [
+            "replies_count",
+            "reblogs_count",
+            "favourites_count",
+            ("account", "followers_count"),
+            ("account", "following_count"),
+            ("account", "statuses_count"),
+            ("account", "display_name"),
+            "language",
+            "in_reply_to_account",
+            "replies_count",
+        ]:
            if isinstance(key, tuple):
                store_key = " ".join(key)
            else:
                store_key = key
            result.set(store_key, traverse_obj(post, key))
-        
-        # add the media
-        for media in post.get('media_attachments', []):
-            filename = archiver.download_from_url(media['url'])
-            result.add_media(Media(filename), id=media.get('id'))

-        return result
+        # add the media
+        for media in post.get("media_attachments", []):
+            filename = archiver.download_from_url(media["url"])
+            result.add_media(Media(filename), id=media.get("id"))
+
+        return result
--- a/src/auto_archiver/modules/generic_extractor/twitter.py
+++ b/src/auto_archiver/modules/generic_extractor/twitter.py
@@ -10,9 +10,8 @@ from auto_archiver.core.extractor import Extractor

 from .dropin import GenericDropin, InfoExtractor

+
 class Twitter(GenericDropin):
-
-
    def choose_variant(self, variants):
        # choosing the highest quality possible
        variant, width, height = None, 0, 0
@@ -27,9 +26,9 @@ class Twitter(GenericDropin):
            else:
                variant = var if not variant else variant
        return variant
-    
+
    def extract_post(self, url: str, ie_instance: InfoExtractor):
-        twid = ie_instance._match_valid_url(url).group('id')
+        twid = ie_instance._match_valid_url(url).group("id")
        return ie_instance._extract_status(twid=twid)

    def create_metadata(self, tweet: dict, ie_instance: InfoExtractor, archiver: Extractor, url: str) -> Metadata:
@@ -41,30 +40,29 @@ class Twitter(GenericDropin):
        except (ValueError, KeyError) as ex:
            logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
            return False
-                
-        result\
-            .set_title(tweet.get('full_text', ''))\
-            .set_content(json.dumps(tweet, ensure_ascii=False))\
-            .set_timestamp(timestamp)
+
+        result.set_title(tweet.get("full_text", "")).set_content(json.dumps(tweet, ensure_ascii=False)).set_timestamp(
+            timestamp
+        )
        if not tweet.get("entities", {}).get("media"):
-            logger.debug('No media found, archiving tweet text only')
+            logger.debug("No media found, archiving tweet text only")
            result.status = "twitter-ytdl"
            return result
        for i, tw_media in enumerate(tweet["entities"]["media"]):
            media = Media(filename="")
            mimetype = ""
            if tw_media["type"] == "photo":
-                media.set("src", UrlUtil.twitter_best_quality_url(tw_media['media_url_https']))
+                media.set("src", UrlUtil.twitter_best_quality_url(tw_media["media_url_https"]))
                mimetype = "image/jpeg"
            elif tw_media["type"] == "video":
-                variant = self.choose_variant(tw_media['video_info']['variants'])
-                media.set("src", variant['url'])
-                mimetype = variant['content_type']
+                variant = self.choose_variant(tw_media["video_info"]["variants"])
+                media.set("src", variant["url"])
+                mimetype = variant["content_type"]
            elif tw_media["type"] == "animated_gif":
-                variant = tw_media['video_info']['variants'][0]
-                media.set("src", variant['url'])
-                mimetype = variant['content_type']
+                variant = tw_media["video_info"]["variants"][0]
+                media.set("src", variant["url"])
+                mimetype = variant["content_type"]
            ext = mimetypes.guess_extension(mimetype)
-            media.filename = archiver.download_from_url(media.get("src"), f'{slugify(url)}_{i}{ext}')
+            media.filename = archiver.download_from_url(media.get("src"), f"{slugify(url)}_{i}{ext}")
            result.add_media(media)
-        return result
+        return result