Ruff format with defaults.

2026-06-12 05:08:28 +03:00 · 2025-03-10 18:44:54 +00:00
parent cbb0414e5f
commit 85abe1837a
155 changed files with 2539 additions and 1908 deletions
--- a/src/auto_archiver/modules/telegram_extractor/__init__.py
+++ b/src/auto_archiver/modules/telegram_extractor/__init__.py
@@ -1 +1 @@
-from .telegram_extractor import TelegramExtractor
+from .telegram_extractor import TelegramExtractor
--- a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
+++ b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
@@ -15,11 +15,11 @@ class TelegramExtractor(Extractor):
    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
        # detect URLs that we definitely cannot handle
-        if 't.me' != item.netloc:
+        if "t.me" != item.netloc:
            return False

        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
        }

        # TODO: check if we can do this more resilient to variable URLs
@@ -27,11 +27,11 @@ class TelegramExtractor(Extractor):
            url += "?embed=1"

        t = requests.get(url, headers=headers)
-        s = BeautifulSoup(t.content, 'html.parser')
+        s = BeautifulSoup(t.content, "html.parser")

        result = Metadata()
        result.set_content(html.escape(str(t.content)))
-        if (timestamp := (s.find_all('time') or [{}])[0].get('datetime')):
+        if timestamp := (s.find_all("time") or [{}])[0].get("datetime"):
            result.set_timestamp(timestamp)

        video = s.find("video")
@@ -41,25 +41,26 @@ class TelegramExtractor(Extractor):

            image_urls = []
            for im in image_tags:
-                urls = [u.replace("'", "") for u in re.findall(r'url\((.*?)\)', im['style'])]
+                urls = [u.replace("'", "") for u in re.findall(r"url\((.*?)\)", im["style"])]
                image_urls += urls

-            if not len(image_urls): return False
+            if not len(image_urls):
+                return False
            for img_url in image_urls:
                result.add_media(Media(self.download_from_url(img_url)))
        else:
-            video_url = video.get('src')
+            video_url = video.get("src")
            m_video = Media(self.download_from_url(video_url))
            # extract duration from HTML
            try:
-                duration = s.find_all('time')[0].contents[0]
-                if ':' in duration:
-                    duration = float(duration.split(
-                        ':')[0]) * 60 + float(duration.split(':')[1])
+                duration = s.find_all("time")[0].contents[0]
+                if ":" in duration:
+                    duration = float(duration.split(":")[0]) * 60 + float(duration.split(":")[1])
                else:
                    duration = float(duration)
                m_video.set("duration", duration)
-            except: pass
+            except:
+                pass
            result.add_media(m_video)

        return result.success("telegram")