Skip individual Telegram photo/video links

2026-06-08 02:28:29 +03:00 · 2020-10-07 01:27:26 +00:00
parent 119e53d07c
commit b1a7b9607f
1 changed files with 5 additions and 0 deletions
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -1,12 +1,14 @@
 import bs4
 import datetime
 import logging
+import re
 import snscrape.base
 import typing
 import urllib.parse


 logger = logging.getLogger(__name__)
+_SINGLE_MEDIA_LINK_PATTERN = re.compile(r'^https://t\.me/[^/]+/\d+\?single$')  # https://t.me/<name>/<digits>?single, i.e. a link to one individual photo/video


 class LinkPreview(typing.NamedTuple):
@@ -89,6 +91,9 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					if link['href'] == rawUrl or link['href'] == url:
 						# Generic filter of links to the post itself, catches videos, photos, and the date link
 						continue
+					if _SINGLE_MEDIA_LINK_PATTERN.match(link['href']):
+						# Link to an individual photo or video (".../<digits>?single"); skipped so it is not recorded as an outlink
+						continue
 					href = urllib.parse.urljoin(pageUrl, link['href'])
 					if href not in outlinks:
 						outlinks.append(href)