From de4ebed81f3f6a4bb4c65630daab6ec63784959b Mon Sep 17 00:00:00 2001
From: Logan Williams <logan.williams@alum.mit.edu>
Date: Thu, 24 Feb 2022 18:08:12 +0100
Subject: [PATCH 01/32] Fix KeyError caused by retweets without URLs in
 TwitterProfileScraper

---
 snscrape/modules/twitter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 7c78f69..337843f 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -605,6 +605,9 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 				i = kwargs['tcooutlinks'].index(card.url)
 			except ValueError:
 				_logger.warning('Could not find card URL in tcooutlinks')
+			except KeyError:
+				# retweets are missing this attribute
+				pass
 			else:
 				card.url = kwargs['outlinks'][i]
 		return Tweet(**kwargs)

From 2480b173f430df8c79d92bc9545546ea14ef0bee Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Sat, 26 Feb 2022 19:31:08 +0000
Subject: [PATCH 02/32] Fix crash on race condition in CLI guest token manager
 resets

Fixes #414
---
 snscrape/modules/twitter.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 7c78f69..655fc66 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -287,7 +287,11 @@ class _CLIGuestTokenManager(GuestTokenManager):
 	def reset(self):
 		super().reset()
 		with self._lock:
-			os.remove(self._file)
+			try:
+				os.remove(self._file)
+			except FileNotFoundError:
+				# Another process likely already removed the file
+				pass
 
 
 class _TwitterAPIType(enum.Enum):

From 3a92b5bf0d93142e75b64cfb3828d69143bd106c Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Sat, 26 Feb 2022 19:32:55 +0000
Subject: [PATCH 03/32] Add log message for guest token file deletion

---
 snscrape/modules/twitter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 655fc66..48baf9d 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -287,6 +287,7 @@ class _CLIGuestTokenManager(GuestTokenManager):
 	def reset(self):
 		super().reset()
 		with self._lock:
+			_logger.info(f'Deleting guest token file {self._file}')
 			try:
 				os.remove(self._file)
 			except FileNotFoundError:

From 1ab0f4fccb0a694a2146436641bf578fa8d80d59 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Mon, 7 Mar 2022 22:16:58 +0000
Subject: [PATCH 04/32] Fix missing quoted tweet reference in certain buggy
 cases

---
 snscrape/modules/twitter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 48baf9d..d0f720f 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -666,6 +666,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 				kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
 			else:
 				kwargs['quotedTweet'] = TweetRef(id = int(result['quotedRefResult']['result']['rest_id']))
+		elif 'quoted_status_id_str' in tweet:
+			kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
 		if 'card' in result:
 			kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
 		return self._make_tweet(tweet, user, **kwargs)

From 694657ef801d51561e235c8351c2bc9e953b130c Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Wed, 9 Mar 2022 01:01:47 +0000
Subject: [PATCH 05/32] Fix broken exception references

---
 snscrape/modules/twitter.py | 4 ++--
 snscrape/modules/weibo.py   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index d0f720f..78d70a3 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -344,7 +344,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 				r = self._post('https://api.twitter.com/1.1/guest/activate.json', data = b'', headers = self._apiHeaders, responseOkCallback = self._check_guest_token_response)
 				o = r.json()
 				if not o.get('guest_token'):
-					raise snscrape.base.ScraperError('Unable to retrieve guest token')
+					raise snscrape.base.ScraperException('Unable to retrieve guest token')
 				self._guestTokenManager.token = o['guest_token']
 			assert self._guestTokenManager.token
 		_logger.debug(f'Using guest token {self._guestTokenManager.token}')
@@ -649,7 +649,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			#TODO Include result['softInterventionPivot'] in the Tweet object
 			result = result['tweet']
 		else:
-			raise snscrape.base.ScraperError(f'Unknown result type {result["__typename"]!r}')
+			raise snscrape.base.ScraperException(f'Unknown result type {result["__typename"]!r}')
 		tweet = result['legacy']
 		userId = int(result['core']['user_results']['result']['rest_id'])
 		user = self._user_to_user(result['core']['user_results']['result']['legacy'], id_ = userId)
diff --git a/snscrape/modules/weibo.py b/snscrape/modules/weibo.py
index 38318bd..796f864 100644
--- a/snscrape/modules/weibo.py
+++ b/snscrape/modules/weibo.py
@@ -70,7 +70,7 @@ class WeiboUserScraper(snscrape.base.Scraper):
 			_logger.warning('User does not exist')
 			self._user = _userDoesNotExist
 		else:
-			raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
+			raise snscrape.base.ScraperException(f'Got unexpected response on resolving username ({r.status_code})')
 
 	def _check_timeline_response(self, r):
 		if r.status_code == 200 and r.content == b'{"ok":0,"msg":"\\u8fd9\\u91cc\\u8fd8\\u6ca1\\u6709\\u5185\\u5bb9","data":{"cards":[]}}':

From ed829163a09d5153a6489e2a01ce6b1421356fce Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Tue, 29 Mar 2022 01:12:07 -0500
Subject: [PATCH 06/32] added capability to extract the number of channel
 members when the string in membersDiv has the word 'subscribers' rather
 than 'members'.

---
 snscrape/modules/telegram.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 8b0ece7..a65671c 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -177,8 +177,8 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
 		soup = bs4.BeautifulSoup(r.text, 'lxml')
 		membersDiv = soup.find('div', class_ = 'tgme_page_extra')
-		if membersDiv.text.endswith(' members'):
-			kwargs['members'] = int(membersDiv.text[:-8].replace(' ', ''))
+		if membersDiv.text.endswith((' members', ' subscribers')):
+			kwargs['members'] = int(''.join(membersDiv.text.split(' ')[:-1]))
 		kwargs['photo'] = soup.find('img', class_ = 'tgme_page_photo_image').attrs['src']
 
 		r, soup = self._initial_page()

From fb8d73ac95011b7ad848a6048d3eed1880e80f21 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Tue, 29 Mar 2022 13:15:53 -0500
Subject: [PATCH 07/32] handled case where channel has no profile image

---
 snscrape/modules/telegram.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index a65671c..399ce95 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -179,7 +179,11 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 		membersDiv = soup.find('div', class_ = 'tgme_page_extra')
 		if membersDiv.text.endswith((' members', ' subscribers')):
 			kwargs['members'] = int(''.join(membersDiv.text.split(' ')[:-1]))
-		kwargs['photo'] = soup.find('img', class_ = 'tgme_page_photo_image').attrs['src']
+		photoImg = soup.find('img', class_ = 'tgme_page_photo_image')
+		if photoImg is not None:
+			kwargs['photo'] = photoImg.attrs['src']
+		else:
+			kwargs['photo'] = None
 
 		r, soup = self._initial_page()
 		if '/s/' not in r.url: # Redirect on channels without public posts

From d32c9add8a3691c81c9091dc1a7d079e9871379f Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 30 Mar 2022 18:13:15 -0500
Subject: [PATCH 08/32] added capability to scrape multiple videos from a
 single post

---
 snscrape/modules/telegram.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 399ce95..0f093ee 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -31,7 +31,7 @@ class TelegramPost(snscrape.base.Item):
 	content: str
 	outlinks: list
 	images: list
-	video: str
+	videos: list
 	forwarded: str
 	linkPreview: typing.Optional[LinkPreview] = None
 
@@ -94,13 +94,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			url = rawUrl.replace('//t.me/', '//t.me/s/')
 			date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
 			images = []
-			video = None
+			videos = []
 			forwarded = None
 			if (message := post.find('div', class_ = 'tgme_widget_message_text')):
 				content = message.get_text(separator="\n")
 
 				for video_tag in post.find_all('video'):
-					video = video_tag['src']	
+					videos.append(video_tag['src'])
 
 				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
 					forwarded = forward_tag['href'].split('t.me/')[1].split('/')[0]			
@@ -134,7 +134,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				content = None
 				outlinks = []
 				images = []
-				video = None
+				videos = []
 			linkPreview = None
 			if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')):
 				kwargs = {}
@@ -151,7 +151,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					else:
 						_logger.warning(f'Could not process link preview image on {url}')
 				linkPreview = LinkPreview(**kwargs)
-			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, images = images, video = video, forwarded = forwarded)
+			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, images = images, videos = videos, forwarded = forwarded)
 
 	def get_items(self):
 		r, soup = self._initial_page()

From a7eb54d226b5397a7a3c1cd502f22a8a31f719fd Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 30 Mar 2022 21:07:17 -0500
Subject: [PATCH 09/32] implemented Media dataclasses for Telegram, and added
 variable for extracting a post's view count

---
 snscrape/modules/telegram.py | 94 ++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 25 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 0f093ee..0484c9c 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -30,9 +30,9 @@ class TelegramPost(snscrape.base.Item):
 	date: datetime.datetime
 	content: str
 	outlinks: list
-	images: list
-	videos: list
+	media: typing.Optional[typing.List['Medium']]
 	forwarded: str
+	views: int = None
 	linkPreview: typing.Optional[LinkPreview] = None
 
 	outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks')
@@ -62,6 +62,29 @@ class Channel(snscrape.base.Entity):
 	def __str__(self):
 		return f'https://t.me/s/{self.username}'
 
+class Medium:
+	pass
+
+
+@dataclasses.dataclass
+class Photo(Medium):
+	previewUrl: str
+	fullUrl: str
+
+@dataclasses.dataclass
+class Image(Medium):
+	url: str
+
+@dataclasses.dataclass
+class Video(Medium):
+	thumbnailUrl: str
+	duration: float
+	url: typing.Optional[str] = None
+
+@dataclasses.dataclass
+class Gif(Medium):
+	thumbnailUrl: str
+	url: typing.Optional[str] = None
 
 class TelegramChannelScraper(snscrape.base.Scraper):
 	name = 'telegram-channel'
@@ -93,18 +116,34 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				_logger.warning(f'Possibly incorrect URL: {rawUrl!r}')
 			url = rawUrl.replace('//t.me/', '//t.me/s/')
 			date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
-			images = []
-			videos = []
+			media = []
 			forwarded = None
 			if (message := post.find('div', class_ = 'tgme_widget_message_text')):
 				content = message.get_text(separator="\n")
 
-				for video_tag in post.find_all('video'):
-					videos.append(video_tag['src'])
+				for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
 
+					style = video_player.find('i')['style']
+					videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)
+					videoTag = video_player.find('video')
+					if videoTag is None:
+						videoUrl = None
+					else:
+						videoUrl = videoTag['src']
+					mKwargs = {
+						'thumbnailUrl': videoThumbnailUrl,
+						'url': videoUrl,
+					}
+					timeTag = video_player.find('time')
+					if timeTag is None:
+						cls = Gif
+					else:
+						cls = Video
+						durationStr = video_player.find('time').text.split(':')
+						mKwargs['duration'] = sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
+					media.append(cls(**mKwargs))
 				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
 					forwarded = forward_tag['href'].split('t.me/')[1].split('/')[0]			
-
 				outlinks = []
 				for link in post.find_all('a'):
 					if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
@@ -114,15 +153,15 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 						style = link.attrs.get('style', '')
 						# Generic filter of links to the post itself, catches videos, photos, and the date link
 						if style != '':
-							image = re.findall('url\(\'(.*?)\'\)', style)
-							if len(image) == 1:
-								images.append(image[0])
+							imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+							if len(imageUrls) == 1:
+								media.append(Image(url = imageUrls[0]))
 							continue
 					if _SINGLE_MEDIA_LINK_PATTERN.match(link['href']):
 						style = link.attrs.get('style', '')
-						image = re.findall('url\(\'(.*?)\'\)', style)
-						if len(image) == 1:
-							images.append(image[0])
+						imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+						if len(imageUrls) == 1:
+							media.append(Image(url = imageUrls[0]))
 							# resp = self._get(image[0])
 							# encoded_string = base64.b64encode(resp.content)
 						# Individual photo or video link
@@ -133,8 +172,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			else:
 				content = None
 				outlinks = []
-				images = []
-				videos = []
+				media = []
 			linkPreview = None
 			if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')):
 				kwargs = {}
@@ -151,7 +189,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					else:
 						_logger.warning(f'Could not process link preview image on {url}')
 				linkPreview = LinkPreview(**kwargs)
-			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, images = images, videos = videos, forwarded = forwarded)
+			viewsSpan = post.find('span', class_ = 'tgme_widget_message_views')
+			if viewsSpan is None:
+				views = None
+			else:
+				views = parse_num(viewsSpan.text)
+			
+			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, views = views)
 
 	def get_items(self):
 		r, soup = self._initial_page()
@@ -204,15 +248,6 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 		if (descriptionDiv := channelInfoDiv.find('div', class_ = 'tgme_channel_info_description')):
 			kwargs['description'] = descriptionDiv.text
 
-		def parse_num(s):
-			s = s.replace(' ', '')
-			if s.endswith('M'):
-				return int(float(s[:-1]) * 1e6), 10 ** (6 if '.' not in s else 6 - len(s[:-1].split('.')[1]))
-			elif s.endswith('K'):
-				return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1]))
-			else:
-				return int(s), 1
-
 		for div in channelInfoDiv.find_all('div', class_ = 'tgme_channel_info_counter'):
 			value, granularity = parse_num(div.find('span', class_ = 'counter_value').text)
 			type_ = div.find('span', class_ = 'counter_type').text
@@ -231,3 +266,12 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 	@classmethod
 	def _cli_from_args(cls, args):
 		return cls._cli_construct(args, args.channel)
+
+def parse_num(s):
+	s = s.replace(' ', '')
+	if s.endswith('M'):
+		return int(float(s[:-1]) * 1e6), 10 ** (6 if '.' not in s else 6 - len(s[:-1].split('.')[1]))
+	elif s.endswith('K'):
+		return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1]))
+	else:
+		return int(s), 1
\ No newline at end of file

From 4e59638e7c00b57d63ae06a84c6a4a90d9e7ee49 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 30 Mar 2022 21:33:03 -0500
Subject: [PATCH 10/32] added a forwardedUrl attribute to TelegramPost and made
 forwarded attribute type Channel.

---
 snscrape/modules/telegram.py | 43 ++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 0484c9c..19aa22e 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -24,23 +24,6 @@ class LinkPreview:
 	image: typing.Optional[str] = None
 
 
-@dataclasses.dataclass
-class TelegramPost(snscrape.base.Item):
-	url: str
-	date: datetime.datetime
-	content: str
-	outlinks: list
-	media: typing.Optional[typing.List['Medium']]
-	forwarded: str
-	views: int = None
-	linkPreview: typing.Optional[LinkPreview] = None
-
-	outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks')
-
-	def __str__(self):
-		return self.url
-
-
 @dataclasses.dataclass
 class Channel(snscrape.base.Entity):
 	username: str
@@ -62,6 +45,23 @@ class Channel(snscrape.base.Entity):
 	def __str__(self):
 		return f'https://t.me/s/{self.username}'
 
+@dataclasses.dataclass
+class TelegramPost(snscrape.base.Item):
+	url: str
+	date: datetime.datetime
+	content: str
+	outlinks: list
+	forwarded: typing.Optional['Channel'] = None
+	forwardedUrl: typing.Optional[str] = None
+	media: typing.Optional[typing.List['Medium']] = None
+	views: typing.Optional[int] = None
+	linkPreview: typing.Optional[LinkPreview] = None
+
+	outlinksss = snscrape.base._DeprecatedProperty('outlinksss', lambda self: ' '.join(self.outlinks), 'outlinks')
+
+	def __str__(self):
+		return self.url
+
 class Medium:
 	pass
 
@@ -118,6 +118,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
 			media = []
 			forwarded = None
+			forwardedUrl = None
 			if (message := post.find('div', class_ = 'tgme_widget_message_text')):
 				content = message.get_text(separator="\n")
 
@@ -143,7 +144,11 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 						mKwargs['duration'] = sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
 					media.append(cls(**mKwargs))
 				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
-					forwarded = forward_tag['href'].split('t.me/')[1].split('/')[0]			
+					forwardedUrl = forward_tag['href']
+					forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
+					forwardedChannelScraper = TelegramChannelScraper(name = forwardedName)
+					forwarded = forwardedChannelScraper._get_entity()
+
 				outlinks = []
 				for link in post.find_all('a'):
 					if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
@@ -195,7 +200,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			else:
 				views = parse_num(viewsSpan.text)
 			
-			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, views = views)
+			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views)
 
 	def get_items(self):
 		r, soup = self._initial_page()

From 5d156c6a1510aab8d64daf709517691dd065cfb5 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Sun, 3 Apr 2022 02:34:30 +0000
Subject: [PATCH 11/32] Detect and raise error on redirect from GraphQL
 endpoint to login

#165
---
 snscrape/modules/instagram.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py
index 4a89f3d..14483e0 100644
--- a/snscrape/modules/instagram.py
+++ b/snscrape/modules/instagram.py
@@ -96,6 +96,8 @@ class _InstagramCommonScraper(snscrape.base.Scraper):
 	def _check_json_callback(self, r):
 		if r.status_code != 200:
 			return False, f'status code {r.status_code}'
+		if r.url.startswith('https://www.instagram.com/accounts/login/'):
+			raise snscrape.base.ScraperException('Redirected to login page')
 		try:
 			obj = json.loads(r.text)
 		except json.JSONDecodeError as e:

From 2ce014ade4104a748b7a6a0ada19fcab1460e832 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Sun, 3 Apr 2022 01:45:25 -0500
Subject: [PATCH 12/32] fixed edge case for videos that have data-link-attr but
 no href attribute

---
 snscrape/modules/vkontakte.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/snscrape/modules/vkontakte.py b/snscrape/modules/vkontakte.py
index ea33b44..3193abe 100644
--- a/snscrape/modules/vkontakte.py
+++ b/snscrape/modules/vkontakte.py
@@ -177,11 +177,15 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 					continue
 				if 'data-video' in a.attrs:
 					# Video
+					if 'data-link-attr' in a.attrs:
+						hrefUrl = urllib.parse.unquote(a.attrs['data-link-attr'].split('to=')[1].split('&')[0])
+					else:
+						hrefUrl = f'https://vk.com{a["href"]}'
 					video = Video(
 						id = a['data-video'],
 						list = a['data-list'],
 						duration = int(a['data-duration']),
-						url = f'https://vk.com{a["href"]}',
+						url = hrefUrl,
 						thumbUrl = a['style'][(begin := a['style'].find('background-image: url(') + 22) : a['style'].find(')', begin)],
 					)
 					continue

From 5fc3c0e290fdcd7025316b75913e79c1e985bce5 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 12 Apr 2022 18:03:36 +0000
Subject: [PATCH 13/32] Fix crash in locals dumping on module-less frames

---
 snscrape/_cli.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/snscrape/_cli.py b/snscrape/_cli.py
index 1b0d9b7..d4b66ea 100644
--- a/snscrape/_cli.py
+++ b/snscrape/_cli.py
@@ -133,12 +133,22 @@ def _dump_stack_and_locals(trace, exc = None):
 		fp.write('Stack:\n')
 		for frameRecord in trace:
 			fp.write(f'  File "{frameRecord.filename}", line {frameRecord.lineno}, in {frameRecord.function}\n')
-			for line in frameRecord.code_context:
-				fp.write(f'    {line.strip()}\n')
+			if frameRecord.code_context is not None:
+				for line in frameRecord.code_context:
+					fp.write(f'    {line.strip()}\n')
 		fp.write('\n')
 
-		for frameRecord in trace:
-			module = inspect.getmodule(frameRecord[0])
+		modules = [inspect.getmodule(frameRecord[0]) for frameRecord in trace]
+		for i, (module, frameRecord) in enumerate(zip(modules, trace)):
+			if module is None:
+				# Module-less frame, e.g. dataclass.__init__
+				for j in reversed(range(i)):
+					if modules[j] is not None:
+						break
+				else:
+					# No previous module scope
+					continue
+				module = modules[j]
 			if not module.__name__.startswith('snscrape.') and module.__name__ != 'snscrape':
 				continue
 			locals_ = frameRecord[0].f_locals

From 9af1f190349ee9bdbeb8198f5a4ea71b038bf21a Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 12 Apr 2022 18:11:26 +0000
Subject: [PATCH 14/32] Properly support all card types

Fixes #407
---
 snscrape/modules/twitter.py | 668 +++++++++++++++++++++++++++++++++---
 1 file changed, 614 insertions(+), 54 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 78d70a3..de4b396 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -99,7 +99,7 @@ class VideoVariant:
 class Video(Medium):
 	thumbnailUrl: str
 	variants: typing.List[VideoVariant]
-	duration: float
+	duration: typing.Optional[float] = None
 	views: typing.Optional[int] = None
 
 
@@ -132,12 +132,284 @@ class Place:
 	countryCode: str
 
 
-@dataclasses.dataclass
 class Card:
+	pass
+
+
+@dataclasses.dataclass
+class SummaryCard(Card):
 	title: str
 	url: str
 	description: typing.Optional[str] = None
 	thumbnailUrl: typing.Optional[str] = None
+	siteUser: typing.Optional['User'] = None
+	creatorUser: typing.Optional['User'] = None
+
+
+@dataclasses.dataclass
+class AppCard(SummaryCard):
+	pass
+
+
+@dataclasses.dataclass
+class PollCard(Card):
+	options: typing.List['PollOption']
+	endDate: datetime.datetime
+	duration: int
+	finalResults: bool
+	lastUpdateDate: typing.Optional[datetime.datetime] = None
+	medium: typing.Optional[Medium] = None
+
+
+@dataclasses.dataclass
+class PollOption:
+	label: str
+	count: typing.Optional[int] = None
+
+
+@dataclasses.dataclass
+class PlayerCard(Card):
+	title: str
+	url: str
+	description: typing.Optional[str] = None
+	imageUrl: typing.Optional[str] = None
+	siteUser: typing.Optional['User'] = None
+
+
+@dataclasses.dataclass
+class PromoConvoCard(Card):
+	actions: typing.List['PromoConvoAction']
+	thankYouText: str
+	medium: Medium
+	thankYouUrl: typing.Optional[str] = None
+	thankYouTcoUrl: typing.Optional[str] = None
+	cover: typing.Optional['Photo'] = None
+
+
+@dataclasses.dataclass
+class PromoConvoAction:
+	label: str
+	tweet: str
+
+
+@dataclasses.dataclass
+class BroadcastCard(Card):
+	id: str
+	url: str
+	title: str
+	state: str
+	source: str
+	thumbnailUrl: str
+	broadcaster: 'User'
+	siteUser: typing.Optional['User'] = None
+
+
+@dataclasses.dataclass
+class PeriscopeBroadcastCard(Card):
+	id: str
+	url: str
+	title: str
+	description: str
+	state: str
+	source: str
+	totalParticipants: int
+	thumbnailUrl: str
+	broadcaster: 'User'
+	siteUser: typing.Optional['User'] = None
+
+
+@dataclasses.dataclass
+class EventCard(Card):
+	event: 'Event'
+
+
+@dataclasses.dataclass
+class Event:
+	id: int
+	title: str
+	category: str
+	photo: Photo
+	description: typing.Optional[str] = None
+
+	@property
+	def url(self):
+		return f'https://twitter.com/i/events/{self.id}'
+
+
+@dataclasses.dataclass
+class NewsletterCard(Card):
+	title: str
+	description: str
+	imageUrl: str
+	url: str
+	revueAccountId: int
+	issueCount: int
+
+
+@dataclasses.dataclass
+class NewsletterIssueCard(Card):
+	newsletterTitle: str
+	newsletterDescription: str
+	issueTitle: str
+	issueDescription: str
+	issueNumber: int
+	url: str
+	revueAccountId: int
+	imageUrl: typing.Optional[str] = None
+
+
+@dataclasses.dataclass
+class AmplifyCard(Card):
+	id: str
+	video: Video
+
+
+@dataclasses.dataclass
+class AppPlayerCard(Card):
+	title: str
+	video: Video
+	appCategory: str
+	playerOwnerId: int
+	siteUser: typing.Optional['User'] = None
+
+
+@dataclasses.dataclass
+class SpacesCard(Card):
+	url: str
+	id: str
+
+
+UnifiedCardComponentKey = str
+UnifiedCardDestinationKey = str
+UnifiedCardMediumKey = str
+UnifiedCardAppKey = str
+
+
+@dataclasses.dataclass
+class UnifiedCard(Card):
+	componentObjects: typing.Dict[UnifiedCardComponentKey, 'UnifiedCardComponentObject']
+	destinations: typing.Dict[UnifiedCardDestinationKey, 'UnifiedCardDestination']
+	media: typing.Dict[UnifiedCardMediumKey, Medium]
+	apps: typing.Optional[typing.Dict[UnifiedCardAppKey, typing.List['UnifiedCardApp']]] = None
+	components: typing.Optional[typing.List[UnifiedCardComponentKey]] = None
+	swipeableLayoutSlides: typing.Optional[typing.List['UnifiedCardSwipeableLayoutSlide']] = None
+	type: typing.Optional[str] = None
+
+	def __post_init__(self):
+		if (self.components is None) == (self.swipeableLayoutSlides is None):
+			raise ValueError('did not get exactly one of components or swipeableLayoutSlides')
+		if self.components and not all(k in self.componentObjects for k in self.components):
+			raise ValueError('missing components')
+		if self.swipeableLayoutSlides and not all(s.mediumComponentKey in self.componentObjects and s.componentKey in self.componentObjects for s in self.swipeableLayoutSlides):
+			raise ValueError('missing components')
+		if any(c.destinationKey not in self.destinations for c in self.componentObjects.values() if hasattr(c, 'destinationKey')):
+			raise ValueError('missing destinations')
+		if any(b.destinationKey not in self.destinations for c in self.componentObjects.values() if isinstance(c, UnifiedCardButtonGroupComponentObject) for b in c.buttons):
+			raise ValueError('missing destinations')
+		mediaKeys = []
+		for c in self.componentObjects.values():
+			if isinstance(c, UnifiedCardMediumComponentObject):
+				mediaKeys.append(c.mediumKey)
+			elif isinstance(c, UnifiedCardSwipeableMediaComponentObject):
+				mediaKeys.extend(x.mediumKey for x in c.media)
+		mediaKeys.extend(d.mediumKey for d in self.destinations.values() if d.mediumKey is not None)
+		mediaKeys.extend(a.iconMediumKey for l in (self.apps.values() if self.apps is not None else []) for a in l if a.iconMediumKey is not None)
+		if any(k not in self.media for k in mediaKeys):
+			raise ValueError('missing media')
+		if any(c.appKey not in self.apps for c in self.componentObjects.values() if hasattr(c, 'appKey')):
+			raise ValueError('missing apps')
+		if any(d.appKey not in self.apps for d in self.destinations.values() if d.appKey is not None):
+			raise ValueError('missing apps')
+
+
+class UnifiedCardComponentObject:
+	pass
+
+
+@dataclasses.dataclass
+class UnifiedCardDetailComponentObject(UnifiedCardComponentObject):
+	content: str
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardMediumComponentObject(UnifiedCardComponentObject):
+	mediumKey: UnifiedCardMediumKey
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardButtonGroupComponentObject(UnifiedCardComponentObject):
+	buttons: typing.List['UnifiedCardButton']
+
+
+@dataclasses.dataclass
+class UnifiedCardButton:
+	text: str
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardSwipeableMediaComponentObject(UnifiedCardComponentObject):
+	media: typing.List['UnifiedCardSwipeableMediaMedium']
+
+
+@dataclasses.dataclass
+class UnifiedCardSwipeableMediaMedium:
+	mediumKey: UnifiedCardMediumKey
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardAppStoreComponentObject(UnifiedCardComponentObject):
+	appKey: UnifiedCardAppKey
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardTwitterListDetailsComponentObject(UnifiedCardComponentObject):
+	name: str
+	memberCount: int
+	subscriberCount: int
+	user: 'User'
+	destinationKey: UnifiedCardDestinationKey
+
+
+@dataclasses.dataclass
+class UnifiedCardDestination:
+	url: typing.Optional[str] = None
+	appKey: typing.Optional[UnifiedCardAppKey] = None
+	mediumKey: typing.Optional[UnifiedCardMediumKey] = None
+
+	def __post_init__(self):
+		if (self.url is None) == (self.appKey is None):
+			raise ValueError('did not get exactly one of url and appKey')
+
+
+@dataclasses.dataclass
+class UnifiedCardApp:
+	type: str
+	id: str
+	title: str
+	category: str
+	countryCode: str
+	url: str
+	description: typing.Optional[str] = None
+	iconMediumKey: typing.Optional[UnifiedCardMediumKey] = None
+	size: typing.Optional[int] = None
+	installs: typing.Optional[int] = None
+	ratingAverage: typing.Optional[float] = None
+	ratingCount: typing.Optional[int] = None
+	isFree: typing.Optional[bool] = None
+	isEditorsChoice: typing.Optional[bool] = None
+	hasInAppPurchases: typing.Optional[bool] = None
+	hasInAppAds: typing.Optional[bool] = None
+
+
+@dataclasses.dataclass
+class UnifiedCardSwipeableLayoutSlide:
+	mediumComponentKey: UnifiedCardComponentKey
+	componentKey: UnifiedCardComponentKey
 
 
 @dataclasses.dataclass
@@ -192,6 +464,11 @@ class UserLabel:
 	longDescription: typing.Optional[str] = None
 
 
+@dataclasses.dataclass
+class UserRef:
+	id: int
+
+
 @dataclasses.dataclass
 class Trend(snscrape.base.Item):
 	name: str
@@ -510,9 +787,12 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			raise snscrape.base.ScraperException(f'Unable to handle entry {entryId!r}')
 		yield self._tweet_to_tweet(tweet, obj)
 
+	def _get_tweet_id(self, tweet):
+		return tweet['id'] if 'id' in tweet else int(tweet['id_str'])
+
 	def _make_tweet(self, tweet, user, retweetedTweet = None, quotedTweet = None, card = None):
 		kwargs = {}
-		kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str'])
+		kwargs['id'] = self._get_tweet_id(tweet)
 		kwargs['content'] = tweet['full_text']
 		kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls'))
 		kwargs['user'] = user
@@ -535,36 +815,8 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		if 'extended_entities' in tweet and 'media' in tweet['extended_entities']:
 			media = []
 			for medium in tweet['extended_entities']['media']:
-				if medium['type'] == 'photo':
-					if '.' not in medium['media_url_https']:
-						_logger.warning(f'Skipping malformed medium URL on tweet {kwargs["id"]}: {medium["media_url_https"]!r} contains no dot')
-						continue
-					baseUrl, format = medium['media_url_https'].rsplit('.', 1)
-					if format not in ('jpg', 'png'):
-						_logger.warning(f'Skipping photo with unknown format on tweet {kwargs["id"]}: {format!r}')
-						continue
-					media.append(Photo(
-						previewUrl = f'{baseUrl}?format={format}&name=small',
-						fullUrl = f'{baseUrl}?format={format}&name=large',
-					))
-				elif medium['type'] == 'video' or medium['type'] == 'animated_gif':
-					variants = []
-					for variant in medium['video_info']['variants']:
-						variants.append(VideoVariant(contentType = variant['content_type'], url = variant['url'], bitrate = variant.get('bitrate')))
-					mKwargs = {
-						'thumbnailUrl': medium['media_url_https'],
-						'variants': variants,
-					}
-					if medium['type'] == 'video':
-						mKwargs['duration'] = medium['video_info']['duration_millis'] / 1000
-						if (ext := medium.get('ext')) and (mediaStats := ext['mediaStats']) and isinstance(r := mediaStats['r'], dict) and 'ok' in r and isinstance(r['ok'], dict):
-							mKwargs['views'] = int(r['ok']['viewCount'])
-						elif (mediaStats := medium.get('mediaStats')):
-							mKwargs['views'] = mediaStats['viewCount']
-						cls = Video
-					elif medium['type'] == 'animated_gif':
-						cls = Gif
-					media.append(cls(**mKwargs))
+				if (mediumO := self._make_medium(medium, kwargs['id'])):
+					media.append(mediumO)
 			if media:
 				kwargs['media'] = media
 		if retweetedTweet:
@@ -605,31 +857,339 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			kwargs['cashtags'] = [o['text'] for o in tweet['entities']['symbols']]
 		if card:
 			kwargs['card'] = card
-			# Try to convert the URL to the non-shortened/t.co one
-			try:
-				i = kwargs['tcooutlinks'].index(card.url)
-			except ValueError:
-				_logger.warning('Could not find card URL in tcooutlinks')
-			else:
-				card.url = kwargs['outlinks'][i]
+			if hasattr(card, 'url') and '//t.co/' in card.url and 'tcooutlinks' in kwargs:
+				# Try to convert the URL to the non-shortened/t.co one
+				try:
+					i = kwargs['tcooutlinks'].index(card.url)
+				except ValueError:
+					_logger.warning('Could not find card URL in tcooutlinks')
+				else:
+					card.url = kwargs['outlinks'][i]
 		return Tweet(**kwargs)
 
-	def _make_card(self, card, apiType):
-		cardKwargs = {}
-		for key, kwarg in [('title', 'title'), ('description', 'description'), ('card_url', 'url'), ('thumbnail_image_original', 'thumbnailUrl')]:
-			if apiType is _TwitterAPIType.V2:
-				value = card['binding_values'].get(key)
-			elif apiType is _TwitterAPIType.GRAPHQL:
-				value = next((o['value'] for o in card['legacy']['binding_values'] if o['key'] == key), None)
-			if not value:
+	def _make_medium(self, medium, tweetId):
+		if medium['type'] == 'photo':
+			if '?format=' in medium['media_url_https'] or '&format=' in medium['media_url_https']:
+				return Photo(previewUrl = medium['media_url_https'], fullUrl = medium['media_url_https'])
+			if '.' not in medium['media_url_https']:
+				_logger.warning(f'Skipping malformed medium URL on tweet {tweetId}: {medium["media_url_https"]!r} contains no dot')
+				return
+			baseUrl, format = medium['media_url_https'].rsplit('.', 1)
+			if format not in ('jpg', 'png'):
+				_logger.warning(f'Skipping photo with unknown format on tweet {tweetId}: {format!r}')
+				return
+			return Photo(
+				previewUrl = f'{baseUrl}?format={format}&name=small',
+				fullUrl = f'{baseUrl}?format={format}&name=large',
+			)
+		elif medium['type'] == 'video' or medium['type'] == 'animated_gif':
+			variants = []
+			for variant in medium['video_info']['variants']:
+				variants.append(VideoVariant(contentType = variant['content_type'], url = variant['url'], bitrate = variant.get('bitrate')))
+			mKwargs = {
+				'thumbnailUrl': medium['media_url_https'],
+				'variants': variants,
+			}
+			if medium['type'] == 'video':
+				mKwargs['duration'] = medium['video_info']['duration_millis'] / 1000
+				if (ext := medium.get('ext')) and (mediaStats := ext.get('mediaStats')) and isinstance(r := mediaStats['r'], dict) and 'ok' in r and isinstance(r['ok'], dict):
+					mKwargs['views'] = int(r['ok']['viewCount'])
+				elif (mediaStats := medium.get('mediaStats')):
+					mKwargs['views'] = mediaStats['viewCount']
+				cls = Video
+			elif medium['type'] == 'animated_gif':
+				cls = Gif
+			return cls(**mKwargs)
+		else:
+			_logger.warning(f'Unsupported medium type on tweet {tweetId}: {medium["type"]!r}')
+
+	def _make_card(self, card, apiType, tweetId):
+		bindingValues = {}
+
+		def _kwargs_from_map(keyKwargMap):
+			nonlocal bindingValues
+			return {kwarg: bindingValues[key] for key, kwarg in keyKwargMap.items() if key in bindingValues}
+
+		userRefs = {}
+		if apiType is _TwitterAPIType.V2:
+			for o in card.get('users', {}).values():
+				userId = o['id']
+				assert userId not in userRefs
+				userRefs[userId] = self._user_to_user(o)
+		elif apiType is _TwitterAPIType.GRAPHQL:
+			for o in card['legacy'].get('user_refs', {}):
+				userId = int(o['rest_id'])
+				if userId in userRefs:
+					_logger.warning(f'Duplicate user {userId} in card on tweet {tweetId}')
+					continue
+				if 'legacy' in o:
+					userRefs[userId] = self._user_to_user(o['legacy'], id_ = userId)
+				else:
+					userRefs[userId] = UserRef(id = userId)
+
+		if apiType is _TwitterAPIType.V2:
+			messyBindingValues = card['binding_values'].items()
+		elif apiType is _TwitterAPIType.GRAPHQL:
+			messyBindingValues = ((x['key'], x['value']) for x in card['legacy']['binding_values'])
+		for key, value in messyBindingValues:
+			if 'type' not in value:
+				# Silently ignore creator/site entries since they frequently appear like this.
+				if key not in ('creator', 'site'):
+					_logger.warning(f'Skipping type-less card value {key!r} on tweet {tweetId}')
 				continue
 			if value['type'] == 'STRING':
-				cardKwargs[kwarg] = value['string_value']
+				bindingValues[key] = value['string_value']
+				if key.endswith('_datetime_utc'):
+					bindingValues[key] = datetime.datetime.strptime(bindingValues[key], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo = datetime.timezone.utc)
 			elif value['type'] == 'IMAGE':
-				cardKwargs[kwarg] = value['image_value']['url']
+				bindingValues[key] = value['image_value']['url']
+			elif value['type'] == 'IMAGE_COLOR':
+				# Silently discard this.
+				pass
+			elif value['type'] == 'BOOLEAN':
+				bindingValues[key] = value['boolean_value']
+			elif value['type'] == 'USER':
+				bindingValues[key] = userRefs[int(value['user_value']['id_str'])]
 			else:
-				raise snscrape.base.ScraperError(f'Unknown card value type: {value["type"]!r}')
-		return Card(**cardKwargs)
+				_logger.warning(f'Unsupported card value type on {key!r} on tweet {tweetId}: {value["type"]!r}')
+
+		if apiType is _TwitterAPIType.V2:
+			cardName = card['name']
+		elif apiType is _TwitterAPIType.GRAPHQL:
+			cardName = card['legacy']['name']
+
+		if cardName in ('summary', 'summary_large_image', 'app', 'direct_store_link_app'):
+			keyKwargMap = {
+				'title': 'title',
+				'description': 'description',
+				'card_url': 'url',
+				'site': 'siteUser',
+				'creator': 'creatorUser',
+			}
+			if cardName in ('app', 'direct_store_link_app'):
+				keyKwargMap['thumbnail_original'] = 'thumbnailUrl'
+				return AppCard(**_kwargs_from_map(keyKwargMap))
+			else:
+				keyKwargMap['thumbnail_image_original'] = 'thumbnailUrl'
+				return SummaryCard(**_kwargs_from_map(keyKwargMap))
+		elif any(cardName.startswith(x) for x in ('poll2choice_', 'poll3choice_', 'poll4choice_')) and cardName.split('_', 1)[1] in ('text_only', 'image', 'video'):
+			kwargs = _kwargs_from_map({'end_datetime_utc': 'endDate', 'last_updated_datetime_utc': 'lastUpdateDate', 'duration_minutes': 'duration', 'counts_are_final': 'finalResults'})
+
+			options = []
+			for key in sorted(bindingValues):
+				if key.startswith('choice') and key.endswith('_label'):
+					optKwargs = {'label': bindingValues[key]}
+					if (count := bindingValues.get(f'{key[:-5]}count')):
+						optKwargs['count'] = int(count)
+					options.append(PollOption(**optKwargs))
+			kwargs['options'] = options
+			kwargs['duration'] = int(kwargs['duration'])
+
+			if cardName.endswith('_image'):
+				kwargs['medium'] = Photo(previewUrl = bindingValues['image_small'], fullUrl = bindingValues['image_original'])
+			elif cardName.endswith('_video'):
+				variants = []
+				variants.append(VideoVariant(contentType = 'application/x-mpegurl', url = bindingValues['player_hls_url'], bitrate = None))
+				if 'vmap' not in bindingValues['player_stream_url']:
+					_logger.warning(f'Non-VMAP URL in {cardName} player_stream_url on tweet {tweetId}')
+				variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_stream_url'], bitrate = None))
+				kwargs['medium'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
+
+			return PollCard(**kwargs)
+		elif cardName == 'player':
+			return PlayerCard(**_kwargs_from_map({'title': 'title', 'description': 'description', 'card_url': 'url', 'player_image_original': 'imageUrl', 'site': 'siteUser'}))
+		elif cardName in ('promo_image_convo', 'promo_video_convo'):
+			kwargs = _kwargs_from_map({'thank_you_text': 'thankYouText', 'thank_you_url': 'thankYouUrl', 'thank_you_shortened_url': 'thankYouTcoUrl'})
+			kwargs['actions'] = []
+			for l in ('one', 'two', 'three', 'four'):
+				if f'cta_{l}' in bindingValues:
+					kwargs['actions'].append(PromoConvoAction(label = bindingValues[f'cta_{l}'], tweet = bindingValues[f'cta_{l}_tweet']))
+			if 'image' in cardName:
+				kwargs['medium'] = Photo(previewUrl = bindingValues['promo_image_small'], fullUrl = bindingValues['promo_image_original'])
+				if 'cover_promo_image' in bindingValues:
+					kwargs['cover'] = Photo(previewUrl = bindingValues['cover_promo_image_small'], fullUrl = bindingValues['cover_promo_image_original'])
+			elif 'video' in cardName:
+				variants = []
+				variants.append(VideoVariant(contentType = bindingValues['player_stream_content_type'], url = bindingValues['player_stream_url'], bitrate = None))
+				if bindingValues['player_stream_url'] != bindingValues['player_url']:
+					if 'vmap' not in bindingValues['player_url']:
+						_logger.warning(f'Non-VMAP URL in {cardName} player_url on tweet {tweetId}')
+					variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_url'], bitrate = None))
+				kwargs['medium'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
+			return PromoConvoCard(**kwargs)
+		elif cardName in ('745291183405076480:broadcast', '3691233323:periscope_broadcast'):
+			keyKwargMap = {'broadcast_state': 'state', 'broadcast_source': 'source', 'site': 'siteUser'}
+			if cardName == '745291183405076480:broadcast':
+				keyKwargMap = {**keyKwargMap, 'broadcast_id': 'id', 'broadcast_url': 'url', 'broadcast_title': 'title', 'broadcast_thumbnail_original': 'thumbnailUrl'}
+			else:
+				keyKwargMap = {**keyKwargMap, 'id': 'id', 'url': 'url', 'title': 'title', 'description': 'description', 'total_participants': 'totalParticipants', 'thumbnail_original': 'thumbnailUrl'}
+			kwargs = _kwargs_from_map(keyKwargMap)
+			kwargs['broadcaster'] = User(id = int(bindingValues['broadcaster_twitter_id']), username = bindingValues['broadcaster_username'], displayname = bindingValues['broadcaster_display_name'])
+			if 'siteUser' not in kwargs:
+				kwargs['siteUser'] = None
+			if cardName == '745291183405076480:broadcast':
+				return BroadcastCard(**kwargs)
+			else:
+				kwargs['totalParticipants'] = int(kwargs['totalParticipants'])
+				return PeriscopeBroadcastCard(**kwargs)
+		elif cardName == '745291183405076480:live_event':
+			kwargs = _kwargs_from_map({'event_id': 'id', 'event_title': 'title', 'event_category': 'category', 'event_subtitle': 'description'})
+			kwargs['id'] = int(kwargs['id'])
+			kwargs['photo'] = Photo(previewUrl = bindingValues['event_thumbnail_small'], fullUrl = bindingValues['event_thumbnail_original'])
+			return EventCard(event = Event(**kwargs))
+		elif cardName == '3337203208:newsletter_publication':
+			kwargs = _kwargs_from_map({'newsletter_title': 'title', 'newsletter_description': 'description', 'newsletter_image_original': 'imageUrl', 'card_url': 'url', 'revue_account_id': 'revueAccountId', 'issue_count': 'issueCount'})
+			kwargs['revueAccountId'] = int(kwargs['revueAccountId'])
+			kwargs['issueCount'] = int(kwargs['issueCount'])
+			return NewsletterCard(**kwargs)
+		elif cardName == '3337203208:newsletter_issue':
+			kwargs = _kwargs_from_map({
+				'newsletter_title': 'newsletterTitle',
+				'newsletter_description': 'newsletterDescription',
+				'issue_title': 'issueTitle',
+				'issue_description': 'issueDescription',
+				'issue_number': 'issueNumber',
+				'issue_image_original': 'imageUrl',
+				'card_url': 'url',
+				'revue_account_id': 'revueAccountId'
+			})
+			kwargs['issueNumber'] = int(kwargs['issueNumber'])
+			kwargs['revueAccountId'] = int(kwargs['revueAccountId'])
+			return NewsletterIssueCard(**kwargs)
+		elif cardName == 'amplify':
+			return AmplifyCard(
+				id = bindingValues['amplify_content_id'],
+				video = Video(
+					thumbnailUrl = bindingValues['player_image'],
+					variants = [VideoVariant(contentType = bindingValues['player_stream_content_type'], url = bindingValues['amplify_url_vmap'], bitrate = None)],
+				),
+			)
+		elif cardName == 'appplayer':
+			kwargs = _kwargs_from_map({'title': 'title', 'app_category': 'appCategory', 'player_owner_id': 'playerOwnerId', 'site': 'siteUser'})
+			kwargs['playerOwnerId'] = int(kwargs['playerOwnerId'])
+			variants = []
+			variants.append(VideoVariant(contentType = 'application/x-mpegurl', url = bindingValues['player_hls_url'], bitrate = None))
+			if 'vmap' not in bindingValues['player_url']:
+				_logger.warning(f'Non-VMAP URL in {cardName} player_url on tweet {tweetId}')
+			variants.append(VideoVariant(contentType = 'text/xml', url = bindingValues['player_url'], bitrate = None))
+			kwargs['video'] = Video(thumbnailUrl = bindingValues['player_image_original'], variants = variants, duration = int(bindingValues['content_duration_seconds']))
+			return AppPlayerCard(**kwargs)
+		elif cardName == '3691233323:audiospace':
+			return SpacesCard(**_kwargs_from_map({'card_url': 'url', 'id': 'id'}))
+		elif cardName == 'unified_card':
+			o = json.loads(bindingValues['unified_card'])
+			kwargs = {}
+			if 'type' in o:
+				unifiedCardType = o.get('type')
+				if unifiedCardType not in (
+					'image_app',
+					'image_carousel_app',
+					'image_carousel_website',
+					'image_multi_dest_carousel_website',
+					'image_website',
+					'mixed_media_multi_dest_carousel_website',
+					'mixed_media_single_dest_carousel_app',
+					'mixed_media_single_dest_carousel_website',
+					'video_app',
+					'video_carousel_app',
+					'video_carousel_website',
+					'video_multi_dest_carousel_website',
+					'video_website',
+				):
+					_logger.warning(f'Unsupported unified_card type on tweet {tweetId}: {unifiedCardType!r}')
+					return
+				kwargs['type'] = unifiedCardType
+			elif set(c['type'] for c in o['component_objects'].values()) != {'media', 'twitter_list_details'}:
+				_logger.warning(f'Unsupported unified_card type on tweet {tweetId}')
+				return
+
+			kwargs['componentObjects'] = {}
+			for k, v in o['component_objects'].items():
+				if v['type'] == 'details':
+					co = UnifiedCardDetailComponentObject(content = v['data']['title']['content'], destinationKey = v['data']['destination'])
+				elif v['type'] == 'media':
+					co = UnifiedCardMediumComponentObject(mediumKey = v['data']['id'], destinationKey = v['data']['destination'])
+				elif v['type'] == 'button_group':
+					if not all(b['type'] == 'cta' for b in v['data']['buttons']):
+						_logger.warning(f'Unsupported unified_card button_group button type on tweet {tweetId}')
+						return
+					buttons = [UnifiedCardButton(text = b['action'][0].upper() + re.sub('[A-Z]', lambda x: f' {x[0]}', b['action'][1:]), destinationKey = b['destination']) for b in v['data']['buttons']]
+					co = UnifiedCardButtonGroupComponentObject(buttons = buttons)
+				elif v['type'] == 'swipeable_media':
+					media = [UnifiedCardSwipeableMediaMedium(mediumKey = m['id'], destinationKey = m['destination']) for m in v['data']['media_list']]
+					co = UnifiedCardSwipeableMediaComponentObject(media = media)
+				elif v['type'] == 'app_store_details':
+					co = UnifiedCardAppStoreComponentObject(appKey = v['data']['app_id'], destinationKey = v['data']['destination'])
+				elif v['type'] == 'twitter_list_details':
+					co = UnifiedCardTwitterListDetailsComponentObject(
+						name = v['data']['name']['content'],
+						memberCount = v['data']['member_count'],
+						subscriberCount = v['data']['subscriber_count'],
+						user = self._user_to_user(o['users'][v['data']['user_id']]),
+						destinationKey = v['data']['destination'],
+					)
+				else:
+					_logger.warning(f'Unsupported unified_card component type on tweet {tweetId}: {v["type"]!r}')
+					return
+				kwargs['componentObjects'][k] = co
+
+			kwargs['destinations'] = {}
+			for k, v in o['destination_objects'].items():
+				dKwargs = {}
+				if 'url_data' in v['data']:
+					dKwargs['url'] = v['data']['url_data']['url']
+				if 'app_id' in v['data']:
+					dKwargs['appKey'] = v['data']['app_id']
+				if 'media_id' in v['data']:
+					dKwargs['mediumKey'] = v['data']['media_id']
+				kwargs['destinations'][k] = UnifiedCardDestination(**dKwargs)
+
+			kwargs['media'] = {}
+			for k, v in o['media_entities'].items():
+				if (medium := self._make_medium(v, tweetId)):
+					kwargs['media'][k] = medium
+
+			if 'app_store_data' in o:
+				kwargs['apps'] = {}
+				for k, v in o['app_store_data'].items():
+					variants = []
+					for var in v:
+						vKwargsMap = {
+							'type': 'type',
+							'id': 'id',
+							'icon_media_key': 'iconMediumKey',
+							'country_code': 'countryCode',
+							'num_installs': 'installs',
+							'size_bytes': 'size',
+							'is_free': 'isFree',
+							'is_editors_choice': 'isEditorsChoice',
+							'has_in_app_purchases': 'hasInAppPurchases',
+							'has_in_app_ads': 'hasInAppAds',
+						}
+						vKwargs = {kwarg: var[key] for key, kwarg in vKwargsMap.items() if key in var}
+						vKwargs['title'] = var['title']['content']
+						if 'description' in var:
+							vKwargs['description'] = var['description']['content']
+						vKwargs['category'] = var['category']['content']
+						if (ratings := var['ratings']):
+							vKwargs['ratingAverage'] = var['ratings']['star']
+							vKwargs['ratingCount'] = var['ratings']['count']
+						vKwargs['url'] = f'https://play.google.com/store/apps/details?id={var["id"]}' if var['type'] == 'android_app' else f'https://itunes.apple.com/app/id{var["id"]}'
+						variants.append(UnifiedCardApp(**vKwargs))
+					kwargs['apps'][k] = variants
+
+			if o['components']:
+				kwargs['components'] = o['components']
+
+			if 'layout' in o:
+				if o['layout']['type'] != 'swipeable':
+					_logger.warning(f'Unsupported unified_card layout type on tweet {tweetId}: {o["layout"]["type"]!r}')
+					return
+				kwargs['swipeableLayoutSlides'] = [UnifiedCardSwipeableLayoutSlide(mediumComponentKey = v[0], componentKey = v[1]) for v in o['layout']['data']['slides']]
+
+			return UnifiedCard(**kwargs)
+
+		_logger.warning(f'Unsupported card type on tweet {tweetId}: {cardName!r}')
 
 	def _tweet_to_tweet(self, tweet, obj):
 		user = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']])
@@ -639,7 +1199,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		if 'quoted_status_id_str' in tweet and tweet['quoted_status_id_str'] in obj['globalObjects']['tweets']:
 			kwargs['quotedTweet'] = self._tweet_to_tweet(obj['globalObjects']['tweets'][tweet['quoted_status_id_str']], obj)
 		if 'card' in tweet:
-			kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2)
+			kwargs['card'] = self._make_card(tweet['card'], _TwitterAPIType.V2, self._get_tweet_id(tweet))
 		return self._make_tweet(tweet, user, **kwargs)
 
 	def _graphql_timeline_tweet_item_result_to_tweet(self, result):
@@ -669,7 +1229,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		elif 'quoted_status_id_str' in tweet:
 			kwargs['quotedTweet'] = TweetRef(id = int(tweet['quoted_status_id_str']))
 		if 'card' in result:
-			kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL)
+			kwargs['card'] = self._make_card(result['card'], _TwitterAPIType.GRAPHQL, self._get_tweet_id(tweet))
 		return self._make_tweet(tweet, user, **kwargs)
 
 	def _graphql_timeline_instructions_to_tweets(self, instructions, includeConversationThreads = False):

From 7c0fcdec436fc24919e65d07e0dec463527a08f0 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 12 Apr 2022 18:29:51 +0000
Subject: [PATCH 15/32] Fix Periscope card crashes

---
 snscrape/modules/twitter.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index de4b396..8f9e9d6 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -211,10 +211,10 @@ class PeriscopeBroadcastCard(Card):
 	title: str
 	description: str
 	state: str
-	source: str
 	totalParticipants: int
 	thumbnailUrl: str
-	broadcaster: 'User'
+	source: typing.Optional[str] = None
+	broadcaster: typing.Optional['User'] = None
 	siteUser: typing.Optional['User'] = None
 
 
@@ -1022,9 +1022,10 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			if cardName == '745291183405076480:broadcast':
 				keyKwargMap = {**keyKwargMap, 'broadcast_id': 'id', 'broadcast_url': 'url', 'broadcast_title': 'title', 'broadcast_thumbnail_original': 'thumbnailUrl'}
 			else:
-				keyKwargMap = {**keyKwargMap, 'id': 'id', 'url': 'url', 'title': 'title', 'description': 'description', 'total_participants': 'totalParticipants', 'thumbnail_original': 'thumbnailUrl'}
+				keyKwargMap = {**keyKwargMap, 'id': 'id', 'url': 'url', 'title': 'title', 'description': 'description', 'total_participants': 'totalParticipants', 'full_size_thumbnail_url': 'thumbnailUrl'}
 			kwargs = _kwargs_from_map(keyKwargMap)
-			kwargs['broadcaster'] = User(id = int(bindingValues['broadcaster_twitter_id']), username = bindingValues['broadcaster_username'], displayname = bindingValues['broadcaster_display_name'])
+			if 'broadcaster_twitter_id' in bindingValues:
+				kwargs['broadcaster'] = User(id = int(bindingValues['broadcaster_twitter_id']), username = bindingValues['broadcaster_username'], displayname = bindingValues['broadcaster_display_name'])
 			if 'siteUser' not in kwargs:
 				kwargs['siteUser'] = None
 			if cardName == '745291183405076480:broadcast':

From 3870282a427a26e56d11d35df88b1f0092e11e9a Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 12 Apr 2022 20:53:38 +0000
Subject: [PATCH 16/32] Fix broadcast and event card crashes

---
 snscrape/modules/twitter.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 8f9e9d6..ca906e7 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -197,10 +197,10 @@ class BroadcastCard(Card):
 	id: str
 	url: str
 	title: str
-	state: str
-	source: str
-	thumbnailUrl: str
-	broadcaster: 'User'
+	state: typing.Optional[str] = None
+	broadcaster: typing.Optional['User'] = None
+	thumbnailUrl: typing.Optional[str] = None
+	source: typing.Optional[str] = None
 	siteUser: typing.Optional['User'] = None
 
 
@@ -226,9 +226,9 @@ class EventCard(Card):
 @dataclasses.dataclass
 class Event:
 	id: int
-	title: str
 	category: str
 	photo: Photo
+	title: typing.Optional[str] = None
 	description: typing.Optional[str] = None
 
 	@property
@@ -1036,7 +1036,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		elif cardName == '745291183405076480:live_event':
 			kwargs = _kwargs_from_map({'event_id': 'id', 'event_title': 'title', 'event_category': 'category', 'event_subtitle': 'description'})
 			kwargs['id'] = int(kwargs['id'])
-			kwargs['photo'] = Photo(previewUrl = bindingValues['event_thumbnail_small'], fullUrl = bindingValues['event_thumbnail_original'])
+			kwargs['photo'] = Photo(previewUrl = bindingValues['event_thumbnail_small'], fullUrl = bindingValues.get('event_thumbnail_original') or bindingValues['event_thumbnail'])
 			return EventCard(event = Event(**kwargs))
 		elif cardName == '3337203208:newsletter_publication':
 			kwargs = _kwargs_from_map({'newsletter_title': 'title', 'newsletter_description': 'description', 'newsletter_image_original': 'imageUrl', 'card_url': 'url', 'revue_account_id': 'revueAccountId', 'issue_count': 'issueCount'})

From 65e7d8bd24af6ed4f7b80573643c1dd912946da0 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 02:52:03 +0000
Subject: [PATCH 17/32] Fix warning on card URL translation to include the
 tweet ID

---
 snscrape/modules/twitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index ca906e7..0504193 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -862,7 +862,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 				try:
 					i = kwargs['tcooutlinks'].index(card.url)
 				except ValueError:
-					_logger.warning('Could not find card URL in tcooutlinks')
+					_logger.warning(f'Could not find card URL in tcooutlinks on tweet {kwargs["id"]}')
 				else:
 					card.url = kwargs['outlinks'][i]
 		return Tweet(**kwargs)

From 5fc67f2bcff8d9a06c2b095e6a4eb7596fb4d032 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 02:52:37 +0000
Subject: [PATCH 18/32] Add support for 'message me' cards

---
 snscrape/modules/twitter.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 0504193..51d7369 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -279,6 +279,13 @@ class SpacesCard(Card):
 	id: str
 
 
+@dataclasses.dataclass
+class MessageMeCard(Card):
+	recipient: 'User'
+	url: str
+	buttonText: str
+
+
 UnifiedCardComponentKey = str
 UnifiedCardDestinationKey = str
 UnifiedCardMediumKey = str
@@ -1077,6 +1084,14 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			return AppPlayerCard(**kwargs)
 		elif cardName == '3691233323:audiospace':
 			return SpacesCard(**_kwargs_from_map({'card_url': 'url', 'id': 'id'}))
+		elif cardName == '2586390716:message_me':
+			# Note that the strings in Twitter's JS appear to have an incorrect mapping that then gets changed somewhere in the 1.8 MiB of JS!
+			# cta_1, 3, and 4 should mean 'Message us', 'Send a private message', and 'Send me a private message', but the correct mapping is currently unknown.
+			ctas = {'message_me_card_cta_2': 'Send us a private message'}
+			if bindingValues['cta'] not in ctas:
+				_logger.warning(f'Unsupported message_me card cta on tweet {tweetId}: {bindingValues["cta"]!r}')
+				return
+			return MessageMeCard(**_kwargs_from_map({'recipient': 'recipient', 'card_url': 'url'}), buttonText = ctas[bindingValues['cta']])
 		elif cardName == 'unified_card':
 			o = json.loads(bindingValues['unified_card'])
 			kwargs = {}

From 247bd82d79b264de7cdbcd9542ef2ed3b1c790fa Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 03:14:29 +0000
Subject: [PATCH 19/32] Refactor to tweetId variable

---
 snscrape/modules/twitter.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 51d7369..b38201b 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -798,8 +798,9 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		return tweet['id'] if 'id' in tweet else int(tweet['id_str'])
 
 	def _make_tweet(self, tweet, user, retweetedTweet = None, quotedTweet = None, card = None):
+		tweetId = self._get_tweet_id(tweet)
 		kwargs = {}
-		kwargs['id'] = self._get_tweet_id(tweet)
+		kwargs['id'] = tweetId
 		kwargs['content'] = tweet['full_text']
 		kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls'))
 		kwargs['user'] = user
@@ -807,7 +808,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		if tweet['entities'].get('urls'):
 			kwargs['outlinks'] = [u['expanded_url'] for u in tweet['entities']['urls']]
 			kwargs['tcooutlinks'] = [u['url'] for u in tweet['entities']['urls']]
-		kwargs['url'] = f'https://twitter.com/{user.username}/status/{kwargs["id"]}'
+		kwargs['url'] = f'https://twitter.com/{user.username}/status/{tweetId}'
 		kwargs['replyCount'] = tweet['reply_count']
 		kwargs['retweetCount'] = tweet['retweet_count']
 		kwargs['likeCount'] = tweet['favorite_count']
@@ -822,7 +823,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 		if 'extended_entities' in tweet and 'media' in tweet['extended_entities']:
 			media = []
 			for medium in tweet['extended_entities']['media']:
-				if (mediumO := self._make_medium(medium, kwargs['id'])):
+				if (mediumO := self._make_medium(medium, tweetId)):
 					media.append(mediumO)
 			if media:
 				kwargs['media'] = media
@@ -869,7 +870,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 				try:
 					i = kwargs['tcooutlinks'].index(card.url)
 				except ValueError:
-					_logger.warning(f'Could not find card URL in tcooutlinks on tweet {kwargs["id"]}')
+					_logger.warning(f'Could not find card URL in tcooutlinks on tweet {tweetId}')
 				else:
 					card.url = kwargs['outlinks'][i]
 		return Tweet(**kwargs)

From 5103a33afa67847643b7bfa1461d245e3ec46030 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 03:18:45 +0000
Subject: [PATCH 20/32] Fix t.co card URL replacement on retweets

Fixes #411
---
 snscrape/modules/twitter.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index b38201b..1f23038 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -865,14 +865,15 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			kwargs['cashtags'] = [o['text'] for o in tweet['entities']['symbols']]
 		if card:
 			kwargs['card'] = card
-			if hasattr(card, 'url') and '//t.co/' in card.url and 'tcooutlinks' in kwargs:
+			if hasattr(card, 'url') and '//t.co/' in card.url:
 				# Try to convert the URL to the non-shortened/t.co one
-				try:
-					i = kwargs['tcooutlinks'].index(card.url)
-				except ValueError:
-					_logger.warning(f'Could not find card URL in tcooutlinks on tweet {tweetId}')
+				# Retweets inherit the card but not the outlinks; try to get them from the retweeted tweet instead in that case.
+				if 'tcooutlinks' in kwargs and card.url in kwargs['tcooutlinks']:
+					card.url = kwargs['outlinks'][kwargs['tcooutlinks'].index(card.url)]
+				elif retweetedTweet and retweetedTweet.tcooutlinks and card.url in retweetedTweet.tcooutlinks:
+					card.url = retweetedTweet.outlinks[retweetedTweet.tcooutlinks.index(card.url)]
 				else:
-					card.url = kwargs['outlinks'][i]
+					_logger.warning(f'Could not translate t.co card URL on tweet {tweetId}')
 		return Tweet(**kwargs)
 
 	def _make_medium(self, medium, tweetId):

From 6c50eee31b667b057739c1b26a64f572550b960e Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 19:23:54 +0000
Subject: [PATCH 21/32] Fix proxies not being applied correctly due to missing
 merge with environment settings

Fixes #447
---
 snscrape/base.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/snscrape/base.py b/snscrape/base.py
index bda493f..71ab649 100644
--- a/snscrape/base.py
+++ b/snscrape/base.py
@@ -163,16 +163,19 @@ class Scraper:
 		return self._get_entity()
 
 	def _request(self, method, url, params = None, data = None, headers = None, timeout = 10, responseOkCallback = None, allowRedirects = True, proxies = None):
-		proxies = proxies or self._proxies
+		proxies = proxies or self._proxies or {}
 		for attempt in range(self._retries + 1):
 			# The request is newly prepared on each retry because of potential cookie updates.
 			req = self._session.prepare_request(requests.Request(method, url, params = params, data = data, headers = headers))
+			environmentSettings = self._session.merge_environment_settings(req.url, proxies, None, None, None)
 			logger.info(f'Retrieving {req.url}')
 			logger.debug(f'... with headers: {headers!r}')
 			if data:
 				logger.debug(f'... with data: {data!r}')
+			if environmentSettings:
+				logger.debug(f'... with environmentSettings: {environmentSettings!r}')
 			try:
-				r = self._session.send(req, allow_redirects = allowRedirects, timeout = timeout, proxies = proxies)
+				r = self._session.send(req, allow_redirects = allowRedirects, timeout = timeout, **environmentSettings)
 			except requests.exceptions.RequestException as exc:
 				if attempt < self._retries:
 					retrying = ', retrying'

From e7a6d38a5f3cf0c0e5d6133ab64938e89f1c22bd Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Fri, 15 Apr 2022 20:07:01 +0000
Subject: [PATCH 22/32] Add support for community_details cards

---
 snscrape/modules/twitter.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 1f23038..5d1cb09 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -382,6 +382,15 @@ class UnifiedCardTwitterListDetailsComponentObject(UnifiedCardComponentObject):
 	destinationKey: UnifiedCardDestinationKey
 
 
+@dataclasses.dataclass
+class UnifiedCardTwitterCommunityDetailsComponentObject(UnifiedCardComponentObject):
+	name: str
+	theme: str
+	membersCount: int
+	destinationKey: UnifiedCardDestinationKey
+	membersFacepile: typing.Optional[typing.List['User']] = None
+
+
 @dataclasses.dataclass
 class UnifiedCardDestination:
 	url: typing.Optional[str] = None
@@ -1117,7 +1126,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 					_logger.warning(f'Unsupported unified_card type on tweet {tweetId}: {unifiedCardType!r}')
 					return
 				kwargs['type'] = unifiedCardType
-			elif set(c['type'] for c in o['component_objects'].values()) != {'media', 'twitter_list_details'}:
+			elif set(c['type'] for c in o['component_objects'].values()) not in ({'media', 'twitter_list_details'}, {'media', 'community_details'}):
 				_logger.warning(f'Unsupported unified_card type on tweet {tweetId}')
 				return
 
@@ -1146,6 +1155,14 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 						user = self._user_to_user(o['users'][v['data']['user_id']]),
 						destinationKey = v['data']['destination'],
 					)
+				elif v['type'] == 'community_details':
+					co = UnifiedCardTwitterCommunityDetailsComponentObject(
+						name = v['data']['name']['content'],
+						theme = v['data']['theme'],
+						membersCount = v['data']['member_count'],
+						destinationKey = v['data']['destination'],
+						membersFacepile = [self._user_to_user(u) for u in map(o['users'].get, v['data']['members_facepile']) if u],
+					)
 				else:
 					_logger.warning(f'Unsupported unified_card component type on tweet {tweetId}: {v["type"]!r}')
 					return

From ed3ea944d177157d688786470c8369198b0ce8ce Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Sat, 16 Apr 2022 19:44:36 +0000
Subject: [PATCH 23/32] Fix newsletter issue cards without an issue description

Fixes #456
---
 snscrape/modules/twitter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 5d1cb09..048df34 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -251,10 +251,10 @@ class NewsletterIssueCard(Card):
 	newsletterTitle: str
 	newsletterDescription: str
 	issueTitle: str
-	issueDescription: str
 	issueNumber: int
 	url: str
 	revueAccountId: int
+	issueDescription: typing.Optional[str] = None
 	imageUrl: typing.Optional[str] = None
 
 

From babcddda19a0eca7d7413464666cbeb145d76f2d Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Sun, 17 Apr 2022 03:55:37 -0500
Subject: [PATCH 24/32] made Telegram scraper not return full channel info for
 forwarded_from attribute; fixed video edge cases.

---
 snscrape/modules/telegram.py | 39 +++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 19aa22e..565322c 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -27,9 +27,9 @@ class LinkPreview:
 @dataclasses.dataclass
 class Channel(snscrape.base.Entity):
 	username: str
-	title: str
-	verified: bool
-	photo: str
+	title: typing.Optional[str] = None
+	verified: typing.Optional[bool] = None
+	photo: typing.Optional[str] = None
 	description: typing.Optional[str] = None
 	members: typing.Optional[int] = None
 	photos: typing.Optional[snscrape.base.IntWithGranularity] = None
@@ -123,14 +123,18 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				content = message.get_text(separator="\n")
 
 				for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
-
-					style = video_player.find('i')['style']
-					videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)
-					videoTag = video_player.find('video')
-					if videoTag is None:
-						videoUrl = None
+					iTag = video_player.find('i')
+					if iTag is None:
+						videoUrl = None 
+						videoThumbnailUrl = None
 					else:
-						videoUrl = videoTag['src']
+						style = iTag['style']
+						videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)[0]
+						videoTag = video_player.find('video')
+						if videoTag is None:
+							videoUrl = None
+						else:
+							videoUrl = videoTag['src']
 					mKwargs = {
 						'thumbnailUrl': videoThumbnailUrl,
 						'url': videoUrl,
@@ -146,8 +150,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
 					forwardedUrl = forward_tag['href']
 					forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
-					forwardedChannelScraper = TelegramChannelScraper(name = forwardedName)
-					forwarded = forwardedChannelScraper._get_entity()
+					forwarded = Channel(username = forwardedName)
 
 				outlinks = []
 				for link in post.find_all('a'):
@@ -213,7 +216,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			if not pageLink:
 				break
 			nextPageUrl = urllib.parse.urljoin(r.url, pageLink['href'])
-			r = self._get(nextPageUrl, headers = self._headers)
+			r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = telegramResponseOkCallback)
 			if r.status_code != 200:
 				raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
 			soup = bs4.BeautifulSoup(r.text, 'lxml')
@@ -279,4 +282,12 @@ def parse_num(s):
 	elif s.endswith('K'):
 		return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1]))
 	else:
-		return int(s), 1
\ No newline at end of file
+		return int(s), 1
+
+def telegramResponseOkCallback(r):
+	if r.status_code == 200:
+		return (True, None)
+	elif r.status_code // 100 == 5:
+		return (False, f'status code: {r.status_code}')
+	else:
+		return (False, None)
\ No newline at end of file

From 1e4e0c278dce468fa98626234d522fa8e51af5e6 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Sun, 17 Apr 2022 04:33:22 -0500
Subject: [PATCH 25/32] fixed issue where Telegram scraper terminated early
 because some pages didn't have a next page link (added reasonable default)

---
 snscrape/modules/telegram.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 565322c..ad49066 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -214,8 +214,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			yield from self._soup_to_items(soup, r.url)
 			pageLink = soup.find('a', attrs = {'class': 'tme_messages_more', 'data-before': True})
 			if not pageLink:
-				break
+				nextPostIndex = int(nextPageUrl.split('=')[-1]) - 20
+				if nextPostIndex > 20:
+					pageLink = {'href': nextPageUrl.split('=')[0] + f'={nextPostIndex}'}
+				else:
+					break
 			nextPageUrl = urllib.parse.urljoin(r.url, pageLink['href'])
+			print(f'nextPageUrl: {nextPageUrl}')
 			r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = telegramResponseOkCallback)
 			if r.status_code != 200:
 				raise snscrape.base.ScraperException(f'Got status code {r.status_code}')

From b276c3cc27f35e57f287fb0b815e19b94b31487f Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Sun, 17 Apr 2022 06:50:43 -0500
Subject: [PATCH 26/32] fixed issue where some videos and photos weren't being
 scraped (because they weren't in a post containing a
 'tgme_widget_message_text' div

---
 snscrape/modules/telegram.py | 134 ++++++++++++++++++++---------------
 1 file changed, 75 insertions(+), 59 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index ad49066..c3d055d 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -65,14 +65,8 @@ class TelegramPost(snscrape.base.Item):
 class Medium:
 	pass
 
-
 @dataclasses.dataclass
 class Photo(Medium):
-	previewUrl: str
-	fullUrl: str
-
-@dataclasses.dataclass
-class Image(Medium):
 	url: str
 
 @dataclasses.dataclass
@@ -81,6 +75,12 @@ class Video(Medium):
 	duration: float
 	url: typing.Optional[str] = None
 
+@dataclasses.dataclass
+class VoiceMessage(Medium):
+	url: str
+	duration: str
+	bars:typing.List[float]
+
 @dataclasses.dataclass
 class Gif(Medium):
 	thumbnailUrl: str
@@ -117,70 +117,81 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			url = rawUrl.replace('//t.me/', '//t.me/s/')
 			date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
 			media = []
+			outlinks = []
 			forwarded = None
 			forwardedUrl = None
+
 			if (message := post.find('div', class_ = 'tgme_widget_message_text')):
 				content = message.get_text(separator="\n")
 
-				for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
-					iTag = video_player.find('i')
-					if iTag is None:
-						videoUrl = None 
-						videoThumbnailUrl = None
-					else:
-						style = iTag['style']
-						videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)[0]
-						videoTag = video_player.find('video')
-						if videoTag is None:
-							videoUrl = None
-						else:
-							videoUrl = videoTag['src']
-					mKwargs = {
-						'thumbnailUrl': videoThumbnailUrl,
-						'url': videoUrl,
-					}
-					timeTag = video_player.find('time')
-					if timeTag is None:
-						cls = Gif
-					else:
-						cls = Video
-						durationStr = video_player.find('time').text.split(':')
-						mKwargs['duration'] = sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
-					media.append(cls(**mKwargs))
 				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
 					forwardedUrl = forward_tag['href']
 					forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
 					forwarded = Channel(username = forwardedName)
 
-				outlinks = []
-				for link in post.find_all('a'):
-					if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
-						# Author links at the top (avatar and name)
-						continue
-					if link['href'] == rawUrl or link['href'] == url:
-						style = link.attrs.get('style', '')
-						# Generic filter of links to the post itself, catches videos, photos, and the date link
-						if style != '':
-							imageUrls = re.findall('url\(\'(.*?)\'\)', style)
-							if len(imageUrls) == 1:
-								media.append(Image(url = imageUrls[0]))
-							continue
-					if _SINGLE_MEDIA_LINK_PATTERN.match(link['href']):
-						style = link.attrs.get('style', '')
-						imageUrls = re.findall('url\(\'(.*?)\'\)', style)
-						if len(imageUrls) == 1:
-							media.append(Image(url = imageUrls[0]))
-							# resp = self._get(image[0])
-							# encoded_string = base64.b64encode(resp.content)
-						# Individual photo or video link
-						continue
-					href = urllib.parse.urljoin(pageUrl, link['href'])
-					if href not in outlinks:
-						outlinks.append(href)
 			else:
 				content = None
-				outlinks = []
-				media = []
+
+			outlinks = []
+			for link in post.find_all('a'):
+				if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
+					# Author links at the top (avatar and name)
+					continue
+				if link['href'] == rawUrl or link['href'] == url:
+					style = link.attrs.get('style', '')
+					# Generic filter of links to the post itself, catches videos, photos, and the date link
+					if style != '':
+						imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+						if len(imageUrls) == 1:
+							media.append(Photo(url = imageUrls[0]))
+						continue
+				if _SINGLE_MEDIA_LINK_PATTERN.match(link['href']):
+					style = link.attrs.get('style', '')
+					imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+					if len(imageUrls) == 1:
+						media.append(Photo(url = imageUrls[0]))
+						# resp = self._get(image[0])
+						# encoded_string = base64.b64encode(resp.content)
+					# Individual photo or video link
+					continue
+				href = urllib.parse.urljoin(pageUrl, link['href'])
+				if (href not in outlinks) and (href != rawUrl):
+					outlinks.append(href)
+
+			for voice_player in post.find_all('a', {'class': 'tgme_widget_message_voice_player'}):
+				audioUrl = voice_player.find('audio')['src']
+				durationStr = voice_player.find('time').text.split(':')
+				duration = durationStrToSeconds(durationStr)
+				barHeights = [float(s['style'].split(':')[-1].strip(';%')) for s in voice_player.find('div', {'class': 'bar'}).find_all('s')]
+
+				media.append(VoiceMessage(url = audioUrl, duration = duration, bars = barHeights))
+
+			for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
+				iTag = video_player.find('i')
+				if iTag is None:
+					videoUrl = None 
+					videoThumbnailUrl = None
+				else:
+					style = iTag['style']
+					videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)[0]
+					videoTag = video_player.find('video')
+					if videoTag is None:
+						videoUrl = None
+					else:
+						videoUrl = videoTag['src']
+				mKwargs = {
+					'thumbnailUrl': videoThumbnailUrl,
+					'url': videoUrl,
+				}
+				timeTag = video_player.find('time')
+				if timeTag is None:
+					cls = Gif
+				else:
+					cls = Video
+					durationStr = video_player.find('time').text.split(':')
+					mKwargs['duration'] = durationStrToSeconds(durationStr)
+				media.append(cls(**mKwargs))
+
 			linkPreview = None
 			if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')):
 				kwargs = {}
@@ -197,6 +208,9 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					else:
 						_logger.warning(f'Could not process link preview image on {url}')
 				linkPreview = LinkPreview(**kwargs)
+				if kwargs['href'] in outlinks:
+					outlinks.remove(kwargs['href'])
+
 			viewsSpan = post.find('span', class_ = 'tgme_widget_message_views')
 			if viewsSpan is None:
 				views = None
@@ -220,7 +234,6 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				else:
 					break
 			nextPageUrl = urllib.parse.urljoin(r.url, pageLink['href'])
-			print(f'nextPageUrl: {nextPageUrl}')
 			r = self._get(nextPageUrl, headers = self._headers, responseOkCallback = telegramResponseOkCallback)
 			if r.status_code != 200:
 				raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
@@ -289,6 +302,9 @@ def parse_num(s):
 	else:
 		return int(s), 1
 
+def durationStrToSeconds(durationStr):
+	return sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
+
 def telegramResponseOkCallback(r):
 	if r.status_code == 200:
 		return (True, None)

From 97d38e5cde20219dfff814f0b84d85d416fef86c Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Thu, 21 Apr 2022 09:41:53 -0500
Subject: [PATCH 27/32] added additional termination criteria to Telegram
 scraper

---
 snscrape/modules/telegram.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index c3d055d..89245f4 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -224,10 +224,20 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 		if '/s/' not in r.url:
 			_logger.warning('No public post list for this user')
 			return
+		nextPageUrl = ''
 		while True:
 			yield from self._soup_to_items(soup, r.url)
+			try:
+				if soup.find('a', attrs = {'class': 'tgme_widget_message_date'}, href = True)['href'].split('/')[-1] == '1':
+					# if message 1 is the first message in the page, terminate scraping
+					break
+			except:
+				pass
 			pageLink = soup.find('a', attrs = {'class': 'tme_messages_more', 'data-before': True})
 			if not pageLink:
+				# some pages are missing a "tme_messages_more" tag, causing early termination
+				if '=' not in nextPageUrl:
+					nextPageUrl =  soup.find('link', attrs = {'rel': 'canonical'}, href = True)['href']
 				nextPostIndex = int(nextPageUrl.split('=')[-1]) - 20
 				if nextPostIndex > 20:
 					pageLink = {'href': nextPageUrl.split('=')[0] + f'={nextPostIndex}'}

From 9b3faec9803cebfc5606f7c36eb74ebe8f2e6973 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Thu, 21 Apr 2022 18:06:43 -0500
Subject: [PATCH 28/32] added additional attributes for hashtags and user
 mentions, removed redundant outlinks

---
 snscrape/modules/telegram.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 89245f4..bed72cf 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -50,7 +50,9 @@ class TelegramPost(snscrape.base.Item):
 	url: str
 	date: datetime.datetime
 	content: str
-	outlinks: list
+	outlinks: typing.List[str] = None
+	mentions: typing.List[str] = None
+	hashtags: typing.List[str] = None
 	forwarded: typing.Optional['Channel'] = None
 	forwardedUrl: typing.Optional[str] = None
 	media: typing.Optional[typing.List['Medium']] = None
@@ -133,6 +135,8 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				content = None
 
 			outlinks = []
+			mentions = []
+			hashtags = []
 			for link in post.find_all('a'):
 				if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
 					# Author links at the top (avatar and name)
@@ -154,8 +158,14 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 						# encoded_string = base64.b64encode(resp.content)
 					# Individual photo or video link
 					continue
+				if link.text.startswith('@'):
+					mentions.append(link.text.strip('@'))
+					continue
+				if link.text.startswith('#'):
+					hashtags.append(link.text.strip('#'))
+					continue
 				href = urllib.parse.urljoin(pageUrl, link['href'])
-				if (href not in outlinks) and (href != rawUrl):
+				if (href not in outlinks) and (href != rawUrl) and (href != forwardedUrl):
 					outlinks.append(href)
 
 			for voice_player in post.find_all('a', {'class': 'tgme_widget_message_voice_player'}):
@@ -217,7 +227,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			else:
 				views = parse_num(viewsSpan.text)
 			
-			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views)
+			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, mentions = mentions, hashtags = hashtags, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views)
 
 	def get_items(self):
 		r, soup = self._initial_page()

From 21f7b620ec4d89102912700be6fa41a8001a8692 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Thu, 21 Apr 2022 18:26:31 -0500
Subject: [PATCH 29/32] moved forward finding out of tgme_widget_message_text
 clause, since it wasn't correctly getting the forwarding information in
 forwarded posts that contained attachments but no text

---
 snscrape/modules/telegram.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index bed72cf..9cd7573 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -123,14 +123,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			forwarded = None
 			forwardedUrl = None
 
+			if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
+				forwardedUrl = forward_tag['href']
+				forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
+				forwarded = Channel(username = forwardedName)
+
 			if (message := post.find('div', class_ = 'tgme_widget_message_text')):
 				content = message.get_text(separator="\n")
-
-				if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
-					forwardedUrl = forward_tag['href']
-					forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
-					forwarded = Channel(username = forwardedName)
-
 			else:
 				content = None
 

From 5648e957d0d1f759f68a2dfe21a137bf7c0ca608 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Wed, 27 Apr 2022 16:41:24 -0500
Subject: [PATCH 30/32] improved consistency of code formatting and added
 _STYLE_MEDIA_URL_PATTERN as variable

---
 snscrape/modules/telegram.py | 64 +++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 33 deletions(-)

diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py
index 9cd7573..ab44561 100644
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -13,7 +13,7 @@ import base64
 
 _logger = logging.getLogger(__name__)
 _SINGLE_MEDIA_LINK_PATTERN = re.compile(r'^https://t\.me/[^/]+/\d+\?single$')
-
+_STYLE_MEDIA_URL_PATTERN = re.compile(r'url\(\'(.*?)\'\)')
 
 @dataclasses.dataclass
 class LinkPreview:
@@ -45,6 +45,7 @@ class Channel(snscrape.base.Entity):
 	def __str__(self):
 		return f'https://t.me/s/{self.username}'
 
+
 @dataclasses.dataclass
 class TelegramPost(snscrape.base.Item):
 	url: str
@@ -64,30 +65,36 @@ class TelegramPost(snscrape.base.Item):
 	def __str__(self):
 		return self.url
 
+
 class Medium:
 	pass
 
+
 @dataclasses.dataclass
 class Photo(Medium):
 	url: str
 
+
 @dataclasses.dataclass
 class Video(Medium):
 	thumbnailUrl: str
 	duration: float
 	url: typing.Optional[str] = None
 
+
 @dataclasses.dataclass
 class VoiceMessage(Medium):
 	url: str
 	duration: str
 	bars:typing.List[float]
 
+
 @dataclasses.dataclass
 class Gif(Medium):
 	thumbnailUrl: str
 	url: typing.Optional[str] = None
 
+
 class TelegramChannelScraper(snscrape.base.Scraper):
 	name = 'telegram-channel'
 
@@ -120,11 +127,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			date = datetime.datetime.strptime(dateDiv.find('time', datetime = True)['datetime'].replace('-', '', 2).replace(':', ''), '%Y%m%dT%H%M%S%z')
 			media = []
 			outlinks = []
+			mentions = []
+			hashtags = []
 			forwarded = None
 			forwardedUrl = None
 
-			if (forward_tag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
-				forwardedUrl = forward_tag['href']
+			if (forwardTag := post.find('a', class_ = 'tgme_widget_message_forwarded_from_name')):
+				forwardedUrl = forwardTag['href']
 				forwardedName = forwardedUrl.split('t.me/')[1].split('/')[0]
 				forwarded = Channel(username = forwardedName)
 
@@ -133,9 +142,6 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			else:
 				content = None
 
-			outlinks = []
-			mentions = []
-			hashtags = []
 			for link in post.find_all('a'):
 				if any(x in link.parent.attrs.get('class', []) for x in ('tgme_widget_message_user', 'tgme_widget_message_author')):
 					# Author links at the top (avatar and name)
@@ -144,13 +150,13 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					style = link.attrs.get('style', '')
 					# Generic filter of links to the post itself, catches videos, photos, and the date link
 					if style != '':
-						imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+						imageUrls = _STYLE_MEDIA_URL_PATTERN.findall(style)
 						if len(imageUrls) == 1:
 							media.append(Photo(url = imageUrls[0]))
 						continue
 				if _SINGLE_MEDIA_LINK_PATTERN.match(link['href']):
 					style = link.attrs.get('style', '')
-					imageUrls = re.findall('url\(\'(.*?)\'\)', style)
+					imageUrls = _STYLE_MEDIA_URL_PATTERN.findall(style)
 					if len(imageUrls) == 1:
 						media.append(Photo(url = imageUrls[0]))
 						# resp = self._get(image[0])
@@ -167,37 +173,34 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 				if (href not in outlinks) and (href != rawUrl) and (href != forwardedUrl):
 					outlinks.append(href)
 
-			for voice_player in post.find_all('a', {'class': 'tgme_widget_message_voice_player'}):
-				audioUrl = voice_player.find('audio')['src']
-				durationStr = voice_player.find('time').text.split(':')
+			for voicePlayer in post.find_all('a', {'class': 'tgme_widget_message_voice_player'}):
+				audioUrl = voicePlayer.find('audio')['src']
+				durationStr = voicePlayer.find('time').text
 				duration = durationStrToSeconds(durationStr)
-				barHeights = [float(s['style'].split(':')[-1].strip(';%')) for s in voice_player.find('div', {'class': 'bar'}).find_all('s')]
+				barHeights = [float(s['style'].split(':')[-1].strip(';%')) for s in voicePlayer.find('div', {'class': 'bar'}).find_all('s')]
 
 				media.append(VoiceMessage(url = audioUrl, duration = duration, bars = barHeights))
 
-			for video_player in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
-				iTag = video_player.find('i')
+			for videoPlayer in post.find_all('a', {'class': 'tgme_widget_message_video_player'}):
+				iTag = videoPlayer.find('i')
 				if iTag is None:
 					videoUrl = None 
 					videoThumbnailUrl = None
 				else:
 					style = iTag['style']
-					videoThumbnailUrl = re.findall('url\(\'(.*?)\'\)', style)[0]
-					videoTag = video_player.find('video')
-					if videoTag is None:
-						videoUrl = None
-					else:
-						videoUrl = videoTag['src']
+					videoThumbnailUrl = _STYLE_MEDIA_URL_PATTERN.findall(style)[0]
+					videoTag = videoPlayer.find('video')
+					videoUrl = None if videoTag is None else videoTag['src']
 				mKwargs = {
 					'thumbnailUrl': videoThumbnailUrl,
 					'url': videoUrl,
 				}
-				timeTag = video_player.find('time')
+				timeTag = videoPlayer.find('time')
 				if timeTag is None:
 					cls = Gif
 				else:
 					cls = Video
-					durationStr = video_player.find('time').text.split(':')
+					durationStr = videoPlayer.find('time').text
 					mKwargs['duration'] = durationStrToSeconds(durationStr)
 				media.append(cls(**mKwargs))
 
@@ -221,10 +224,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 					outlinks.remove(kwargs['href'])
 
 			viewsSpan = post.find('span', class_ = 'tgme_widget_message_views')
-			if viewsSpan is None:
-				views = None
-			else:
-				views = parse_num(viewsSpan.text)
+			views = None if viewsSpan is None else parse_num(viewsSpan.text)
 			
 			yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, mentions = mentions, hashtags = hashtags, linkPreview = linkPreview, media = media, forwarded = forwarded, forwardedUrl = forwardedUrl, views = views)
 
@@ -318,16 +318,14 @@ def parse_num(s):
 		return int(float(s[:-1]) * 1e6), 10 ** (6 if '.' not in s else 6 - len(s[:-1].split('.')[1]))
 	elif s.endswith('K'):
 		return int(float(s[:-1]) * 1000), 10 ** (3 if '.' not in s else 3 - len(s[:-1].split('.')[1]))
-	else:
-		return int(s), 1
+	return int(s), 1
 
 def durationStrToSeconds(durationStr):
-	return sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationStr))])
+	durationList = durationStr.split(':')
+	return sum([int(s) * int(g) for s, g in zip([1, 60, 360], reversed(durationList))])
 
 def telegramResponseOkCallback(r):
 	if r.status_code == 200:
 		return (True, None)
-	elif r.status_code // 100 == 5:
-		return (False, f'status code: {r.status_code}')
-	else:
-		return (False, None)
\ No newline at end of file
+	return (False, f'{r.status_code=}')
+	
\ No newline at end of file

From e2d922301e114c0fe9956715bc304cbed68d2610 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 9 May 2022 09:37:36 -0500
Subject: [PATCH 31/32] forgot to save modified twitter.py module

---
 snscrape/modules/twitter.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 9a845b0..048df34 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -874,18 +874,6 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 			kwargs['cashtags'] = [o['text'] for o in tweet['entities']['symbols']]
 		if card:
 			kwargs['card'] = card
-<<<<<<< HEAD
-			# Try to convert the URL to the non-shortened/t.co one
-			try:
-				i = kwargs['tcooutlinks'].index(card.url)
-			except ValueError:
-				_logger.warning('Could not find card URL in tcooutlinks')
-			except KeyError:
-				# retweets are missing this attribute
-				pass
-			else:
-				card.url = kwargs['outlinks'][i]
-=======
 			if hasattr(card, 'url') and '//t.co/' in card.url:
 				# Try to convert the URL to the non-shortened/t.co one
 				# Retweets inherit the card but not the outlinks; try to get them from the retweeted tweet instead in that case.
@@ -895,7 +883,6 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
 					card.url = retweetedTweet.outlinks[retweetedTweet.tcooutlinks.index(card.url)]
 				else:
 					_logger.warning(f'Could not translate t.co card URL on tweet {tweetId}')
->>>>>>> ed3ea944d177157d688786470c8369198b0ce8ce
 		return Tweet(**kwargs)
 
 	def _make_medium(self, medium, tweetId):

From e3bdc02a7c29561be19c28d71b688b275e46b966 Mon Sep 17 00:00:00 2001
From: Geranium <geranium@kf.lol>
Date: Mon, 23 May 2022 22:43:33 +0100
Subject: [PATCH 32/32] Reddit: deprecate 'created' property for 'date'

This fixes a crash when using --since with the Reddit scraper,
as the CLI code expects items to have a date property.
---
 snscrape/modules/reddit.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/snscrape/modules/reddit.py b/snscrape/modules/reddit.py
index 32b9d6a..55af939 100644
--- a/snscrape/modules/reddit.py
+++ b/snscrape/modules/reddit.py
@@ -20,7 +20,7 @@ _logger = logging.getLogger(__name__)
 @dataclasses.dataclass
 class Submission(snscrape.base.Item):
 	author: typing.Optional[str] # E.g. submission hf7k6
-	created: datetime.datetime
+	date: datetime.datetime
 	id: str
 	link: typing.Optional[str]
 	selftext: typing.Optional[str]
@@ -28,6 +28,8 @@ class Submission(snscrape.base.Item):
 	title: str
 	url: str
 
+	created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
+
 	def __str__(self):
 		return self.url
 
@@ -36,12 +38,14 @@ class Submission(snscrape.base.Item):
 class Comment(snscrape.base.Item):
 	author: typing.Optional[str]
 	body: str
-	created: datetime.datetime
+	date: datetime.datetime
 	id: str
 	parentId: typing.Optional[str]
 	subreddit: typing.Optional[str]
 	url: str
 
+	created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
+
 	def __str__(self):
 		return self.url
 
@@ -111,7 +115,7 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
 
 		kwargs = {
 			'author': d.get('author'),
-			'created': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc),
+			'date': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc),
 			'url': f'https://old.reddit.com{permalink}',
 			'subreddit': d.get('subreddit'),
 		}
@@ -192,7 +196,7 @@ class _RedditPushshiftSearchScraper(_RedditPushshiftScraper):
 
 		while True:
 			# Return newer first; if both have the same creation datetime, return the comment first
-			if tipSubmission.created > tipComment.created:
+			if tipSubmission.date > tipComment.date:
 				yield tipSubmission
 				try:
 					tipSubmission = next(submissionsIter)