From 0cc4f0c0166c3baa8d5bfcbe0648951d315770dc Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 1 Sep 2020 03:13:49 +0000
Subject: [PATCH] Add support for Twitter profile pages

Closes #5
---
 snscrape/modules/twitter.py | 67 ++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 9 deletions(-)

diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py
index 7ee4d34..f062ad5 100644
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -211,7 +211,7 @@ class TwitterAPIScraper(TwitterCommonScraper):
 			else:
 				continue
 			for entry in entries:
-				if entry['entryId'].startswith('sq-I-t-'):
+				if entry['entryId'].startswith('sq-I-t-') or entry['entryId'].startswith('tweet-'):
 					if 'tweet' in entry['content']['item']['content']:
 						if 'promotedMetadata' in entry['content']['item']['content']['tweet']: # Promoted tweet aka ads
 							continue
@@ -220,14 +220,18 @@ class TwitterAPIScraper(TwitterCommonScraper):
 						tweet = obj['globalObjects']['tweets'][entry['content']['item']['content']['tombstone']['tweet']['id']]
 					else:
 						raise snscrape.base.ScraperException(f'Unable to handle entry {entry["entryId"]!r}')
-					tweetID = tweet['id']
-					content = tweet['full_text']
-					username = obj['globalObjects']['users'][tweet['user_id_str']]['screen_name']
-					date = datetime.datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y').replace(tzinfo = datetime.timezone.utc)
-					outlinks = [u['expanded_url'] for u in tweet['entities']['urls']]
-					tcooutlinks = [u['url'] for u in tweet['entities']['urls']]
-					url = f'https://twitter.com/{username}/status/{tweetID}'
-					yield Tweet(url, date, content, tweetID, username, outlinks, ' '.join(outlinks), tcooutlinks, ' '.join(tcooutlinks))
+					yield self._tweet_to_tweet(tweet, obj)
+
+	def _tweet_to_tweet(self, tweet, obj):
+		# Builds a Tweet from one raw API tweet dict; obj is the API response it came from, used to resolve the author's screen name
+		tweetID = int(tweet['id_str']) if 'id' not in tweet else tweet['id']
+		content = tweet['full_text']
+		username = obj['globalObjects']['users'][tweet['user_id_str']]['screen_name']
+		date = datetime.datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y').replace(tzinfo = datetime.timezone.utc)
+		urlEntities = tweet['entities'].get('urls', []) # The urls list may be absent from the entities
+		outlinks = [e['expanded_url'] for e in urlEntities]
+		tcooutlinks = [e['url'] for e in urlEntities]
+		return Tweet(f'https://twitter.com/{username}/status/{tweetID}', date, content, tweetID, username, outlinks, ' '.join(outlinks), tcooutlinks, ' '.join(tcooutlinks))
 
 
 class TwitterSearchScraper(TwitterAPIScraper):
@@ -355,6 +359,51 @@ class TwitterUserScraper(TwitterSearchScraper):
 	def from_args(cls, args):
 		return cls(args.username, retries = args.retries)
 
+
+class TwitterProfileScraper(TwitterUserScraper):
+	# Scrapes a user's tweets through the profile timeline API endpoint
+	name = 'twitter-profile'
+
+	def __init__(self, *args, **kwargs):
+		super().__init__(*args, **kwargs)
+		self._baseUrl = f'https://twitter.com/{self._username}'
+
+	def get_items(self):
+		# Yields the tweets extracted from every API response page of the profile timeline of self.entity
+		userId = self.entity.id
+		params = {
+			'include_profile_interstitial_type': '1',
+			'include_blocking': '1',
+			'include_blocked_by': '1',
+			'include_followed_by': '1',
+			'include_want_retweets': '1',
+			'include_mute_edge': '1',
+			'include_can_dm': '1',
+			'include_can_media_tag': '1',
+			'skip_status': '1',
+			'cards_platform': 'Web-12',
+			'include_cards': '1',
+			'include_ext_alt_text': 'true',
+			'include_quote_count': 'true',
+			'include_reply_count': '1',
+			'tweet_mode': 'extended',
+			'include_entities': 'true',
+			'include_user_entities': 'true',
+			'include_ext_media_color': 'true',
+			'include_ext_media_availability': 'true',
+			'send_error_codes': 'true',
+			'simple_quoted_tweets': 'true',
+			'include_tweet_replies': 'true',
+			'userId': userId,
+			'count': '100',
+		}
+		paginationParams = {**params, 'cursor': None}
+		# NOTE(review): the value repeats the 'ext=' key name, which looks unintended; confirm before changing. Set last so 'ext' stays the final parameter in both dicts.
+		params['ext'] = paginationParams['ext'] = 'ext=mediaStats%2ChighlightedLabel'
+		for obj in self._iter_api_data(f'https://api.twitter.com/2/timeline/profile/{userId}.json', params, paginationParams):
+			yield from self._instructions_to_tweets(obj)
+
+
 class TwitterHashtagScraper(TwitterSearchScraper):
 	name = 'twitter-hashtag'