Use more assignment expressions where appropriate

2026-06-08 02:28:29 +03:00 · 2020-10-01 21:41:44 +00:00
parent 8b68f1a8af
commit a70b361176
5 changed files with 21 additions and 41 deletions
--- a/snscrape/cli.py
+++ b/snscrape/cli.py
@@ -254,13 +254,11 @@ def main():

 	i = 0
 	with _dump_locals_on_exception():
-		if args.withEntity:
-			entity = scraper.entity
-			if entity:
-				if args.jsonl:
-					print(json.dumps(namedtuple_to_dict_recursive(entity), default = json_serialise_datetime))
-				else:
-					print(entity)
+		if args.withEntity and (entity := scraper.entity):
+			if args.jsonl:
+				print(json.dumps(namedtuple_to_dict_recursive(entity), default = json_serialise_datetime))
+			else:
+				print(entity)
 		if args.maxResults == 0:
 			logger.info('Exiting after 0 results')
 			return
--- a/snscrape/modules/facebook.py
+++ b/snscrape/modules/facebook.py
@@ -129,8 +129,7 @@ class FacebookCommonScraper(snscrape.base.Scraper):
 			dirtyUrl = urllib.parse.urljoin(baseUrl, href)
 			cleanUrl = self._clean_url(dirtyUrl)
 			date = datetime.datetime.fromtimestamp(int(entry.find('abbr', class_ = '_5ptz')['data-utime']), datetime.timezone.utc)
-			contentDiv = entry.find('div', class_ = '_5pbx')
-			if contentDiv:
+			if (contentDiv := entry.find('div', class_ = '_5pbx')):
 				content = contentDiv.text
 			else:
 				content = None
@@ -180,9 +179,8 @@ class FacebookUserAndCommunityScraper(FacebookCommonScraper):
 			logger.warning('User does not exist')
 			return
 		yield from self._soup_to_items(soup, self._baseUrl, 'user')
-		nextPageLink = soup.find('a', ajaxify = nextPageLinkPattern)

-		while nextPageLink:
+		while (nextPageLink := soup.find('a', ajaxify = nextPageLinkPattern)):
 			logger.info('Retrieving next page')

 			# The web app sends a bunch of additional parameters. Most of them would be easy to add, but there's also __dyn, which is a compressed list of the "modules" loaded in the browser.
@@ -200,7 +198,6 @@ class FacebookUserAndCommunityScraper(FacebookCommonScraper):
 			assert '__html' in response['domops'][0][3]
 			soup = bs4.BeautifulSoup(response['domops'][0][3]['__html'], 'lxml')
 			yield from self._soup_to_items(soup, self._baseUrl, 'user')
-			nextPageLink = soup.find('a', ajaxify = nextPageLinkPattern)

 	@classmethod
 	def setup_parser(cls, subparser):
@@ -337,8 +334,7 @@ class FacebookGroupScraper(FacebookCommonScraper):
 			yield from self._soup_to_items(codeSoup, baseUrl, 'group')

 		# Pagination
-		data = pageletDataPattern.search(r.text).group(0)[pageletDataPrefixLength:]
-		while True:
+		while (data := pageletDataPattern.search(r.text).group(0)[pageletDataPrefixLength:]):
 			# As on the user profile pages, the web app sends a lot of additional parameters, but those all seem to be unnecessary (although some change the response format, e.g. from JSON to HTML)
 			r = self._get(
 				f'https://www.facebook.com/ajax/pagelet/generic.php/GroupEntstreamPagelet',
@@ -353,7 +349,6 @@ class FacebookGroupScraper(FacebookCommonScraper):
 				break
 			soup = bs4.BeautifulSoup(obj['payload'], 'lxml')
 			yield from self._soup_to_items(soup, baseUrl, 'group')
-			data = pageletDataPattern.search(r.text).group(0)[pageletDataPrefixLength:]

 	@classmethod
 	def setup_parser(cls, subparser):
--- a/snscrape/modules/telegram.py
+++ b/snscrape/modules/telegram.py
@@ -154,8 +154,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
 			# If there are no posts, fall back to the channel info div, although that should never happen due to the 'Channel created' entry.
 			logger.warning('Could not find a post; extracting username from channel info div, which may not be capitalised correctly')
 			kwargs['username'] = channelInfoDiv.find('div', class_ = 'tgme_channel_info_header_username').text[1:] # Remove @
-		descriptionDiv = channelInfoDiv.find('div', class_ = 'tgme_channel_info_description')
-		if descriptionDiv:
+		if (descriptionDiv := channelInfoDiv.find('div', class_ = 'tgme_channel_info_description')):
 			kwargs['description'] = descriptionDiv.text

 		def parse_num(s):
--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -121,19 +121,17 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
 			url = f'https://twitter.com/{username}/status/{tweetID}'

 			date = None
-			timestampA = tweet.find('a', 'tweet-timestamp')
-			if timestampA:
+			if (timestampA := tweet.find('a', 'tweet-timestamp')):
 				timestampSpan = timestampA.find('span', '_timestamp')
 				if timestampSpan and timestampSpan.has_attr('data-time'):
 					date = datetime.datetime.fromtimestamp(int(timestampSpan['data-time']), datetime.timezone.utc)
 			if not date:
 				logger.warning(f'Failed to extract date for {url}')

-			contentP = tweet.find('p', 'tweet-text')
 			content = None
 			outlinks = []
 			tcooutlinks = []
-			if contentP:
+			if (contentP := tweet.find('p', 'tweet-text')):
 				content = contentP.text
 				for a in contentP.find_all('a'):
 					if a.has_attr('href') and not a['href'].startswith('/') and (not a.has_attr('class') or 'u-hidden' not in a['class']):
@@ -144,8 +142,7 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
 						tcooutlinks.append(a['href'])
 			else:
 				logger.warning(f'Failed to extract content for {url}')
-			card = tweet.find('div', 'card2')
-			if card and 'has-autoplayable-media' not in card['class']:
+			if (card := tweet.find('div', 'card2')) and 'has-autoplayable-media' not in card['class']:
 				for div in card.find_all('div'):
 					if div.has_attr('data-card-url'):
 						outlinks.append(div['data-card-url'])
@@ -177,8 +174,7 @@ class TwitterAPIScraper(snscrape.base.Scraper):
 			return
 		logger.info('Retrieving guest token')
 		r = self._get(self._baseUrl if url is None else url, headers = {'User-Agent': self._userAgent})
-		match = re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)
-		if match:
+		if (match := re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)):
 			logger.debug('Found guest token in HTML')
 			self._guestToken = match.group(1)
 		if 'gt' in r.cookies:
--- a/snscrape/modules/vkontakte.py
+++ b/snscrape/modules/vkontakte.py
@@ -87,8 +87,7 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 			logger.warning('Private profile')
 			return

-		profileDeleted = soup.find('h5', class_ = 'profile_deleted_text')
-		if profileDeleted:
+		if (profileDeleted := soup.find('h5', class_ = 'profile_deleted_text')):
 			# Unclear what this state represents, so just log website text.
 			logger.warning(profileDeleted.text)
 			return
@@ -166,12 +165,10 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 		kwargs['name'] = nameH1.text
 		kwargs['verified'] = bool(nameH1.find('div', class_ = 'page_verified'))

-		descriptionDiv = soup.find('div', id = 'page_current_info')
-		if descriptionDiv:
+		if (descriptionDiv := soup.find('div', id = 'page_current_info')):
 			kwargs['description'] = descriptionDiv.text

-		infoDiv = soup.find('div', id = 'page_info_wrap')
-		if infoDiv:
+		if (infoDiv := soup.find('div', id = 'page_info_wrap')):
 			websites = []
 			for rowDiv in infoDiv.find_all('div', class_ = ['profile_info_row', 'group_info_row']):
 				if 'profile_info_row' in rowDiv['class']:
@@ -197,8 +194,7 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 			else:
 				return int(s.replace(',', '')), 1

-		countsDiv = soup.find('div', class_ = 'counts_module')
-		if countsDiv:
+		if (countsDiv := soup.find('div', class_ = 'counts_module')):
 			for a in countsDiv.find_all('a', class_ = 'page_counter'):
 				count, granularity = parse_num(a.find('div', class_ = 'count').text)
 				label = a.find('div', class_ = 'label').text
@@ -207,17 +203,13 @@ class VKontakteUserScraper(snscrape.base.Scraper):
 				if label in ('followers', 'posts', 'photos', 'tags'):
 					kwargs[label], kwargs[f'{label}Granularity'] = count, granularity

-		idolsDiv = soup.find('div', id = 'profile_idols')
-		if idolsDiv:
-			topDiv = idolsDiv.find('div', class_ = 'header_top')
-			if topDiv and topDiv.find('span', class_ = 'header_label').text == 'Following':
+		if (idolsDiv := soup.find('div', id = 'profile_idols')):
+			if (topDiv := idolsDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Following':
 				kwargs['following'], kwargs['followingGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)

 		# On public pages, this is where followers are listed
-		followersDiv = soup.find('div', id = 'public_followers')
-		if followersDiv:
-			topDiv = followersDiv.find('div', class_ = 'header_top')
-			if topDiv and topDiv.find('span', class_ = 'header_label').text == 'Followers':
+		if (followersDiv := soup.find('div', id = 'public_followers')):
+			if (topDiv := followersDiv.find('div', class_ = 'header_top')) and topDiv.find('span', class_ = 'header_label').text == 'Followers':
 				kwargs['followers'], kwargs['followersGranularity'] = parse_num(topDiv.find('span', class_ = 'header_count').text)

 		return User(**kwargs)