Use more assignment expressions where appropriate

This commit is contained in:
JustAnotherArchivist
2020-10-01 21:41:44 +00:00
parent 8b68f1a8af
commit a70b361176
5 changed files with 21 additions and 41 deletions

View File

@@ -121,19 +121,17 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
url = f'https://twitter.com/{username}/status/{tweetID}'
date = None
timestampA = tweet.find('a', 'tweet-timestamp')
if timestampA:
if (timestampA := tweet.find('a', 'tweet-timestamp')):
timestampSpan = timestampA.find('span', '_timestamp')
if timestampSpan and timestampSpan.has_attr('data-time'):
date = datetime.datetime.fromtimestamp(int(timestampSpan['data-time']), datetime.timezone.utc)
if not date:
logger.warning(f'Failed to extract date for {url}')
contentP = tweet.find('p', 'tweet-text')
content = None
outlinks = []
tcooutlinks = []
if contentP:
if (contentP := tweet.find('p', 'tweet-text')):
content = contentP.text
for a in contentP.find_all('a'):
if a.has_attr('href') and not a['href'].startswith('/') and (not a.has_attr('class') or 'u-hidden' not in a['class']):
@@ -144,8 +142,7 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
tcooutlinks.append(a['href'])
else:
logger.warning(f'Failed to extract content for {url}')
card = tweet.find('div', 'card2')
if card and 'has-autoplayable-media' not in card['class']:
if (card := tweet.find('div', 'card2')) and 'has-autoplayable-media' not in card['class']:
for div in card.find_all('div'):
if div.has_attr('data-card-url'):
outlinks.append(div['data-card-url'])
@@ -177,8 +174,7 @@ class TwitterAPIScraper(snscrape.base.Scraper):
return
logger.info('Retrieving guest token')
r = self._get(self._baseUrl if url is None else url, headers = {'User-Agent': self._userAgent})
match = re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)
if match:
if (match := re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)):
logger.debug('Found guest token in HTML')
self._guestToken = match.group(1)
if 'gt' in r.cookies: