Mirror of https://github.com/bellingcat/snscrape.git (synced 2026-06-12 20:38:29 +03:00)
Use more assignment expressions where appropriate
This commit is contained in:
@@ -121,19 +121,17 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
|
||||
url = f'https://twitter.com/{username}/status/{tweetID}'
|
||||
|
||||
date = None
|
||||
- timestampA = tweet.find('a', 'tweet-timestamp')
- if timestampA:
+ if (timestampA := tweet.find('a', 'tweet-timestamp')):
|
||||
timestampSpan = timestampA.find('span', '_timestamp')
|
||||
if timestampSpan and timestampSpan.has_attr('data-time'):
|
||||
date = datetime.datetime.fromtimestamp(int(timestampSpan['data-time']), datetime.timezone.utc)
|
||||
if not date:
|
||||
logger.warning(f'Failed to extract date for {url}')
|
||||
|
||||
- contentP = tweet.find('p', 'tweet-text')
|
||||
content = None
|
||||
outlinks = []
|
||||
tcooutlinks = []
|
||||
- if contentP:
+ if (contentP := tweet.find('p', 'tweet-text')):
|
||||
content = contentP.text
|
||||
for a in contentP.find_all('a'):
|
||||
if a.has_attr('href') and not a['href'].startswith('/') and (not a.has_attr('class') or 'u-hidden' not in a['class']):
|
||||
@@ -144,8 +142,7 @@ class TwitterOldDesignScraper(snscrape.base.Scraper):
|
||||
tcooutlinks.append(a['href'])
|
||||
else:
|
||||
logger.warning(f'Failed to extract content for {url}')
|
||||
- card = tweet.find('div', 'card2')
- if card and 'has-autoplayable-media' not in card['class']:
+ if (card := tweet.find('div', 'card2')) and 'has-autoplayable-media' not in card['class']:
|
||||
for div in card.find_all('div'):
|
||||
if div.has_attr('data-card-url'):
|
||||
outlinks.append(div['data-card-url'])
|
||||
@@ -177,8 +174,7 @@ class TwitterAPIScraper(snscrape.base.Scraper):
|
||||
return
|
||||
logger.info('Retrieving guest token')
|
||||
r = self._get(self._baseUrl if url is None else url, headers = {'User-Agent': self._userAgent})
|
||||
- match = re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)
- if match:
+ if (match := re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)):
|
||||
logger.debug('Found guest token in HTML')
|
||||
self._guestToken = match.group(1)
|
||||
if 'gt' in r.cookies:
|
||||
|
||||
Reference in New Issue
Block a user