mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 20:08:29 +03:00
Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b967ff82a |
@@ -5,6 +5,7 @@ import random
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import snscrape.base
|
import snscrape.base
|
||||||
|
import time
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
@@ -98,9 +99,14 @@ class TwitterSearchScraper(TwitterCommonScraper):
|
|||||||
def _get_guest_token(self):
|
def _get_guest_token(self):
|
||||||
logger.info(f'Retrieving guest token from search page')
|
logger.info(f'Retrieving guest token from search page')
|
||||||
r = self._get(self._baseUrl, headers = {'User-Agent': self._userAgent})
|
r = self._get(self._baseUrl, headers = {'User-Agent': self._userAgent})
|
||||||
if 'gt' not in r.cookies:
|
match = re.search(r'document\.cookie = decodeURIComponent\("gt=(\d+); Max-Age=10800; Domain=\.twitter\.com; Path=/; Secure"\);', r.text)
|
||||||
raise snscrape.base.ScraperException("Twitter didn't set the cookie")
|
if match:
|
||||||
return r.cookies['gt']
|
logger.debug('Found guest token in HTML')
|
||||||
|
return match.group(1)
|
||||||
|
if 'gt' in r.cookies:
|
||||||
|
logger.debug('Found guest token in cookies')
|
||||||
|
return r.cookies['gt']
|
||||||
|
raise snscrape.base.ScraperException('Unable to find guest token')
|
||||||
|
|
||||||
def _check_scroll_response(self, r):
|
def _check_scroll_response(self, r):
|
||||||
if r.status_code == 429:
|
if r.status_code == 429:
|
||||||
@@ -123,6 +129,7 @@ class TwitterSearchScraper(TwitterCommonScraper):
|
|||||||
while True:
|
while True:
|
||||||
if not guestToken:
|
if not guestToken:
|
||||||
guestToken = self._get_guest_token()
|
guestToken = self._get_guest_token()
|
||||||
|
self._session.cookies.set('gt', guestToken, domain = '.twitter.com', path = '/', secure = True, expires = time.time() + 10800)
|
||||||
headers['x-guest-token'] = guestToken
|
headers['x-guest-token'] = guestToken
|
||||||
|
|
||||||
logger.info(f'Retrieving scroll page {cursor}')
|
logger.info(f'Retrieving scroll page {cursor}')
|
||||||
@@ -162,6 +169,7 @@ class TwitterSearchScraper(TwitterCommonScraper):
|
|||||||
if r.status_code == 429:
|
if r.status_code == 429:
|
||||||
guestToken = None
|
guestToken = None
|
||||||
del self._session.cookies['gt']
|
del self._session.cookies['gt']
|
||||||
|
del headers['x-guest-token']
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
obj = r.json()
|
obj = r.json()
|
||||||
|
|||||||
Reference in New Issue
Block a user