mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-08 02:28:29 +03:00
@@ -694,13 +694,13 @@ class TwitterSearchScraper(_TwitterAPIScraper):
|
||||
class TwitterUserScraper(TwitterSearchScraper):
|
||||
name = 'twitter-user'
|
||||
|
||||
def __init__(self, username, isUserId = False, **kwargs):
|
||||
if not self.is_valid_username(username):
|
||||
def __init__(self, user, **kwargs):
|
||||
self._isUserId = isinstance(user, int)
|
||||
if not self._isUserId and not self.is_valid_username(user):
|
||||
raise ValueError('Invalid username')
|
||||
super().__init__(f'from:{username}', **kwargs)
|
||||
self._username = username
|
||||
self._isUserId = isUserId
|
||||
self._baseUrl = f'https://twitter.com/{self._username}' if not self._isUserId else f'https://twitter.com/i/user/{self._username}'
|
||||
super().__init__(f'from:{user}', **kwargs)
|
||||
self._user = user
|
||||
self._baseUrl = f'https://twitter.com/{self._user}' if not self._isUserId else f'https://twitter.com/i/user/{self._user}'
|
||||
|
||||
def _get_entity(self):
|
||||
self._ensure_guest_token()
|
||||
@@ -710,7 +710,7 @@ class TwitterUserScraper(TwitterSearchScraper):
|
||||
else:
|
||||
fieldName = 'userId'
|
||||
endpoint = 'https://twitter.com/i/api/graphql/WN6Hck-Pwm-YP0uxVj1oMQ/UserByRestIdWithoutResults'
|
||||
params = {'variables': json.dumps({fieldName: self._username, 'withHighlightedLabel': True}, separators = (',', ':'))}
|
||||
params = {'variables': json.dumps({fieldName: str(self._user), 'withHighlightedLabel': True}, separators = (',', ':'))}
|
||||
obj = self._get_api_data(endpoint, params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote))
|
||||
if not obj['data']:
|
||||
return None
|
||||
@@ -747,28 +747,28 @@ class TwitterUserScraper(TwitterSearchScraper):
|
||||
def get_items(self):
|
||||
if self._isUserId:
|
||||
# Resolve user ID to username
|
||||
self._username = self.entity.username
|
||||
self._user = self.entity.username
|
||||
self._isUserId = False
|
||||
self._query = f'from:{self._username}'
|
||||
self._query = f'from:{self._user}'
|
||||
yield from super().get_items()
|
||||
|
||||
@staticmethod
|
||||
def is_valid_username(s):
|
||||
return (1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == '') or (s and s.strip(string.digits) == '')
|
||||
return 1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == ''
|
||||
|
||||
@classmethod
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
def username(s):
|
||||
if cls.is_valid_username(s):
|
||||
def user(s):
|
||||
if cls.is_valid_username(s) or s.isdigit():
|
||||
return s
|
||||
raise ValueError('Invalid username')
|
||||
raise ValueError('Invalid username or ID')
|
||||
|
||||
subparser.add_argument('--user-id', dest = 'isUserId', action = 'store_true', default = False, help = 'Use user ID instead of username')
|
||||
subparser.add_argument('username', type = username, help = 'A Twitter username (without @)')
|
||||
subparser.add_argument('user', type = user, help = 'A Twitter username (without @)')
|
||||
|
||||
@classmethod
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.username, args.isUserId)
|
||||
return cls._cli_construct(args, user = int(args.user) if args.isUserId else args.user)
|
||||
|
||||
|
||||
class TwitterProfileScraper(TwitterUserScraper):
|
||||
@@ -778,7 +778,7 @@ class TwitterProfileScraper(TwitterUserScraper):
|
||||
if not self._isUserId:
|
||||
userId = self.entity.id
|
||||
else:
|
||||
userId = self._username
|
||||
userId = self._user
|
||||
paginationParams = {
|
||||
'include_profile_interstitial_type': '1',
|
||||
'include_blocking': '1',
|
||||
|
||||
@@ -52,24 +52,23 @@ class User(snscrape.base.Entity):
|
||||
class WeiboUserScraper(snscrape.base.Scraper):
|
||||
name = 'weibo-user'
|
||||
|
||||
def __init__(self, name, uid, **kwargs):
|
||||
def __init__(self, user, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._name = name
|
||||
self._uid = uid
|
||||
if self._name is None and self._uid is None:
|
||||
raise ValueError('name or uid must not be None')
|
||||
self._user = user
|
||||
self._isUserId = isinstance(user, int)
|
||||
self._headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
|
||||
|
||||
def _ensure_uid(self):
|
||||
if self._uid is not None:
|
||||
def _ensure_user_id(self):
|
||||
if self._isUserId:
|
||||
return
|
||||
r = self._get(f'https://m.weibo.cn/n/{self._name}', headers = self._headers, allowRedirects = False)
|
||||
r = self._get(f'https://m.weibo.cn/n/{self._user}', headers = self._headers, allowRedirects = False)
|
||||
if r.status_code == 302 and r.headers['Location'].startswith('/u/') and len(r.headers['Location']) == 13 and r.headers['Location'][3:].strip('0123456789') == '':
|
||||
# Redirect to uid URL
|
||||
self._uid = int(r.headers['Location'][3:])
|
||||
self._user = int(r.headers['Location'][3:])
|
||||
self._isUserId = True
|
||||
elif r.status_code == 200 and '<p class="h5-4con">用户不存在</p>' in r.text:
|
||||
_logger.warning('User does not exist')
|
||||
self._uid = _userDoesNotExist
|
||||
self._user = _userDoesNotExist
|
||||
else:
|
||||
raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
|
||||
|
||||
@@ -99,13 +98,13 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
||||
)
|
||||
|
||||
def get_items(self):
|
||||
self._ensure_uid()
|
||||
if self._uid is _userDoesNotExist:
|
||||
self._ensure_user_id()
|
||||
if self._user is _userDoesNotExist:
|
||||
return
|
||||
sinceId = None
|
||||
while True:
|
||||
sinceParam = f'&since_id={sinceId}' if sinceId is not None else ''
|
||||
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}&containerid=107603{self._uid}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response)
|
||||
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}&containerid=107603{self._user}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response)
|
||||
if r.status_code != 200:
|
||||
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
|
||||
o = r.json()
|
||||
@@ -133,10 +132,10 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
||||
)
|
||||
|
||||
def _get_entity(self):
|
||||
self._ensure_uid()
|
||||
if self._uid is _userDoesNotExist:
|
||||
self._ensure_user_id()
|
||||
if self._user is _userDoesNotExist:
|
||||
return
|
||||
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}', headers = self._headers)
|
||||
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}', headers = self._headers)
|
||||
if r.status_code != 200:
|
||||
raise snscrape.base.ScraperException('Could not fetch user info')
|
||||
o = r.json()
|
||||
@@ -144,14 +143,9 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
||||
|
||||
@classmethod
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user name or ID')
|
||||
subparser.add_argument('--name', dest = 'isName', action = 'store_true', help = 'Use username instead of user ID')
|
||||
subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user ID')
|
||||
|
||||
@classmethod
|
||||
def _cli_from_args(cls, args):
|
||||
if len(args.user) == 10 and args.user.strip('0123456789') == '':
|
||||
uid = args.user
|
||||
name = None
|
||||
else:
|
||||
uid = None
|
||||
name = args.user
|
||||
return cls._cli_construct(args, name = name, uid = uid)
|
||||
return cls._cli_construct(args, user = args.user if args.isName else int(args.user))
|
||||
|
||||
Reference in New Issue
Block a user