Refactor username vs ID mess

Closes #354
This commit is contained in:
JustAnotherArchivist
2022-01-12 22:36:26 +00:00
parent e6076353c8
commit eebdfc1c55
2 changed files with 34 additions and 40 deletions

View File

@@ -694,13 +694,13 @@ class TwitterSearchScraper(_TwitterAPIScraper):
class TwitterUserScraper(TwitterSearchScraper):
name = 'twitter-user'
def __init__(self, username, isUserId = False, **kwargs):
if not self.is_valid_username(username):
def __init__(self, user, **kwargs):
self._isUserId = isinstance(user, int)
if not self._isUserId and not self.is_valid_username(user):
raise ValueError('Invalid username')
super().__init__(f'from:{username}', **kwargs)
self._username = username
self._isUserId = isUserId
self._baseUrl = f'https://twitter.com/{self._username}' if not self._isUserId else f'https://twitter.com/i/user/{self._username}'
super().__init__(f'from:{user}', **kwargs)
self._user = user
self._baseUrl = f'https://twitter.com/{self._user}' if not self._isUserId else f'https://twitter.com/i/user/{self._user}'
def _get_entity(self):
self._ensure_guest_token()
@@ -710,7 +710,7 @@ class TwitterUserScraper(TwitterSearchScraper):
else:
fieldName = 'userId'
endpoint = 'https://twitter.com/i/api/graphql/WN6Hck-Pwm-YP0uxVj1oMQ/UserByRestIdWithoutResults'
params = {'variables': json.dumps({fieldName: self._username, 'withHighlightedLabel': True}, separators = (',', ':'))}
params = {'variables': json.dumps({fieldName: str(self._user), 'withHighlightedLabel': True}, separators = (',', ':'))}
obj = self._get_api_data(endpoint, params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote))
if not obj['data']:
return None
@@ -747,28 +747,28 @@ class TwitterUserScraper(TwitterSearchScraper):
def get_items(self):
if self._isUserId:
# Resolve user ID to username
self._username = self.entity.username
self._user = self.entity.username
self._isUserId = False
self._query = f'from:{self._username}'
self._query = f'from:{self._user}'
yield from super().get_items()
@staticmethod
def is_valid_username(s):
return (1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == '') or (s and s.strip(string.digits) == '')
return 1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == ''
@classmethod
def _cli_setup_parser(cls, subparser):
def username(s):
if cls.is_valid_username(s):
def user(s):
if cls.is_valid_username(s) or s.isdigit():
return s
raise ValueError('Invalid username')
raise ValueError('Invalid username or ID')
subparser.add_argument('--user-id', dest = 'isUserId', action = 'store_true', default = False, help = 'Use user ID instead of username')
subparser.add_argument('username', type = username, help = 'A Twitter username (without @)')
subparser.add_argument('user', type = user, help = 'A Twitter username (without @)')
@classmethod
def _cli_from_args(cls, args):
return cls._cli_construct(args, args.username, args.isUserId)
return cls._cli_construct(args, user = int(args.user) if args.isUserId else args.user)
class TwitterProfileScraper(TwitterUserScraper):
@@ -778,7 +778,7 @@ class TwitterProfileScraper(TwitterUserScraper):
if not self._isUserId:
userId = self.entity.id
else:
userId = self._username
userId = self._user
paginationParams = {
'include_profile_interstitial_type': '1',
'include_blocking': '1',

View File

@@ -52,24 +52,23 @@ class User(snscrape.base.Entity):
class WeiboUserScraper(snscrape.base.Scraper):
name = 'weibo-user'
def __init__(self, name, uid, **kwargs):
def __init__(self, user, **kwargs):
super().__init__(**kwargs)
self._name = name
self._uid = uid
if self._name is None and self._uid is None:
raise ValueError('name or uid must not be None')
self._user = user
self._isUserId = isinstance(user, int)
self._headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
def _ensure_uid(self):
if self._uid is not None:
def _ensure_user_id(self):
if self._isUserId:
return
r = self._get(f'https://m.weibo.cn/n/{self._name}', headers = self._headers, allowRedirects = False)
r = self._get(f'https://m.weibo.cn/n/{self._user}', headers = self._headers, allowRedirects = False)
if r.status_code == 302 and r.headers['Location'].startswith('/u/') and len(r.headers['Location']) == 13 and r.headers['Location'][3:].strip('0123456789') == '':
# Redirect to uid URL
self._uid = int(r.headers['Location'][3:])
self._user = int(r.headers['Location'][3:])
self._isUserId = True
elif r.status_code == 200 and '<p class="h5-4con">用户不存在</p>' in r.text:
_logger.warning('User does not exist')
self._uid = _userDoesNotExist
self._user = _userDoesNotExist
else:
raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})')
@@ -99,13 +98,13 @@ class WeiboUserScraper(snscrape.base.Scraper):
)
def get_items(self):
self._ensure_uid()
if self._uid is _userDoesNotExist:
self._ensure_user_id()
if self._user is _userDoesNotExist:
return
sinceId = None
while True:
sinceParam = f'&since_id={sinceId}' if sinceId is not None else ''
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}&containerid=107603{self._uid}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response)
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}&containerid=107603{self._user}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response)
if r.status_code != 200:
raise snscrape.base.ScraperException(f'Got status code {r.status_code}')
o = r.json()
@@ -133,10 +132,10 @@ class WeiboUserScraper(snscrape.base.Scraper):
)
def _get_entity(self):
self._ensure_uid()
if self._uid is _userDoesNotExist:
self._ensure_user_id()
if self._user is _userDoesNotExist:
return
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}', headers = self._headers)
r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}', headers = self._headers)
if r.status_code != 200:
raise snscrape.base.ScraperException('Could not fetch user info')
o = r.json()
@@ -144,14 +143,9 @@ class WeiboUserScraper(snscrape.base.Scraper):
@classmethod
def _cli_setup_parser(cls, subparser):
subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user name or ID')
subparser.add_argument('--name', dest = 'isName', action = 'store_true', help = 'Use username instead of user ID')
subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user ID')
@classmethod
def _cli_from_args(cls, args):
if len(args.user) == 10 and args.user.strip('0123456789') == '':
uid = args.user
name = None
else:
uid = None
name = args.user
return cls._cli_construct(args, name = name, uid = uid)
return cls._cli_construct(args, user = args.user if args.isName else int(args.user))