From eebdfc1c55d49ed60bf1438fa566fcc32fb495e7 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Wed, 12 Jan 2022 22:36:26 +0000 Subject: [PATCH] Refactor username vs ID mess Closes #354 --- snscrape/modules/twitter.py | 32 ++++++++++++++-------------- snscrape/modules/weibo.py | 42 ++++++++++++++++--------------------- 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 699fd2b..0ee2301 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -694,13 +694,13 @@ class TwitterSearchScraper(_TwitterAPIScraper): class TwitterUserScraper(TwitterSearchScraper): name = 'twitter-user' - def __init__(self, username, isUserId = False, **kwargs): - if not self.is_valid_username(username): + def __init__(self, user, **kwargs): + self._isUserId = isinstance(user, int) + if not self._isUserId and not self.is_valid_username(user): raise ValueError('Invalid username') - super().__init__(f'from:{username}', **kwargs) - self._username = username - self._isUserId = isUserId - self._baseUrl = f'https://twitter.com/{self._username}' if not self._isUserId else f'https://twitter.com/i/user/{self._username}' + super().__init__(f'from:{user}', **kwargs) + self._user = user + self._baseUrl = f'https://twitter.com/{self._user}' if not self._isUserId else f'https://twitter.com/i/user/{self._user}' def _get_entity(self): self._ensure_guest_token() @@ -710,7 +710,7 @@ class TwitterUserScraper(TwitterSearchScraper): else: fieldName = 'userId' endpoint = 'https://twitter.com/i/api/graphql/WN6Hck-Pwm-YP0uxVj1oMQ/UserByRestIdWithoutResults' - params = {'variables': json.dumps({fieldName: self._username, 'withHighlightedLabel': True}, separators = (',', ':'))} + params = {'variables': json.dumps({fieldName: str(self._user), 'withHighlightedLabel': True}, separators = (',', ':'))} obj = self._get_api_data(endpoint, params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)) if not obj['data']: return None @@ -747,28 +747,28 @@ class TwitterUserScraper(TwitterSearchScraper): def get_items(self): if self._isUserId: # Resolve user ID to username - self._username = self.entity.username + self._user = self.entity.username self._isUserId = False - self._query = f'from:{self._username}' + self._query = f'from:{self._user}' yield from super().get_items() @staticmethod def is_valid_username(s): - return (1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == '') or (s and s.strip(string.digits) == '') + return 1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == '' @classmethod def _cli_setup_parser(cls, subparser): - def username(s): - if cls.is_valid_username(s): + def user(s): + if cls.is_valid_username(s) or s.isdigit(): return s - raise ValueError('Invalid username') + raise ValueError('Invalid username or ID') subparser.add_argument('--user-id', dest = 'isUserId', action = 'store_true', default = False, help = 'Use user ID instead of username') - subparser.add_argument('username', type = username, help = 'A Twitter username (without @)') + subparser.add_argument('user', type = user, help = 'A Twitter username (without @)') @classmethod def _cli_from_args(cls, args): - return cls._cli_construct(args, args.username, args.isUserId) + return cls._cli_construct(args, user = int(args.user) if args.isUserId else args.user) class TwitterProfileScraper(TwitterUserScraper): @@ -778,7 +778,7 @@ class TwitterProfileScraper(TwitterUserScraper): if not self._isUserId: userId = self.entity.id else: - userId = self._username + userId = self._user paginationParams = { 'include_profile_interstitial_type': '1', 'include_blocking': '1', diff --git a/snscrape/modules/weibo.py b/snscrape/modules/weibo.py index 4cf90d3..38318bd 100644 --- a/snscrape/modules/weibo.py +++ b/snscrape/modules/weibo.py @@ -52,24 +52,23 @@ class User(snscrape.base.Entity): class WeiboUserScraper(snscrape.base.Scraper): name = 'weibo-user' - def __init__(self, name, uid, **kwargs): + def __init__(self, user, **kwargs): super().__init__(**kwargs) - self._name = name - self._uid = uid - if self._name is None and self._uid is None: - raise ValueError('name or uid must not be None') + self._user = user + self._isUserId = isinstance(user, int) self._headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'} - def _ensure_uid(self): - if self._uid is not None: + def _ensure_user_id(self): + if self._isUserId: return - r = self._get(f'https://m.weibo.cn/n/{self._name}', headers = self._headers, allowRedirects = False) + r = self._get(f'https://m.weibo.cn/n/{self._user}', headers = self._headers, allowRedirects = False) if r.status_code == 302 and r.headers['Location'].startswith('/u/') and len(r.headers['Location']) == 13 and r.headers['Location'][3:].strip('0123456789') == '': # Redirect to uid URL - self._uid = int(r.headers['Location'][3:]) + self._user = int(r.headers['Location'][3:]) + self._isUserId = True elif r.status_code == 200 and '

用户不存在

' in r.text: _logger.warning('User does not exist') - self._uid = _userDoesNotExist + self._user = _userDoesNotExist else: raise snscrape.base.ScraperError(f'Got unexpected response on resolving username ({r.status_code})') @@ -99,13 +98,13 @@ class WeiboUserScraper(snscrape.base.Scraper): ) def get_items(self): - self._ensure_uid() - if self._uid is _userDoesNotExist: + self._ensure_user_id() + if self._user is _userDoesNotExist: return sinceId = None while True: sinceParam = f'&since_id={sinceId}' if sinceId is not None else '' - r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}&containerid=107603{self._uid}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response) + r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}&containerid=107603{self._user}&count=25{sinceParam}', headers = self._headers, responseOkCallback = self._check_timeline_response) if r.status_code != 200: raise snscrape.base.ScraperException(f'Got status code {r.status_code}') o = r.json() @@ -133,10 +132,10 @@ class WeiboUserScraper(snscrape.base.Scraper): ) def _get_entity(self): - self._ensure_uid() - if self._uid is _userDoesNotExist: + self._ensure_user_id() + if self._user is _userDoesNotExist: return - r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._uid}', headers = self._headers) + r = self._get(f'https://m.weibo.cn/api/container/getIndex?type=uid&value={self._user}', headers = self._headers) if r.status_code != 200: raise snscrape.base.ScraperException('Could not fetch user info') o = r.json() @@ -144,14 +143,9 @@ class WeiboUserScraper(snscrape.base.Scraper): @classmethod def _cli_setup_parser(cls, subparser): - subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user name or ID') + subparser.add_argument('--name', dest = 'isName', action = 'store_true', help = 'Use username instead of user ID') + subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user ID') @classmethod def _cli_from_args(cls, args): - if len(args.user) == 10 and args.user.strip('0123456789') == '': - uid = args.user - name = None - else: - uid = None - name = args.user - return cls._cli_construct(args, name = name, uid = uid) + return cls._cli_construct(args, user = args.user if args.isName else int(args.user))