mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-15 05:38:29 +03:00
@@ -246,7 +246,7 @@ def parse_args():
|
||||
classes.extend(cls.__subclasses__())
|
||||
for scraper, cls in sorted(scrapers.items()):
|
||||
subparser = subparsers.add_parser(cls.name, help = '', formatter_class = argparse.ArgumentDefaultsHelpFormatter)
|
||||
cls.cli_setup_parser(subparser)
|
||||
cls._cli_setup_parser(subparser)
|
||||
subparser.set_defaults(cls = cls)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -293,7 +293,7 @@ def main():
|
||||
setup_logging()
|
||||
args = parse_args()
|
||||
configure_logging(args.verbosity, args.dumpLocals)
|
||||
scraper = args.cls.cli_from_args(args)
|
||||
scraper = args.cls._cli_from_args(args)
|
||||
|
||||
i = 0
|
||||
with _dump_locals_on_exception():
|
||||
|
||||
@@ -219,15 +219,15 @@ class Scraper:
|
||||
return self._request('POST', *args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._construct(args)
|
||||
|
||||
@classmethod
|
||||
def cli_construct(cls, argparseArgs, *args, **kwargs):
|
||||
def _cli_construct(cls, argparseArgs, *args, **kwargs):
|
||||
return cls(*args, **kwargs, retries = argparseArgs.retries)
|
||||
|
||||
|
||||
|
||||
@@ -205,12 +205,12 @@ class _FacebookUserAndCommunityScraper(_FacebookCommonScraper):
|
||||
yield from self._soup_to_items(soup, self._baseUrl, 'user')
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('username', type = snscrape.base.nonempty_string('username'), help = 'A Facebook username or user ID')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.username)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.username)
|
||||
|
||||
|
||||
class FacebookUserScraper(_FacebookUserAndCommunityScraper):
|
||||
@@ -356,9 +356,9 @@ class FacebookGroupScraper(_FacebookCommonScraper):
|
||||
yield from self._soup_to_items(soup, baseUrl, 'group')
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('group', type = snscrape.base.nonempty_string('group'), help = 'A group name or ID')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.group)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.group)
|
||||
|
||||
@@ -156,12 +156,12 @@ class InstagramUserScraper(_InstagramCommonScraper):
|
||||
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('username', type = snscrape.base.nonempty_string('username'), help = 'An Instagram username (no leading @)')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.username)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.username)
|
||||
|
||||
def _get_entity(self):
|
||||
r = self._initial_page()
|
||||
@@ -211,12 +211,12 @@ class InstagramHashtagScraper(_InstagramCommonScraper):
|
||||
self._variablesFormat = '{{"tag_name":"{pageID}","first":50,"after":"{endCursor}"}}'
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('hashtag', type = snscrape.base.nonempty_string('hashtag'), help = 'An Instagram hashtag (no leading #)')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.hashtag)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.hashtag)
|
||||
|
||||
|
||||
class InstagramLocationScraper(_InstagramCommonScraper):
|
||||
@@ -233,9 +233,9 @@ class InstagramLocationScraper(_InstagramCommonScraper):
|
||||
self._variablesFormat = '{{"id":"{pageID}","first":50,"after":"{endCursor}"}}'
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('locationid', help = 'An Instagram location ID', type = int)
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.locationid)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.locationid)
|
||||
|
||||
@@ -288,12 +288,12 @@ class MastodonProfileScraper(_MastodonCommonScraper):
|
||||
url = urllib.parse.urljoin(r.url, nextA['href'])
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('account', type = snscrape.base.nonempty_string('account'), help = 'A Mastodon account. This can be either a URL to the profile page or a string of the form @account@instance.example.org')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.account)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.account)
|
||||
|
||||
|
||||
class MastodonTootScraperMode(enum.Enum):
|
||||
@@ -301,7 +301,7 @@ class MastodonTootScraperMode(enum.Enum):
|
||||
THREAD = 'thread'
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
def _cli_from_args(cls, args):
|
||||
if args.thread:
|
||||
return cls.THREAD
|
||||
return cls.SINGLE
|
||||
@@ -331,10 +331,10 @@ class MastodonTootScraper(_MastodonCommonScraper):
|
||||
yield from self._entries_to_items(soup.find('div', class_ = 'activity-stream').find_all('div', class_ = 'entry'), r.url)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('--thread', action = 'store_true', help = 'Collect thread around the toot referenced by the URL')
|
||||
subparser.add_argument('url', type = snscrape.base.nonempty_string('url'), help = 'A URL for a toot')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.url, MastodonTootScraperMode.cli_from_args(args))
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.url, MastodonTootScraperMode._cli_from_args(args))
|
||||
|
||||
@@ -204,7 +204,7 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
|
||||
yield from self._iter_api_submissions_and_comments({type(self)._apiField: self._name})
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('--no-submissions', dest = 'noSubmissions', action = 'store_true', default = False, help = 'Don\'t list submissions')
|
||||
subparser.add_argument('--no-comments', dest = 'noComments', action = 'store_true', default = False, help = 'Don\'t list comments')
|
||||
subparser.add_argument('--before', metavar = 'TIMESTAMP', type = int, help = 'Fetch results before a Unix timestamp')
|
||||
@@ -213,9 +213,9 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
|
||||
subparser.add_argument(name, type = snscrape.base.nonempty_string(name))
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
def _cli_from_args(cls, args):
|
||||
name = cls.name.split('-', 1)[1]
|
||||
return cls.cli_construct(args, getattr(args, name), submissions = not args.noSubmissions, comments = not args.noComments, before = args.before, after = args.after)
|
||||
return cls._cli_construct(args, getattr(args, name), submissions = not args.noSubmissions, comments = not args.noComments, before = args.before, after = args.after)
|
||||
|
||||
|
||||
class RedditUserScraper(_RedditPushshiftScraper):
|
||||
|
||||
@@ -195,9 +195,9 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
||||
return Channel(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('channel', type = snscrape.base.nonempty_string('channel'), help = 'A channel name')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.channel)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.channel)
|
||||
|
||||
@@ -610,9 +610,9 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||
return UserLabel(**labelKwargs)
|
||||
|
||||
@classmethod
|
||||
def cli_construct(cls, argparseArgs, *args, **kwargs):
|
||||
def _cli_construct(cls, argparseArgs, *args, **kwargs):
|
||||
kwargs['guestTokenManager'] = _CLIGuestTokenManager()
|
||||
return super().cli_construct(argparseArgs, *args, **kwargs)
|
||||
return super()._cli_construct(argparseArgs, *args, **kwargs)
|
||||
|
||||
|
||||
class TwitterSearchScraper(_TwitterAPIScraper):
|
||||
@@ -681,14 +681,14 @@ class TwitterSearchScraper(_TwitterAPIScraper):
|
||||
yield from self._instructions_to_tweets(obj)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('--cursor', metavar = 'CURSOR')
|
||||
subparser.add_argument('--top', action = 'store_true', default = False, help = 'Enable fetching top tweets instead of live/chronological')
|
||||
subparser.add_argument('query', type = snscrape.base.nonempty_string('query'), help = 'A Twitter search string')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.query, cursor = args.cursor, top = args.top)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.query, cursor = args.cursor, top = args.top)
|
||||
|
||||
|
||||
class TwitterUserScraper(TwitterSearchScraper):
|
||||
@@ -757,7 +757,7 @@ class TwitterUserScraper(TwitterSearchScraper):
|
||||
return (1 <= len(s) <= 15 and s.strip(string.ascii_letters + string.digits + '_') == '') or (s and s.strip(string.digits) == '')
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
def username(s):
|
||||
if cls.is_valid_username(s):
|
||||
return s
|
||||
@@ -767,8 +767,8 @@ class TwitterUserScraper(TwitterSearchScraper):
|
||||
subparser.add_argument('username', type = username, help = 'A Twitter username (without @)')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.username, args.isUserId)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.username, args.isUserId)
|
||||
|
||||
|
||||
class TwitterProfileScraper(TwitterUserScraper):
|
||||
@@ -822,12 +822,12 @@ class TwitterHashtagScraper(TwitterSearchScraper):
|
||||
self._hashtag = hashtag
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('hashtag', type = snscrape.base.nonempty_string('hashtag'), help = 'A Twitter hashtag (without #)')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.hashtag)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.hashtag)
|
||||
|
||||
|
||||
class TwitterTweetScraperMode(enum.Enum):
|
||||
@@ -836,7 +836,7 @@ class TwitterTweetScraperMode(enum.Enum):
|
||||
RECURSE = 'recurse'
|
||||
|
||||
@classmethod
|
||||
def from_args(cls, args):
|
||||
def _cli_from_args(cls, args):
|
||||
if args.scroll:
|
||||
return cls.SCROLL
|
||||
if args.recurse:
|
||||
@@ -903,15 +903,15 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
||||
queue.append(tweet.id)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
group = subparser.add_mutually_exclusive_group(required = False)
|
||||
group.add_argument('--scroll', action = 'store_true', default = False, help = 'Enable scrolling in both directions')
|
||||
group.add_argument('--recurse', '--recursive', action = 'store_true', default = False, help = 'Enable recursion through all tweets encountered (warning: slow, potentially memory-intensive!)')
|
||||
subparser.add_argument('tweetId', type = int, help = 'A tweet ID')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.tweetId, TwitterTweetScraperMode.from_args(args))
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.tweetId, TwitterTweetScraperMode._cli_from_args(args))
|
||||
|
||||
|
||||
class TwitterListPostsScraper(TwitterSearchScraper):
|
||||
@@ -922,12 +922,12 @@ class TwitterListPostsScraper(TwitterSearchScraper):
|
||||
self._listName = listName
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('list', type = snscrape.base.nonempty_string('list'), help = 'A Twitter list ID or a string of the form "username/listname" (replace spaces with dashes)')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.list)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.list)
|
||||
|
||||
|
||||
class TwitterTrendsScraper(_TwitterAPIScraper):
|
||||
|
||||
@@ -374,9 +374,9 @@ class VKontakteUserScraper(snscrape.base.Scraper):
|
||||
return User(**kwargs)
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('username', type = snscrape.base.nonempty_string('username'), help = 'A VK username')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
return cls.cli_construct(args, args.username)
|
||||
def _cli_from_args(cls, args):
|
||||
return cls._cli_construct(args, args.username)
|
||||
|
||||
@@ -141,15 +141,15 @@ class WeiboUserScraper(snscrape.base.Scraper):
|
||||
return self._user_info_to_entity(o['data']['userInfo'])
|
||||
|
||||
@classmethod
|
||||
def cli_setup_parser(cls, subparser):
|
||||
def _cli_setup_parser(cls, subparser):
|
||||
subparser.add_argument('user', type = snscrape.base.nonempty_string('user'), help = 'A user name or ID')
|
||||
|
||||
@classmethod
|
||||
def cli_from_args(cls, args):
|
||||
def _cli_from_args(cls, args):
|
||||
if len(args.user) == 10 and args.user.strip('0123456789') == '':
|
||||
uid = args.user
|
||||
name = None
|
||||
else:
|
||||
uid = None
|
||||
name = args.user
|
||||
return cls.cli_construct(args, name = name, uid = uid)
|
||||
return cls._cli_construct(args, name = name, uid = uid)
|
||||
|
||||
Reference in New Issue
Block a user