mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-12 12:28:28 +03:00
Make all optional scraper arguments keyword-only and fix Mastodon argument style to conform with the other scrapers
Cf. #376
This commit is contained in:
@@ -139,7 +139,7 @@ class Scraper:
|
|||||||
|
|
||||||
name = None
|
name = None
|
||||||
|
|
||||||
def __init__(self, retries = 3, proxies = None):
|
def __init__(self, *, retries = 3, proxies = None):
|
||||||
self._retries = retries
|
self._retries = retries
|
||||||
self._proxies = proxies
|
self._proxies = proxies
|
||||||
self._session = requests.Session()
|
self._session = requests.Session()
|
||||||
|
|||||||
@@ -92,8 +92,8 @@ class CustomEmoji:
|
|||||||
|
|
||||||
|
|
||||||
class _MastodonCommonScraper(snscrape.base.Scraper):
|
class _MastodonCommonScraper(snscrape.base.Scraper):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(**kwargs)
|
||||||
self._headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', 'Accept-Language': 'en-US,en;q=0.5'}
|
self._headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', 'Accept-Language': 'en-US,en;q=0.5'}
|
||||||
self._lastRequest = 0
|
self._lastRequest = 0
|
||||||
|
|
||||||
@@ -245,8 +245,8 @@ class _MastodonCommonScraper(snscrape.base.Scraper):
|
|||||||
class MastodonProfileScraper(_MastodonCommonScraper):
|
class MastodonProfileScraper(_MastodonCommonScraper):
|
||||||
name = 'mastodon-profile'
|
name = 'mastodon-profile'
|
||||||
|
|
||||||
def __init__(self, account, *args, **kwargs):
|
def __init__(self, account, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(**kwargs)
|
||||||
if account.startswith('@') and account.count('@') == 2:
|
if account.startswith('@') and account.count('@') == 2:
|
||||||
account, domain = account[1:].split('@')
|
account, domain = account[1:].split('@')
|
||||||
url = f'https://{domain}/@{account}'
|
url = f'https://{domain}/@{account}'
|
||||||
@@ -310,8 +310,8 @@ class MastodonTootScraperMode(enum.Enum):
|
|||||||
class MastodonTootScraper(_MastodonCommonScraper):
|
class MastodonTootScraper(_MastodonCommonScraper):
|
||||||
name = 'mastodon-toot'
|
name = 'mastodon-toot'
|
||||||
|
|
||||||
def __init__(self, url, mode, *args, **kwargs):
|
def __init__(self, url, *, mode = MastodonTootScraperMode.SINGLE, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(**kwargs)
|
||||||
self._url = url
|
self._url = url
|
||||||
self._mode = mode
|
self._mode = mode
|
||||||
|
|
||||||
@@ -337,4 +337,4 @@ class MastodonTootScraper(_MastodonCommonScraper):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _cli_from_args(cls, args):
|
def _cli_from_args(cls, args):
|
||||||
return cls._cli_construct(args, args.url, MastodonTootScraperMode._cli_from_args(args))
|
return cls._cli_construct(args, args.url, mode = MastodonTootScraperMode._cli_from_args(args))
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ class Comment(snscrape.base.Item):
|
|||||||
|
|
||||||
|
|
||||||
class _RedditPushshiftScraper(snscrape.base.Scraper):
|
class _RedditPushshiftScraper(snscrape.base.Scraper):
|
||||||
def __init__(self, name, submissions = True, comments = True, before = None, after = None, **kwargs):
|
def __init__(self, name, *, submissions = True, comments = True, before = None, after = None, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self._name = name
|
self._name = name
|
||||||
self._submissions = submissions
|
self._submissions = submissions
|
||||||
|
|||||||
@@ -268,7 +268,7 @@ class _CLIGuestTokenManager(GuestTokenManager):
|
|||||||
|
|
||||||
|
|
||||||
class _TwitterAPIScraper(snscrape.base.Scraper):
|
class _TwitterAPIScraper(snscrape.base.Scraper):
|
||||||
def __init__(self, baseUrl, guestTokenManager = None, **kwargs):
|
def __init__(self, baseUrl, *, guestTokenManager = None, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self._baseUrl = baseUrl
|
self._baseUrl = baseUrl
|
||||||
if guestTokenManager is None:
|
if guestTokenManager is None:
|
||||||
@@ -618,7 +618,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
|
|||||||
class TwitterSearchScraper(_TwitterAPIScraper):
|
class TwitterSearchScraper(_TwitterAPIScraper):
|
||||||
name = 'twitter-search'
|
name = 'twitter-search'
|
||||||
|
|
||||||
def __init__(self, query, cursor = None, top = False, **kwargs):
|
def __init__(self, query, *, cursor = None, top = False, **kwargs):
|
||||||
if not query.strip():
|
if not query.strip():
|
||||||
raise ValueError('empty query')
|
raise ValueError('empty query')
|
||||||
super().__init__(baseUrl = 'https://twitter.com/search?' + urllib.parse.urlencode({'f': 'live', 'lang': 'en', 'q': query, 'src': 'spelling_expansion_revert_click'}), **kwargs)
|
super().__init__(baseUrl = 'https://twitter.com/search?' + urllib.parse.urlencode({'f': 'live', 'lang': 'en', 'q': query, 'src': 'spelling_expansion_revert_click'}), **kwargs)
|
||||||
@@ -847,7 +847,7 @@ class TwitterTweetScraperMode(enum.Enum):
|
|||||||
class TwitterTweetScraper(_TwitterAPIScraper):
|
class TwitterTweetScraper(_TwitterAPIScraper):
|
||||||
name = 'twitter-tweet'
|
name = 'twitter-tweet'
|
||||||
|
|
||||||
def __init__(self, tweetId, mode = TwitterTweetScraperMode.SINGLE, **kwargs):
|
def __init__(self, tweetId, *, mode = TwitterTweetScraperMode.SINGLE, **kwargs):
|
||||||
self._tweetId = tweetId
|
self._tweetId = tweetId
|
||||||
self._mode = mode
|
self._mode = mode
|
||||||
super().__init__(f'https://twitter.com/i/web/status/{self._tweetId}', **kwargs)
|
super().__init__(f'https://twitter.com/i/web/status/{self._tweetId}', **kwargs)
|
||||||
@@ -911,7 +911,7 @@ class TwitterTweetScraper(_TwitterAPIScraper):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _cli_from_args(cls, args):
|
def _cli_from_args(cls, args):
|
||||||
return cls._cli_construct(args, args.tweetId, TwitterTweetScraperMode._cli_from_args(args))
|
return cls._cli_construct(args, args.tweetId, mode = TwitterTweetScraperMode._cli_from_args(args))
|
||||||
|
|
||||||
|
|
||||||
class TwitterListPostsScraper(TwitterSearchScraper):
|
class TwitterListPostsScraper(TwitterSearchScraper):
|
||||||
|
|||||||
Reference in New Issue
Block a user