Make all optional scraper arguments keyword-only and fix Mastodon argument style to conform with the other scrapers

Cf. #376
This commit is contained in:
JustAnotherArchivist
2022-01-30 00:21:18 +00:00
parent 107c3c71c2
commit 560c78c5cf
4 changed files with 13 additions and 13 deletions

View File

@@ -139,7 +139,7 @@ class Scraper:
name = None
def __init__(self, retries = 3, proxies = None):
def __init__(self, *, retries = 3, proxies = None):
self._retries = retries
self._proxies = proxies
self._session = requests.Session()

View File

@@ -92,8 +92,8 @@ class CustomEmoji:
class _MastodonCommonScraper(snscrape.base.Scraper):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', 'Accept-Language': 'en-US,en;q=0.5'}
self._lastRequest = 0
@@ -245,8 +245,8 @@ class _MastodonCommonScraper(snscrape.base.Scraper):
class MastodonProfileScraper(_MastodonCommonScraper):
name = 'mastodon-profile'
def __init__(self, account, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, account, **kwargs):
super().__init__(**kwargs)
if account.startswith('@') and account.count('@') == 2:
account, domain = account[1:].split('@')
url = f'https://{domain}/@{account}'
@@ -310,8 +310,8 @@ class MastodonTootScraperMode(enum.Enum):
class MastodonTootScraper(_MastodonCommonScraper):
name = 'mastodon-toot'
def __init__(self, url, mode, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, url, *, mode = MastodonTootScraperMode.SINGLE, **kwargs):
super().__init__(**kwargs)
self._url = url
self._mode = mode
@@ -337,4 +337,4 @@ class MastodonTootScraper(_MastodonCommonScraper):
@classmethod
def _cli_from_args(cls, args):
return cls._cli_construct(args, args.url, MastodonTootScraperMode._cli_from_args(args))
return cls._cli_construct(args, args.url, mode = MastodonTootScraperMode._cli_from_args(args))

View File

@@ -47,7 +47,7 @@ class Comment(snscrape.base.Item):
class _RedditPushshiftScraper(snscrape.base.Scraper):
def __init__(self, name, submissions = True, comments = True, before = None, after = None, **kwargs):
def __init__(self, name, *, submissions = True, comments = True, before = None, after = None, **kwargs):
super().__init__(**kwargs)
self._name = name
self._submissions = submissions

View File

@@ -268,7 +268,7 @@ class _CLIGuestTokenManager(GuestTokenManager):
class _TwitterAPIScraper(snscrape.base.Scraper):
def __init__(self, baseUrl, guestTokenManager = None, **kwargs):
def __init__(self, baseUrl, *, guestTokenManager = None, **kwargs):
super().__init__(**kwargs)
self._baseUrl = baseUrl
if guestTokenManager is None:
@@ -618,7 +618,7 @@ class _TwitterAPIScraper(snscrape.base.Scraper):
class TwitterSearchScraper(_TwitterAPIScraper):
name = 'twitter-search'
def __init__(self, query, cursor = None, top = False, **kwargs):
def __init__(self, query, *, cursor = None, top = False, **kwargs):
if not query.strip():
raise ValueError('empty query')
super().__init__(baseUrl = 'https://twitter.com/search?' + urllib.parse.urlencode({'f': 'live', 'lang': 'en', 'q': query, 'src': 'spelling_expansion_revert_click'}), **kwargs)
@@ -847,7 +847,7 @@ class TwitterTweetScraperMode(enum.Enum):
class TwitterTweetScraper(_TwitterAPIScraper):
name = 'twitter-tweet'
def __init__(self, tweetId, mode = TwitterTweetScraperMode.SINGLE, **kwargs):
def __init__(self, tweetId, *, mode = TwitterTweetScraperMode.SINGLE, **kwargs):
self._tweetId = tweetId
self._mode = mode
super().__init__(f'https://twitter.com/i/web/status/{self._tweetId}', **kwargs)
@@ -911,7 +911,7 @@ class TwitterTweetScraper(_TwitterAPIScraper):
@classmethod
def _cli_from_args(cls, args):
return cls._cli_construct(args, args.tweetId, TwitterTweetScraperMode._cli_from_args(args))
return cls._cli_construct(args, args.tweetId, mode = TwitterTweetScraperMode._cli_from_args(args))
class TwitterListPostsScraper(TwitterSearchScraper):