diff --git a/snscrape/base.py b/snscrape/base.py index d9d4a8c..06547d3 100644 --- a/snscrape/base.py +++ b/snscrape/base.py @@ -1,4 +1,5 @@ import abc +import functools import logging import requests import time @@ -18,7 +19,7 @@ class Item: class Entity: - '''An abstract base class for an entity returned by the scraper's get_entity method. + '''An abstract base class for an entity returned by the scraper's entity property. An entity is typically the account of a person or organisation. The string representation should be the preferred direct URL to the entity's page on the network.''' @@ -65,10 +66,16 @@ class Scraper: '''Iterator yielding Items.''' pass - def get_entity(self): - '''Get the entity behind the scraper, if any.''' + def _get_entity(self): + '''Get the entity behind the scraper, if any. + + This is the method implemented by subclasses for doing the actual retrieval/entity object creation. For accessing the scraper's entity, use the entity property.''' return None + @functools.cached_property + def entity(self): + return self._get_entity() + def _request(self, method, url, params = None, data = None, headers = None, timeout = 10, responseOkCallback = None): for attempt in range(self._retries + 1): # The request is newly prepared on each retry because of potential cookie updates. diff --git a/snscrape/cli.py b/snscrape/cli.py index b05a380..b3282db 100644 --- a/snscrape/cli.py +++ b/snscrape/cli.py @@ -238,7 +238,7 @@ def main(): i = 0 with _dump_locals_on_exception(): if args.withEntity: - entity = scraper.get_entity() + entity = scraper.entity if entity: if args.jsonl: print(json.dumps(entity._asdict(), default = json_serialise_datetime)) diff --git a/snscrape/modules/facebook.py b/snscrape/modules/facebook.py index 670aa97..00f8297 100644 --- a/snscrape/modules/facebook.py +++ b/snscrape/modules/facebook.py @@ -218,7 +218,7 @@ class FacebookUserScraper(FacebookUserAndCommunityScraper): super().__init__(*args, **kwargs) self._baseUrl = f'https://www.facebook.com/{self._username}/' - def get_entity(self): + def _get_entity(self): kwargs = {} nameVerifiedMarkupPattern = re.compile(r'"markup":\[\["__markup_a588f507_0_0",\{"__html":(".*?")\}') diff --git a/snscrape/modules/instagram.py b/snscrape/modules/instagram.py index eeaec77..357c4e4 100644 --- a/snscrape/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -179,7 +179,7 @@ class InstagramUserScraper(InstagramCommonScraper): def from_args(cls, args): return cls('User', args.username, retries = args.retries) - def get_entity(self): + def _get_entity(self): r = self._initial_page() if r.status_code != 200: return diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py index 163aae8..f6fa378 100644 --- a/snscrape/modules/telegram.py +++ b/snscrape/modules/telegram.py @@ -89,7 +89,7 @@ class TelegramChannelScraper(snscrape.base.Scraper): raise snscrape.base.ScraperException(f'Got status code {r.status_code}') soup = bs4.BeautifulSoup(r.text, 'lxml') - def get_entity(self): + def _get_entity(self): kwargs = {} # /channel has a more accurate member count and bigger profile picture r = self._get(f'https://t.me/{self._name}', headers = self._headers) diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index cbee140..7ee4d34 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -305,7 +305,7 @@ class TwitterUserScraper(TwitterSearchScraper): super().__init__(f'from:{username}', **kwargs) self._username = username - def get_entity(self): + def _get_entity(self): self._ensure_guest_token(f'https://twitter.com/{self._username}') params = {'variables': json.dumps({'screen_name': self._username, 'withHighlightedLabel': True}, separators = (',', ':'))} obj = self._get_api_data('https://api.twitter.com/graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName', params = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)) diff --git a/snscrape/modules/vkontakte.py b/snscrape/modules/vkontakte.py index a0ac6f4..2240275 100644 --- a/snscrape/modules/vkontakte.py +++ b/snscrape/modules/vkontakte.py @@ -120,7 +120,7 @@ class VKontakteUserScraper(snscrape.base.Scraper): soup = bs4.BeautifulSoup(posts, 'lxml') yield from self._soup_to_items(soup) - def get_entity(self): + def _get_entity(self): r, soup = self._initial_page() if r.status_code != 200: return