Add support for extracting the entity behind a scrape

Closes #11. Backwards incompatibility: snscrape.modules.twitter.Account is now called User. However, this was previously only used on the list member scraper, which has been broken for a while since the list member list is no longer publicly accessible. For compatibility reasons, the CLI does not output the entity by default; the new option --with-entity enables it.
2026-06-12 12:28:28 +03:00 · 2020-08-24 01:38:27 +00:00
parent c90fd54b6b
commit dd25fd0526
7 changed files with 459 additions and 40 deletions
--- a/snscrape/base.py
+++ b/snscrape/base.py
@@ -17,6 +17,22 @@ class Item:
 		pass


+class Entity:
+	'''An abstract base class for an entity returned by the scraper's get_entity method.
+
+	An entity is typically the account of a person or organisation. The string representation should be the preferred direct URL to the entity's page on the network.'''
+
+	@abc.abstractmethod  # NOTE(review): only enforced when the class uses abc.ABCMeta; no ABC base/metaclass is declared on this class statement -- confirm that is intended
+	def __str__(self):  # placeholder body; per the class docstring, implementations should return the entity's preferred URL
+		pass
+
+
+Granularity = int  # plain alias: Granularity is the built-in int itself, not a distinct type
+'''Type of fields storing the unit/granularity of numbers.
+
+For example, a granularity of 1000 means that the SNS returned something like '42k' and the last three significant digits are unknown.'''
+
+
 class URLItem(Item):
 	'''A generic item which only holds a URL string.'''

@@ -49,6 +65,10 @@ class Scraper:
 		'''Iterator yielding Items.'''
 		pass

+	def get_entity(self):
+		'''Get the entity behind the scraper, if any. This base implementation always returns None.'''
+		return None
+
 	def _request(self, method, url, params = None, data = None, headers = None, timeout = 10, responseOkCallback = None):
 		for attempt in range(self._retries + 1):
 			# The request is newly prepared on each retry because of potential cookie updates.