Remove Twitter list member scraper

The list member scraper has been broken for a while: member lists were removed from Twitter's old design, and on the new design they are behind a login wall.
2026-06-08 02:28:29 +03:00 · 2020-09-24 18:34:15 +00:00
parent f1428fa0e0
commit 397a0b988e
2 changed files with 2 additions and 56 deletions
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ The following services are currently supported:
 * Instagram: user profiles, hashtags, and locations
 * Reddit: users, subreddits, and searches (via Pushshift)
 * Telegram: channels
-* Twitter: users, user profiles, hashtags, searches, threads, and lists (members as well as posts)
+* Twitter: users, user profiles, hashtags, searches, threads, and list posts
 * VKontakte: user profiles
 * Weibo (Sina Weibo): user profiles

--- a/snscrape/modules/twitter.py
+++ b/snscrape/modules/twitter.py
@@ -81,8 +81,7 @@ class DescriptionURL(typing.NamedTuple):
 	indices: typing.Tuple[int, int]


-class User(typing.NamedTuple, snscrape.base.Item, snscrape.base.Entity):
-	# This is both an Item and an Entity since it can be returned as TwitterUserScraper's entity as well as TwitterListMembersScraper's items.
+class User(typing.NamedTuple, snscrape.base.Entity):
 	# Most fields can be None if they're not known.

 	username: str
@@ -631,56 +630,3 @@ class TwitterListPostsScraper(TwitterSearchScraper):
 	@classmethod
 	def from_args(cls, args):
 		return cls(args.list, retries = args.retries)
-
-
-class TwitterListMembersScraper(TwitterOldDesignScraper):
-	name = 'twitter-list-members'
-
-	def __init__(self, listName, **kwargs):
-		super().__init__(**kwargs)
-		self._user, self._list = listName.split('/')
-
-	def get_items(self):
-		headers = {'User-Agent': f'Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'}
-
-		baseUrl = f'https://twitter.com/{self._user}/lists/{self._list}/members'
-		r = self._get(baseUrl, headers = headers)
-		if r.status_code != 200:
-			logger.warning('List not found')
-			return
-		soup = bs4.BeautifulSoup(r.text, 'lxml')
-		container = soup.find('div', 'stream-container')
-		if not container:
-			raise snscrape.base.ScraperException('Unable to find container')
-		items = container.find_all('li', 'js-stream-item')
-		if not items:
-			logger.warning('Empty list')
-			return
-		for item in items:
-			yield User(username = item.find('div', 'account')['data-screen-name'])
-
-		if not container.has_attr('data-min-position') or container['data-min-position'] == '':
-			return
-		maxPosition = container['data-min-position']
-		while True:
-			r = self._get(
-				f'{baseUrl}/timeline?include_available_features=1&include_entities=1&max_position={maxPosition}&reset_error_state=false',
-				headers = headers,
-				responseOkCallback = self._check_json_callback
-			  )
-			obj = json.loads(r.text)
-			soup = bs4.BeautifulSoup(obj['items_html'], 'lxml')
-			items = soup.find_all('li', 'js-stream-item')
-			for item in items:
-				yield User(username = item.find('div', 'account')['data-screen-name'])
-			if not obj['has_more_items']:
-				break
-			maxPosition = obj['min_position']
-
-	@classmethod
-	def setup_parser(cls, subparser):
-		subparser.add_argument('list', help = 'A Twitter list, formatted as "username/listname"')
-
-	@classmethod
-	def from_args(cls, args):
-		return cls(args.list, retries = args.retries)