mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 03:48:29 +03:00
Formally deprecate the already deprecated item attributes
This commit is contained in:
@@ -7,6 +7,7 @@ import re
|
||||
import snscrape.base
|
||||
import typing
|
||||
import urllib.parse
|
||||
import warnings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -19,7 +20,11 @@ class FacebookPost(snscrape.base.Item):
|
||||
date: datetime.datetime
|
||||
content: typing.Optional[str]
|
||||
outlinks: list
|
||||
outlinksss: str # deprecated, use outlinks instead
|
||||
|
||||
@property
|
||||
def outlinksss(self):
|
||||
warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning)
|
||||
return ' '.join(self.outlinks)
|
||||
|
||||
def __str__(self):
|
||||
return self.cleanUrl
|
||||
@@ -150,7 +155,7 @@ class FacebookCommonScraper(snscrape.base.Scraper):
|
||||
outlink = query['u'][0]
|
||||
if outlink.startswith('http://') or outlink.startswith('https://') and outlink not in outlinks:
|
||||
outlinks.append(outlink)
|
||||
yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
|
||||
yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks)
|
||||
|
||||
|
||||
class FacebookUserAndCommunityScraper(FacebookCommonScraper):
|
||||
|
||||
@@ -6,6 +6,7 @@ import re
|
||||
import snscrape.base
|
||||
import typing
|
||||
import urllib.parse
|
||||
import warnings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -27,9 +28,13 @@ class TelegramPost(snscrape.base.Item):
|
||||
date: datetime.datetime
|
||||
content: str
|
||||
outlinks: list
|
||||
outlinksss: str # deprecated, use outlinks instead
|
||||
linkPreview: typing.Optional[LinkPreview] = None
|
||||
|
||||
@property
|
||||
def outlinksss(self):
|
||||
warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning)
|
||||
return ' '.join(self.outlinks)
|
||||
|
||||
def __str__(self):
|
||||
return self.url
|
||||
|
||||
@@ -101,11 +106,9 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
||||
href = urllib.parse.urljoin(pageUrl, link['href'])
|
||||
if href not in outlinks:
|
||||
outlinks.append(href)
|
||||
outlinksss = ' '.join(outlinks)
|
||||
else:
|
||||
content = None
|
||||
outlinks = []
|
||||
outlinksss = ''
|
||||
linkPreview = None
|
||||
if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')):
|
||||
kwargs = {}
|
||||
@@ -122,7 +125,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
|
||||
else:
|
||||
self.logger.warning(f'Could not process link preview image on {url}')
|
||||
linkPreview = LinkPreview(**kwargs)
|
||||
yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, outlinksss = outlinksss, linkPreview = linkPreview)
|
||||
yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview)
|
||||
|
||||
def get_items(self):
|
||||
r, soup = self._initial_page()
|
||||
|
||||
@@ -12,6 +12,7 @@ import string
|
||||
import time
|
||||
import typing
|
||||
import urllib.parse
|
||||
import warnings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -25,12 +26,9 @@ class Tweet(snscrape.base.Item):
|
||||
content: str
|
||||
renderedContent: str
|
||||
id: int
|
||||
username: str # Deprecated, use user.username instead
|
||||
user: 'User'
|
||||
outlinks: list
|
||||
outlinksss: str # Deprecated, use outlinks instead
|
||||
tcooutlinks: list
|
||||
tcooutlinksss: str # Deprecated, use tcooutlinks instead
|
||||
replyCount: int
|
||||
retweetCount: int
|
||||
likeCount: int
|
||||
@@ -43,6 +41,21 @@ class Tweet(snscrape.base.Item):
|
||||
quotedTweet: typing.Optional['Tweet'] = None
|
||||
mentionedUsers: typing.Optional[typing.List['User']] = None
|
||||
|
||||
@property
|
||||
def username(self):
|
||||
warnings.warn('username is deprecated, use user.username instead', FutureWarning)
|
||||
return self.user.username
|
||||
|
||||
@property
|
||||
def outlinksss(self):
|
||||
warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning)
|
||||
return ' '.join(self.outlinks)
|
||||
|
||||
@property
|
||||
def tcooutlinksss(self):
|
||||
warnings.warn('tcooutlinksss is deprecated, use tcooutlinks instead', FutureWarning)
|
||||
return ' '.join(self.tcooutlinks)
|
||||
|
||||
def __str__(self):
|
||||
return self.url
|
||||
|
||||
@@ -302,14 +315,11 @@ class TwitterAPIScraper(snscrape.base.Scraper):
|
||||
kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str'])
|
||||
kwargs['content'] = tweet['full_text']
|
||||
kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls'))
|
||||
kwargs['username'] = obj['globalObjects']['users'][tweet['user_id_str']]['screen_name']
|
||||
kwargs['user'] = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']])
|
||||
kwargs['date'] = email.utils.parsedate_to_datetime(tweet['created_at'])
|
||||
kwargs['outlinks'] = [u['expanded_url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else []
|
||||
kwargs['outlinksss'] = ' '.join(kwargs['outlinks'])
|
||||
kwargs['tcooutlinks'] = [u['url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else []
|
||||
kwargs['tcooutlinksss'] = ' '.join(kwargs['tcooutlinks'])
|
||||
kwargs['url'] = f'https://twitter.com/{kwargs["username"]}/status/{kwargs["id"]}'
|
||||
kwargs['url'] = f'https://twitter.com/{obj["globalObjects"]["users"][tweet["user_id_str"]]["screen_name"]}/status/{kwargs["id"]}'
|
||||
kwargs['replyCount'] = tweet['reply_count']
|
||||
kwargs['retweetCount'] = tweet['retweet_count']
|
||||
kwargs['likeCount'] = tweet['favorite_count']
|
||||
|
||||
Reference in New Issue
Block a user