diff --git a/snscrape/modules/facebook.py b/snscrape/modules/facebook.py index 0e3e58b..ffb4fc7 100644 --- a/snscrape/modules/facebook.py +++ b/snscrape/modules/facebook.py @@ -7,6 +7,7 @@ import re import snscrape.base import typing import urllib.parse +import warnings logger = logging.getLogger(__name__) @@ -19,7 +20,11 @@ class FacebookPost(snscrape.base.Item): date: datetime.datetime content: typing.Optional[str] outlinks: list - outlinksss: str # deprecated, use outlinks instead + + @property + def outlinksss(self): + warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning) + return ' '.join(self.outlinks) def __str__(self): return self.cleanUrl @@ -150,7 +155,7 @@ class FacebookCommonScraper(snscrape.base.Scraper): outlink = query['u'][0] if outlink.startswith('http://') or outlink.startswith('https://') and outlink not in outlinks: outlinks.append(outlink) - yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks)) + yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks) class FacebookUserAndCommunityScraper(FacebookCommonScraper): diff --git a/snscrape/modules/telegram.py b/snscrape/modules/telegram.py index e566f70..b603913 100644 --- a/snscrape/modules/telegram.py +++ b/snscrape/modules/telegram.py @@ -6,6 +6,7 @@ import re import snscrape.base import typing import urllib.parse +import warnings logger = logging.getLogger(__name__) @@ -27,9 +28,13 @@ class TelegramPost(snscrape.base.Item): date: datetime.datetime content: str outlinks: list - outlinksss: str # deprecated, use outlinks instead linkPreview: typing.Optional[LinkPreview] = None + @property + def outlinksss(self): + warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning) + return ' '.join(self.outlinks) + def __str__(self): return self.url @@ -101,11 +106,9 @@ class TelegramChannelScraper(snscrape.base.Scraper): href = urllib.parse.urljoin(pageUrl, link['href']) if href not in outlinks: outlinks.append(href) - outlinksss = ' '.join(outlinks) else: content = None outlinks = [] - outlinksss = '' linkPreview = None if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')): kwargs = {} @@ -122,7 +125,7 @@ class TelegramChannelScraper(snscrape.base.Scraper): else: self.logger.warning(f'Could not process link preview image on {url}') linkPreview = LinkPreview(**kwargs) - yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, outlinksss = outlinksss, linkPreview = linkPreview) + yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview) def get_items(self): r, soup = self._initial_page() diff --git a/snscrape/modules/twitter.py b/snscrape/modules/twitter.py index 9b8c078..9ed6bc0 100644 --- a/snscrape/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -12,6 +12,7 @@ import string import time import typing import urllib.parse +import warnings logger = logging.getLogger(__name__) @@ -25,12 +26,9 @@ class Tweet(snscrape.base.Item): content: str renderedContent: str id: int - username: str # Deprecated, use user['username'] instead user: 'User' outlinks: list - outlinksss: str # Deprecated, use outlinks instead tcooutlinks: list - tcooutlinksss: str # Deprecated, use tcooutlinks instead replyCount: int retweetCount: int likeCount: int @@ -43,6 +41,21 @@ class Tweet(snscrape.base.Item): quotedTweet: typing.Optional['Tweet'] = None mentionedUsers: typing.Optional[typing.List['User']] = None + @property + def username(self): + warnings.warn('username is deprecated, use user.username instead', FutureWarning) + return self.user.username + + @property + def outlinksss(self): + warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning) + return ' '.join(self.outlinks) + + @property + def tcooutlinksss(self): + warnings.warn('tcooutlinksss is deprecated, use tcooutlinks instead', FutureWarning) + return ' '.join(self.tcooutlinks) + def __str__(self): return self.url @@ -302,14 +315,11 @@ class TwitterAPIScraper(snscrape.base.Scraper): kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str']) kwargs['content'] = tweet['full_text'] kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls')) - kwargs['username'] = obj['globalObjects']['users'][tweet['user_id_str']]['screen_name'] kwargs['user'] = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']]) kwargs['date'] = email.utils.parsedate_to_datetime(tweet['created_at']) kwargs['outlinks'] = [u['expanded_url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else [] - kwargs['outlinksss'] = ' '.join(kwargs['outlinks']) kwargs['tcooutlinks'] = [u['url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else [] - kwargs['tcooutlinksss'] = ' '.join(kwargs['tcooutlinks']) - kwargs['url'] = f'https://twitter.com/{kwargs["username"]}/status/{kwargs["id"]}' + kwargs['url'] = f'https://twitter.com/{obj["globalObjects"]["users"][tweet["user_id_str"]]["screen_name"]}/status/{kwargs["id"]}' kwargs['replyCount'] = tweet['reply_count'] kwargs['retweetCount'] = tweet['retweet_count'] kwargs['likeCount'] = tweet['favorite_count']