Formally deprecate the already deprecated item attributes

This commit is contained in:
JustAnotherArchivist
2020-10-16 00:55:55 +00:00
parent 0f78aa45fc
commit 5cb64faa72
3 changed files with 31 additions and 13 deletions

View File

@@ -7,6 +7,7 @@ import re
import snscrape.base
import typing
import urllib.parse
import warnings
logger = logging.getLogger(__name__)
@@ -19,7 +20,11 @@ class FacebookPost(snscrape.base.Item):
date: datetime.datetime
content: typing.Optional[str]
outlinks: list
outlinksss: str # deprecated, use outlinks instead
@property
def outlinksss(self):
    """Deprecated accessor: the items of ``outlinks`` joined by single spaces.

    Calls ``warnings.warn`` with ``FutureWarning`` on access; use
    ``outlinks`` instead.
    """
    # stacklevel=2 attributes the warning to the code that accessed the
    # property, so users see which line of *their* code needs updating.
    warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning, stacklevel=2)
    return ' '.join(self.outlinks)
def __str__(self):
return self.cleanUrl
@@ -150,7 +155,7 @@ class FacebookCommonScraper(snscrape.base.Scraper):
outlink = query['u'][0]
# The duplicate check must apply to both schemes. Written as
# `a or b and c`, Python evaluates `a or (b and c)`, which let repeated
# http:// links through; a tuple prefix test avoids the precedence trap.
if outlink.startswith(('http://', 'https://')) and outlink not in outlinks:
    outlinks.append(outlink)
yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks, outlinksss = ' '.join(outlinks))
yield FacebookPost(cleanUrl = cleanUrl, dirtyUrl = dirtyUrl, date = date, content = content, outlinks = outlinks)
class FacebookUserAndCommunityScraper(FacebookCommonScraper):

View File

@@ -6,6 +6,7 @@ import re
import snscrape.base
import typing
import urllib.parse
import warnings
logger = logging.getLogger(__name__)
@@ -27,9 +28,13 @@ class TelegramPost(snscrape.base.Item):
date: datetime.datetime
content: str
outlinks: list
outlinksss: str # deprecated, use outlinks instead
linkPreview: typing.Optional[LinkPreview] = None
@property
def outlinksss(self):
    """Deprecated accessor: the items of ``outlinks`` joined by single spaces.

    Calls ``warnings.warn`` with ``FutureWarning`` on access; use
    ``outlinks`` instead.
    """
    # stacklevel=2 attributes the warning to the code that accessed the
    # property, so users see which line of *their* code needs updating.
    warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning, stacklevel=2)
    return ' '.join(self.outlinks)
def __str__(self):
return self.url
@@ -101,11 +106,9 @@ class TelegramChannelScraper(snscrape.base.Scraper):
href = urllib.parse.urljoin(pageUrl, link['href'])
if href not in outlinks:
outlinks.append(href)
outlinksss = ' '.join(outlinks)
else:
content = None
outlinks = []
outlinksss = ''
linkPreview = None
if (linkPreviewA := post.find('a', class_ = 'tgme_widget_message_link_preview')):
kwargs = {}
@@ -122,7 +125,7 @@ class TelegramChannelScraper(snscrape.base.Scraper):
else:
self.logger.warning(f'Could not process link preview image on {url}')
linkPreview = LinkPreview(**kwargs)
yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, outlinksss = outlinksss, linkPreview = linkPreview)
yield TelegramPost(url = url, date = date, content = content, outlinks = outlinks, linkPreview = linkPreview)
def get_items(self):
r, soup = self._initial_page()

View File

@@ -12,6 +12,7 @@ import string
import time
import typing
import urllib.parse
import warnings
logger = logging.getLogger(__name__)
@@ -25,12 +26,9 @@ class Tweet(snscrape.base.Item):
content: str
renderedContent: str
id: int
username: str # Deprecated, use user['username'] instead
user: 'User'
outlinks: list
outlinksss: str # Deprecated, use outlinks instead
tcooutlinks: list
tcooutlinksss: str # Deprecated, use tcooutlinks instead
replyCount: int
retweetCount: int
likeCount: int
@@ -43,6 +41,21 @@ class Tweet(snscrape.base.Item):
quotedTweet: typing.Optional['Tweet'] = None
mentionedUsers: typing.Optional[typing.List['User']] = None
@property
def username(self):
    """Deprecated accessor: returns ``self.user.username``.

    Calls ``warnings.warn`` with ``FutureWarning`` on access; use
    ``user.username`` instead.
    """
    # stacklevel=2 attributes the warning to the code that accessed the
    # property, so users see which line of *their* code needs updating.
    warnings.warn('username is deprecated, use user.username instead', FutureWarning, stacklevel=2)
    return self.user.username
@property
def outlinksss(self):
    """Deprecated accessor: the items of ``outlinks`` joined by single spaces.

    Calls ``warnings.warn`` with ``FutureWarning`` on access; use
    ``outlinks`` instead.
    """
    # stacklevel=2 attributes the warning to the code that accessed the
    # property, so users see which line of *their* code needs updating.
    warnings.warn('outlinksss is deprecated, use outlinks instead', FutureWarning, stacklevel=2)
    return ' '.join(self.outlinks)
@property
def tcooutlinksss(self):
    """Deprecated accessor: the items of ``tcooutlinks`` joined by single spaces.

    Calls ``warnings.warn`` with ``FutureWarning`` on access; use
    ``tcooutlinks`` instead.
    """
    # stacklevel=2 attributes the warning to the code that accessed the
    # property, so users see which line of *their* code needs updating.
    warnings.warn('tcooutlinksss is deprecated, use tcooutlinks instead', FutureWarning, stacklevel=2)
    return ' '.join(self.tcooutlinks)
def __str__(self):
return self.url
@@ -302,14 +315,11 @@ class TwitterAPIScraper(snscrape.base.Scraper):
kwargs['id'] = tweet['id'] if 'id' in tweet else int(tweet['id_str'])
kwargs['content'] = tweet['full_text']
kwargs['renderedContent'] = self._render_text_with_urls(tweet['full_text'], tweet['entities'].get('urls'))
kwargs['username'] = obj['globalObjects']['users'][tweet['user_id_str']]['screen_name']
kwargs['user'] = self._user_to_user(obj['globalObjects']['users'][tweet['user_id_str']])
kwargs['date'] = email.utils.parsedate_to_datetime(tweet['created_at'])
kwargs['outlinks'] = [u['expanded_url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else []
kwargs['outlinksss'] = ' '.join(kwargs['outlinks'])
kwargs['tcooutlinks'] = [u['url'] for u in tweet['entities']['urls']] if 'urls' in tweet['entities'] else []
kwargs['tcooutlinksss'] = ' '.join(kwargs['tcooutlinks'])
kwargs['url'] = f'https://twitter.com/{kwargs["username"]}/status/{kwargs["id"]}'
kwargs['url'] = f'https://twitter.com/{obj["globalObjects"]["users"][tweet["user_id_str"]]["screen_name"]}/status/{kwargs["id"]}'
kwargs['replyCount'] = tweet['reply_count']
kwargs['retweetCount'] = tweet['retweet_count']
kwargs['likeCount'] = tweet['favorite_count']