Merge pull request #4 from JustAnotherArchivist/master

upstream merge
This commit is contained in:
Tristan Lee
2022-05-24 23:10:38 -07:00
committed by GitHub

View File

@@ -20,7 +20,7 @@ _logger = logging.getLogger(__name__)
@dataclasses.dataclass @dataclasses.dataclass
class Submission(snscrape.base.Item): class Submission(snscrape.base.Item):
author: typing.Optional[str] # E.g. submission hf7k6 author: typing.Optional[str] # E.g. submission hf7k6
created: datetime.datetime date: datetime.datetime
id: str id: str
link: typing.Optional[str] link: typing.Optional[str]
selftext: typing.Optional[str] selftext: typing.Optional[str]
@@ -28,6 +28,8 @@ class Submission(snscrape.base.Item):
title: str title: str
url: str url: str
created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
def __str__(self): def __str__(self):
return self.url return self.url
@@ -36,12 +38,14 @@ class Submission(snscrape.base.Item):
class Comment(snscrape.base.Item): class Comment(snscrape.base.Item):
author: typing.Optional[str] author: typing.Optional[str]
body: str body: str
created: datetime.datetime date: datetime.datetime
id: str id: str
parentId: typing.Optional[str] parentId: typing.Optional[str]
subreddit: typing.Optional[str] subreddit: typing.Optional[str]
url: str url: str
created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
def __str__(self): def __str__(self):
return self.url return self.url
@@ -111,7 +115,7 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
kwargs = { kwargs = {
'author': d.get('author'), 'author': d.get('author'),
'created': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc), 'date': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc),
'url': f'https://old.reddit.com{permalink}', 'url': f'https://old.reddit.com{permalink}',
'subreddit': d.get('subreddit'), 'subreddit': d.get('subreddit'),
} }
@@ -192,7 +196,7 @@ class _RedditPushshiftSearchScraper(_RedditPushshiftScraper):
while True: while True:
# Return newer first; if both have the same creation datetime, return the comment first # Return newer first; if both have the same creation datetime, return the comment first
if tipSubmission.created > tipComment.created: if tipSubmission.date > tipComment.date:
yield tipSubmission yield tipSubmission
try: try:
tipSubmission = next(submissionsIter) tipSubmission = next(submissionsIter)