mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-13 12:58:27 +03:00
@@ -20,7 +20,7 @@ _logger = logging.getLogger(__name__)
|
|||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class Submission(snscrape.base.Item):
|
class Submission(snscrape.base.Item):
|
||||||
author: typing.Optional[str] # E.g. submission hf7k6
|
author: typing.Optional[str] # E.g. submission hf7k6
|
||||||
created: datetime.datetime
|
date: datetime.datetime
|
||||||
id: str
|
id: str
|
||||||
link: typing.Optional[str]
|
link: typing.Optional[str]
|
||||||
selftext: typing.Optional[str]
|
selftext: typing.Optional[str]
|
||||||
@@ -28,6 +28,8 @@ class Submission(snscrape.base.Item):
|
|||||||
title: str
|
title: str
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
|
created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
@@ -36,12 +38,14 @@ class Submission(snscrape.base.Item):
|
|||||||
class Comment(snscrape.base.Item):
|
class Comment(snscrape.base.Item):
|
||||||
author: typing.Optional[str]
|
author: typing.Optional[str]
|
||||||
body: str
|
body: str
|
||||||
created: datetime.datetime
|
date: datetime.datetime
|
||||||
id: str
|
id: str
|
||||||
parentId: typing.Optional[str]
|
parentId: typing.Optional[str]
|
||||||
subreddit: typing.Optional[str]
|
subreddit: typing.Optional[str]
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
|
created = snscrape.base._DeprecatedProperty('created', lambda self: self.date, 'date')
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
@@ -111,7 +115,7 @@ class _RedditPushshiftScraper(snscrape.base.Scraper):
|
|||||||
|
|
||||||
kwargs = {
|
kwargs = {
|
||||||
'author': d.get('author'),
|
'author': d.get('author'),
|
||||||
'created': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc),
|
'date': datetime.datetime.fromtimestamp(d['created_utc'], datetime.timezone.utc),
|
||||||
'url': f'https://old.reddit.com{permalink}',
|
'url': f'https://old.reddit.com{permalink}',
|
||||||
'subreddit': d.get('subreddit'),
|
'subreddit': d.get('subreddit'),
|
||||||
}
|
}
|
||||||
@@ -192,7 +196,7 @@ class _RedditPushshiftSearchScraper(_RedditPushshiftScraper):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Return newer first; if both have the same creation datetime, return the comment first
|
# Return newer first; if both have the same creation datetime, return the comment first
|
||||||
if tipSubmission.created > tipComment.created:
|
if tipSubmission.date > tipComment.date:
|
||||||
yield tipSubmission
|
yield tipSubmission
|
||||||
try:
|
try:
|
||||||
tipSubmission = next(submissionsIter)
|
tipSubmission = next(submissionsIter)
|
||||||
|
|||||||
Reference in New Issue
Block a user