mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-19 15:48:29 +03:00
This commit is contained in:
@@ -1,13 +1,24 @@
|
||||
import bs4
|
||||
import datetime
|
||||
import json
|
||||
import random
|
||||
import logging
|
||||
import snscrape.base
|
||||
import typing
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _TweetFields(typing.NamedTuple):
    """Plain named-tuple record holding the fields of a :class:`Tweet`.

    Kept separate from ``Tweet`` because ``typing.NamedTuple`` cannot be
    combined with other base classes in a single ``class`` statement:
    Python 3.9+ raises ``TypeError`` ("Multiple inheritance with NamedTuple
    is not supported"), and earlier versions silently discard the extra base.
    """

    # Permalink of the tweet; also used as the string representation.
    url: str
    # Time the tweet was posted.
    date: datetime.datetime
    # Text of the tweet.
    content: str


class Tweet(_TweetFields, snscrape.base.Item):
    """A single scraped tweet.

    Behaves as a three-field named tuple ``(url, date, content)`` and is
    also a ``snscrape.base.Item``. Construct it positionally or by keyword,
    e.g. ``Tweet(url, date, content)``.

    NOTE(review): assumes ``snscrape.base.Item`` is a plain (or ABC) mixin
    without a custom ``__init__``/``__new__`` or non-empty ``__slots__`` --
    confirm against ``snscrape/base.py``.
    """

    def __str__(self):
        """Return the tweet's URL."""
        return self.url
|
||||
|
||||
|
||||
class TwitterSearchScraper(snscrape.base.Scraper):
|
||||
name = 'twitter-search'
|
||||
|
||||
@@ -24,7 +35,9 @@ class TwitterSearchScraper(snscrape.base.Scraper):
|
||||
for tweet in feed:
|
||||
username = tweet.find('span', 'username').find('b').text
|
||||
tweetID = tweet['data-item-id']
|
||||
yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
|
||||
date = datetime.datetime.fromtimestamp(int(tweet.find('a', 'tweet-timestamp').find('span', '_timestamp')['data-time']), datetime.timezone.utc)
|
||||
content = tweet.find('p', 'tweet-text').text
|
||||
yield Tweet(f'https://twitter.com/{username}/status/{tweetID}', date, content)
|
||||
|
||||
def _check_json_callback(self, r):
|
||||
if r.headers.get('content-type') != 'application/json;charset=utf-8':
|
||||
|
||||
Reference in New Issue
Block a user