mirror of
https://github.com/bellingcat/polyphemus.git
synced 2026-06-13 13:58:32 +03:00
Refactored the base classes to have a structure more similar to snscrape; the scraper 'get' methods now return dataclasses or lists of dataclasses rather than dicts.
This commit is contained in:
@@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
from typing import Tuple, Optional, List
|
||||||
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -23,7 +25,7 @@ NEW_USER_API_URL = 'https://api.odysee.com/user/new'
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def make_request(request, kwargs):
|
def make_request(request: str, kwargs: dict) -> requests.Response:
|
||||||
|
|
||||||
"""Wrapper for retrying request multiple times.
|
"""Wrapper for retrying request multiple times.
|
||||||
"""
|
"""
|
||||||
@@ -32,12 +34,24 @@ def make_request(request, kwargs):
|
|||||||
msg = f'`request` argument must be either `requests.get` or `requests.post`, not {type(request)}'
|
msg = f'`request` argument must be either `requests.get` or `requests.post`, not {type(request)}'
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
||||||
n_retries = 0
|
if 'timeout' not in kwargs:
|
||||||
response = request(**kwargs)
|
kwargs['timeout'] = 15
|
||||||
|
|
||||||
while response.status_code != 200 and n_retries < 5:
|
n_retries = 0
|
||||||
n_retries += 1
|
|
||||||
response = request(**kwargs)
|
response = requests.Response()
|
||||||
|
response.status_code = 418
|
||||||
|
|
||||||
|
while n_retries < 5:
|
||||||
|
time.sleep(2 ** n_retries - 1)
|
||||||
|
try:
|
||||||
|
response = request(**kwargs)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
n_retries += 1
|
||||||
|
except Exception:
|
||||||
|
n_retries += 1
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
msg = f'Maximum number of retries reached for request {request} with kwargs {kwargs}: status code {response.status_code}'
|
msg = f'Maximum number of retries reached for request {request} with kwargs {kwargs}: status code {response.status_code}'
|
||||||
@@ -47,9 +61,12 @@ def make_request(request, kwargs):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_auth_token():
|
def get_auth_token() -> str:
|
||||||
|
|
||||||
"""Get a fresh authorization token, to use for API calls that require it.
|
"""Get a fresh authorization token, to use for API calls that require it.
|
||||||
|
|
||||||
|
Note: calling this function many times in quick succession may result in a
|
||||||
|
503 error.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
response = make_request(
|
response = make_request(
|
||||||
@@ -63,7 +80,7 @@ def get_auth_token():
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_channel_info(channel_name):
|
def get_channel_info(channel_name: str) -> dict:
|
||||||
|
|
||||||
"""Get the channel information and ID from the channel name.
|
"""Get the channel information and ID from the channel name.
|
||||||
"""
|
"""
|
||||||
@@ -99,7 +116,7 @@ def get_channel_info(channel_name):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_subscribers(channel_id, auth_token = None):
|
def get_subscribers(channel_id: str, auth_token: str = None) -> int:
|
||||||
|
|
||||||
"""Get the number of subscribers for a channel.
|
"""Get the number of subscribers for a channel.
|
||||||
"""
|
"""
|
||||||
@@ -124,19 +141,19 @@ def get_subscribers(channel_id, auth_token = None):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_all_videos(channel_id):
|
def get_raw_video_info_list(channel_id: str) -> dict:
|
||||||
|
|
||||||
"""Get a list of all videos posted by a specified channel name.
|
"""Get a list of all videos posted by a specified channel name.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
all_videos: list<dict>
|
raw_video_info_list: list<dict>
|
||||||
List of dictionaries, with each dict corresponding to a JSON response
|
List of dictionaries, with each dict corresponding to a JSON response
|
||||||
containing data about a single video.
|
containing data about a single video.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
all_videos = []
|
raw_video_info_list = []
|
||||||
|
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
@@ -164,14 +181,14 @@ def get_all_videos(channel_id):
|
|||||||
if not videos:
|
if not videos:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
all_videos.extend(videos)
|
raw_video_info_list.extend(videos)
|
||||||
page += 1
|
page += 1
|
||||||
|
|
||||||
return all_videos
|
return raw_video_info_list
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_views(video_id, auth_token = None):
|
def get_views(video_id: str, auth_token: str = None) -> int:
|
||||||
|
|
||||||
"""Get the number of views for a given video.
|
"""Get the number of views for a given video.
|
||||||
"""
|
"""
|
||||||
@@ -195,7 +212,7 @@ def get_views(video_id, auth_token = None):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_video_reactions(video_id, auth_token = None):
|
def get_video_reactions(video_id: str, auth_token: str = None) -> Tuple[Optional[int], Optional[int]]:
|
||||||
|
|
||||||
"""Get all reactions for a given video.
|
"""Get all reactions for a given video.
|
||||||
"""
|
"""
|
||||||
@@ -223,7 +240,7 @@ def get_video_reactions(video_id, auth_token = None):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_all_comments(video_id):
|
def get_all_comments(video_id: str) -> List[dict]:
|
||||||
|
|
||||||
"""Get a list of all comments for a single video.
|
"""Get a list of all comments for a single video.
|
||||||
|
|
||||||
@@ -277,7 +294,7 @@ def get_all_comments(video_id):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def append_comment_reactions(comment_info_list):
|
def append_comment_reactions(comment_info_list: List[dict]) -> List[dict]:
|
||||||
|
|
||||||
"""Get reaction data for each comment and insert ``'reactions'`` key into
|
"""Get reaction data for each comment and insert ``'reactions'`` key into
|
||||||
dict for each comment.
|
dict for each comment.
|
||||||
@@ -325,7 +342,7 @@ def append_comment_reactions(comment_info_list):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_recommended(video_title, video_id):
|
def get_recommended(video_title: str, video_id: str) -> List[dict]:
|
||||||
|
|
||||||
name = quote(video_title)
|
name = quote(video_title)
|
||||||
|
|
||||||
@@ -350,7 +367,7 @@ def get_recommended(video_title, video_id):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def normalized_name_to_video_info(normalized_name):
|
def normalized_name_to_video_info(normalized_name: str) -> dict:
|
||||||
|
|
||||||
video_url = f"lbry://{normalized_name}"
|
video_url = f"lbry://{normalized_name}"
|
||||||
|
|
||||||
@@ -372,7 +389,7 @@ def normalized_name_to_video_info(normalized_name):
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
def get_streaming_url(canonical_url):
|
def get_streaming_url(canonical_url: str) -> str:
|
||||||
|
|
||||||
json_data = {
|
json_data = {
|
||||||
"jsonrpc":"2.0",
|
"jsonrpc":"2.0",
|
||||||
|
|||||||
@@ -7,48 +7,111 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import typing
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from polyphemus import api
|
from polyphemus import api
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class OdyseeChannel:
|
@dataclass
|
||||||
|
class Channel:
|
||||||
|
channel_id: str
|
||||||
|
created: datetime
|
||||||
|
subscribers: int
|
||||||
|
raw : str
|
||||||
|
title : typing.Optional[str] = None
|
||||||
|
description: typing.Optional[str] = None
|
||||||
|
cover_image: typing.Optional[str] = None
|
||||||
|
thumbnail_image: typing.Optional[str] = None
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Video:
|
||||||
|
canonical_url: str
|
||||||
|
streaming_url: str
|
||||||
|
type: str
|
||||||
|
claim_id: str
|
||||||
|
created: datetime
|
||||||
|
title: str
|
||||||
|
views: int
|
||||||
|
raw: str
|
||||||
|
text: typing.Optional[str] = None
|
||||||
|
thumbnail : typing.Optional[str] = None
|
||||||
|
channel_id: typing.Optional[str] = None
|
||||||
|
channel_name: typing.Optional[str] = None
|
||||||
|
duration: typing.Optional[int] = None
|
||||||
|
languages : typing.Optional[typing.List[str]] = None
|
||||||
|
tags: typing.Optional[typing.List[str]] = None
|
||||||
|
likes: typing.Optional[int] = None
|
||||||
|
dislikes: typing.Optional[int] = None
|
||||||
|
is_comment: bool = False
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Comment:
|
||||||
|
text: str
|
||||||
|
created: datetime
|
||||||
|
claim_id : str
|
||||||
|
video_claim_id : str
|
||||||
|
channel_id: str
|
||||||
|
channel_name : str
|
||||||
|
replies: int
|
||||||
|
likes: int
|
||||||
|
dislikes: int
|
||||||
|
raw : str
|
||||||
|
is_comment: bool = True
|
||||||
|
|
||||||
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
|
class OdyseeChannelScraper:
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
#-------------------------------------------------------------------------#
|
||||||
|
|
||||||
def __init__(self, channel_name, auth_token = None):
|
def __init__(self, channel_name: str, auth_token: str = None):
|
||||||
|
|
||||||
self._channel_name = unquote(channel_name)
|
self._channel_name = unquote(channel_name)
|
||||||
|
|
||||||
info = api.get_channel_info(channel_name = self._channel_name)
|
|
||||||
|
|
||||||
self.info = info
|
|
||||||
self._channel_id = self.info['channel_id']
|
|
||||||
|
|
||||||
if auth_token is None:
|
if auth_token is None:
|
||||||
self.auth_token = api.get_auth_token()
|
self.auth_token = api.get_auth_token()
|
||||||
else:
|
else:
|
||||||
self.auth_token = auth_token
|
self.auth_token = auth_token
|
||||||
|
|
||||||
self.info['subscribers'] = api.get_subscribers(
|
self._raw_channel_info = api.get_channel_info(channel_name = self._channel_name)
|
||||||
channel_id = self.info['channel_id'],
|
self._channel_id = self._raw_channel_info['channel_id']
|
||||||
auth_token = self.auth_token)
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
#-------------------------------------------------------------------------#
|
||||||
|
|
||||||
def get_all_videos(self):
|
def get_entity(self) -> Channel:
|
||||||
|
|
||||||
"""Return list of OdyseeVideo objects for all videos posted by the channel
|
subscribers = api.get_subscribers(
|
||||||
|
channel_id = self._channel_id,
|
||||||
|
auth_token = self.auth_token)
|
||||||
|
|
||||||
|
return Channel(
|
||||||
|
channel_id=self._raw_channel_info['channel_id'],
|
||||||
|
title=self._raw_channel_info['title'],
|
||||||
|
created=datetime.fromtimestamp(self._raw_channel_info['created']),
|
||||||
|
description=self._raw_channel_info['description'],
|
||||||
|
cover_image=self._raw_channel_info['cover_image'],
|
||||||
|
thumbnail_image=self._raw_channel_info['thumbnail_image'],
|
||||||
|
raw=self._raw_channel_info['raw'],
|
||||||
|
subscribers=subscribers)
|
||||||
|
|
||||||
|
#-------------------------------------------------------------------------#
|
||||||
|
|
||||||
|
def get_all_videos(self) -> typing.Generator[Video, None, None]:
|
||||||
|
|
||||||
|
"""Return list of Video objects for all videos posted by the channel
|
||||||
"""
|
"""
|
||||||
|
|
||||||
all_video_info = api.get_all_videos(channel_id=self.info['channel_id'])
|
raw_video_info_list = api.get_raw_video_info_list(channel_id=self._channel_id)
|
||||||
self.all_videos = (OdyseeVideo(video, self.auth_token) for video in all_video_info)
|
videos = (process_raw_video_info(raw_video_info, self.auth_token) for raw_video_info in raw_video_info_list)
|
||||||
|
|
||||||
return self.all_videos
|
return videos
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
#-------------------------------------------------------------------------#
|
||||||
|
|
||||||
def get_all_videos_and_comments(self):
|
def get_all_videos_and_comments(self) -> typing.Tuple[typing.List['Video'], typing.List['Comment']]:
|
||||||
|
|
||||||
"""Return list of OdyseeVideo and OdyseeComment objects for all videos
|
"""Return list of OdyseeVideo and OdyseeComment objects for all videos
|
||||||
posted by the channel and all comments posted to those videos
|
posted by the channel and all comments posted to those videos
|
||||||
@@ -56,133 +119,131 @@ class OdyseeChannel:
|
|||||||
|
|
||||||
all_videos = list(self.get_all_videos())
|
all_videos = list(self.get_all_videos())
|
||||||
|
|
||||||
all_comments = []
|
raw_comment_info_list = []
|
||||||
|
|
||||||
for video in all_videos:
|
for video in all_videos:
|
||||||
all_comments.extend(video.get_all_comments())
|
raw_comment_info_list.extend(api.get_all_comments(video_id=video.claim_id))
|
||||||
|
|
||||||
|
all_comments = [process_raw_comment_info(raw_comment_info) for raw_comment_info in raw_comment_info_list]
|
||||||
|
|
||||||
return all_videos, all_comments
|
return all_videos, all_comments
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class OdyseeVideo:
|
def process_raw_video_info(raw_video_info: dict, auth_token = None) -> Video:
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
if auth_token is None:
|
||||||
|
auth_token = api.get_auth_token()
|
||||||
|
else:
|
||||||
|
auth_token = auth_token
|
||||||
|
|
||||||
|
# Handle edge cases
|
||||||
|
#.....................................................................#
|
||||||
|
|
||||||
|
if 'video' in raw_video_info['value']:
|
||||||
|
video_type = 'video'
|
||||||
|
duration = raw_video_info['value']['video'].get('duration')
|
||||||
|
elif 'audio' in raw_video_info['value']:
|
||||||
|
video_type = 'audio'
|
||||||
|
duration = raw_video_info['value']['audio'].get('duration')
|
||||||
|
elif 'claim_hash' in raw_video_info['value']:
|
||||||
|
video_type = 'repost'
|
||||||
|
duration = None
|
||||||
|
raw_video_info['value'] = raw_video_info['reposted_claim']['value']
|
||||||
|
raw_video_info['canonical_url'] = raw_video_info['reposted_claim']['canonical_url']
|
||||||
|
elif 'image' in raw_video_info['value']:
|
||||||
|
video_type = 'image'
|
||||||
|
duration = None
|
||||||
|
else:
|
||||||
|
video_type = 'other'
|
||||||
|
duration = None
|
||||||
|
|
||||||
|
if 'signing_channel' in raw_video_info:
|
||||||
|
channel_name = raw_video_info['signing_channel'].get('name')
|
||||||
|
if 'claim_id' in raw_video_info['signing_channel']:
|
||||||
|
channel_id = raw_video_info['signing_channel']['claim_id']
|
||||||
|
else:
|
||||||
|
channel_id = raw_video_info['signing_channel']['channel_id']
|
||||||
|
else:
|
||||||
|
channel_name = None
|
||||||
|
channel_id = None
|
||||||
|
|
||||||
|
if 'release_time' in raw_video_info['value']:
|
||||||
|
created = raw_video_info['value']['release_time']
|
||||||
|
else:
|
||||||
|
created = raw_video_info['meta']['creation_timestamp']
|
||||||
|
|
||||||
|
if 'thumbnail' in raw_video_info['value']:
|
||||||
|
thumbnail = raw_video_info['value']['thumbnail'].get('url', None)
|
||||||
|
else:
|
||||||
|
thumbnail = None
|
||||||
|
|
||||||
def __init__(self, full_video_info, auth_token = None):
|
# Retrieve additional fields
|
||||||
|
#.....................................................................#
|
||||||
if auth_token is None:
|
|
||||||
self.auth_token = api.get_auth_token()
|
|
||||||
else:
|
|
||||||
self.auth_token = auth_token
|
|
||||||
|
|
||||||
# Handle edge cases
|
|
||||||
#.....................................................................#
|
|
||||||
|
|
||||||
if 'video' in full_video_info['value']:
|
|
||||||
video_type = 'video'
|
|
||||||
duration = full_video_info['value']['video'].get('duration')
|
|
||||||
elif 'audio' in full_video_info['value']:
|
|
||||||
video_type = 'audio'
|
|
||||||
duration = full_video_info['value']['audio'].get('duration')
|
|
||||||
elif 'claim_hash' in full_video_info['value']:
|
|
||||||
video_type = 'repost'
|
|
||||||
duration = None
|
|
||||||
full_video_info['value'] = full_video_info['reposted_claim']['value']
|
|
||||||
full_video_info['canonical_url'] = full_video_info['reposted_claim']['canonical_url']
|
|
||||||
elif 'image' in full_video_info['value']:
|
|
||||||
video_type = 'image'
|
|
||||||
duration = None
|
|
||||||
else:
|
|
||||||
video_type = 'other'
|
|
||||||
duration = None
|
|
||||||
|
|
||||||
if 'signing_channel' in full_video_info:
|
|
||||||
channel_name = full_video_info['signing_channel'].get('name')
|
|
||||||
if 'claim_id' in full_video_info['signing_channel']:
|
|
||||||
channel_id = full_video_info['signing_channel']['claim_id']
|
|
||||||
else:
|
|
||||||
channel_id = full_video_info['signing_channel']['channel_id']
|
|
||||||
else:
|
|
||||||
channel_name = None
|
|
||||||
channel_id = None
|
|
||||||
|
|
||||||
if 'release_time' in full_video_info['value']:
|
|
||||||
created = full_video_info['value']['release_time']
|
|
||||||
else:
|
|
||||||
created = full_video_info['meta']['creation_timestamp']
|
|
||||||
|
|
||||||
if 'thumbnail' in full_video_info['value']:
|
|
||||||
thumbnail = full_video_info['value']['thumbnail'].get('url', None)
|
|
||||||
else:
|
|
||||||
thumbnail = None
|
|
||||||
|
|
||||||
# Store relevant information in flat dict
|
|
||||||
#.....................................................................#
|
|
||||||
|
|
||||||
self.info = {
|
|
||||||
'canonical_url' : full_video_info['canonical_url'],
|
|
||||||
'type' : video_type,
|
|
||||||
'channel_id' : channel_id,
|
|
||||||
'channel_name' : channel_name,
|
|
||||||
'claim_id' : full_video_info['claim_id'],
|
|
||||||
'created' : int(created),
|
|
||||||
'text' : full_video_info['value'].get('description'),
|
|
||||||
'languages' : full_video_info['value'].get('languages'),
|
|
||||||
'tags' : full_video_info['value'].get('tags',[]),
|
|
||||||
'title' : full_video_info['value']['title'],
|
|
||||||
'duration' : duration,
|
|
||||||
'thumbnail' : thumbnail,
|
|
||||||
'is_comment' : False,
|
|
||||||
'raw' : json.dumps(full_video_info)}
|
|
||||||
|
|
||||||
self.claim_id = self.info['claim_id']
|
|
||||||
|
|
||||||
self.info['views'] = api.get_views(video_id=self.claim_id, auth_token = self.auth_token)
|
|
||||||
|
|
||||||
self.info['likes'], self.info['dislikes'] = api.get_video_reactions(
|
|
||||||
video_id = self.claim_id,
|
|
||||||
auth_token = self.auth_token)
|
|
||||||
|
|
||||||
self.info['streaming_url'] = api.get_streaming_url(self.info['canonical_url'])
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
|
||||||
|
|
||||||
def get_all_comments(self):
|
|
||||||
|
|
||||||
all_comment_info = api.get_all_comments(video_id=self.claim_id)
|
|
||||||
self.all_comments = (OdyseeComment(comment) for comment in all_comment_info)
|
|
||||||
|
|
||||||
return self.all_comments
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------#
|
|
||||||
|
|
||||||
def get_recommended(self):
|
claim_id = raw_video_info['claim_id']
|
||||||
|
|
||||||
recommended_video_info = api.get_recommended(
|
|
||||||
video_title=self.info['title'], video_id=self.claim_id)
|
|
||||||
recommended_videos = [OdyseeVideo(video_info, self.auth_token) for video_info in recommended_video_info]
|
|
||||||
|
|
||||||
return recommended_videos
|
views = api.get_views(video_id=claim_id, auth_token = auth_token)
|
||||||
|
|
||||||
|
likes, dislikes = api.get_video_reactions(
|
||||||
|
video_id = claim_id,
|
||||||
|
auth_token = auth_token)
|
||||||
|
|
||||||
|
streaming_url = api.get_streaming_url(raw_video_info['canonical_url'])
|
||||||
|
|
||||||
|
# Return Video object
|
||||||
|
#.....................................................................#
|
||||||
|
|
||||||
|
return Video(
|
||||||
|
canonical_url = raw_video_info['canonical_url'],
|
||||||
|
type = video_type,
|
||||||
|
channel_id = channel_id,
|
||||||
|
channel_name = channel_name,
|
||||||
|
claim_id = raw_video_info['claim_id'],
|
||||||
|
created = datetime.fromtimestamp(int(created)),
|
||||||
|
text = raw_video_info['value'].get('description'),
|
||||||
|
languages = raw_video_info['value'].get('languages'),
|
||||||
|
tags = raw_video_info['value'].get('tags',[]),
|
||||||
|
title = raw_video_info['value']['title'],
|
||||||
|
duration = duration,
|
||||||
|
thumbnail = thumbnail,
|
||||||
|
is_comment = False,
|
||||||
|
raw = json.dumps(raw_video_info),
|
||||||
|
views = views,
|
||||||
|
likes = likes,
|
||||||
|
dislikes = dislikes,
|
||||||
|
streaming_url = streaming_url)
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class OdyseeComment:
|
def process_raw_comment_info(raw_comment_info: dict) -> Comment:
|
||||||
|
|
||||||
def __init__(self, full_comment_info):
|
return Comment(
|
||||||
|
text = raw_comment_info['comment'],
|
||||||
# Store relevant information in flat dict
|
created = raw_comment_info['timestamp'],
|
||||||
self.info = {
|
claim_id = raw_comment_info.get('comment_id'),
|
||||||
'text' : full_comment_info['comment'],
|
video_claim_id = raw_comment_info['claim_id'],
|
||||||
'created' : full_comment_info['timestamp'],
|
channel_id = raw_comment_info['channel_id'],
|
||||||
'claim_id' : full_comment_info.get('comment_id'),
|
channel_name = raw_comment_info['channel_name'],
|
||||||
'video_claim_id' : full_comment_info['claim_id'],
|
replies = raw_comment_info.get('replies', 0),
|
||||||
'channel_id' : full_comment_info['channel_id'],
|
likes = raw_comment_info['likes'],
|
||||||
'channel_name' : full_comment_info['channel_name'],
|
dislikes = raw_comment_info['dislikes'],
|
||||||
'replies' : full_comment_info.get('replies', 0),
|
is_comment = True,
|
||||||
'likes' : full_comment_info['likes'],
|
raw = json.dumps(raw_comment_info))
|
||||||
'dislikes' : full_comment_info['dislikes'],
|
|
||||||
'is_comment' : True,
|
|
||||||
'raw' : json.dumps(full_comment_info)}
|
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
|
def get_recommended(video: Video, auth_token: str = None) -> typing.List['Video']:
|
||||||
|
|
||||||
|
if auth_token is None:
|
||||||
|
auth_token = api.get_auth_token()
|
||||||
|
else:
|
||||||
|
auth_token = auth_token
|
||||||
|
|
||||||
|
recommended_video_info_list = api.get_recommended(
|
||||||
|
video_title=video.title, video_id=video.claim_id)
|
||||||
|
recommended_videos = [process_raw_video_info(raw_video_info, auth_token) for raw_video_info in recommended_video_info_list]
|
||||||
|
|
||||||
|
return recommended_videos
|
||||||
|
|
||||||
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
12
tests/api.py
12
tests/api.py
@@ -23,7 +23,7 @@ KWARGS_LIST = [
|
|||||||
('get_auth_token', []),
|
('get_auth_token', []),
|
||||||
('get_channel_info', ['channel_name']),
|
('get_channel_info', ['channel_name']),
|
||||||
('get_subscribers', ['channel_id', 'auth_token']),
|
('get_subscribers', ['channel_id', 'auth_token']),
|
||||||
('get_all_videos', ['channel_id']),
|
('get_raw_video_info_list', ['channel_id']),
|
||||||
('get_views', ['video_id', 'auth_token']),
|
('get_views', ['video_id', 'auth_token']),
|
||||||
('get_video_reactions', ['video_id', 'auth_token']),
|
('get_video_reactions', ['video_id', 'auth_token']),
|
||||||
('get_all_comments', ['video_id']),
|
('get_all_comments', ['video_id']),
|
||||||
@@ -34,12 +34,12 @@ KWARGS_LIST = [
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
@pytest.mark.parametrize( 'function_str,kwargs', KWARGS_LIST )
|
@pytest.mark.parametrize('function_str,kwargs', KWARGS_LIST)
|
||||||
def test_minimal_init( resources, function_str, kwargs ):
|
def test_minimal_init(resources, function_str, kwargs):
|
||||||
|
|
||||||
function = eval( f'api.{function_str}')
|
function = eval(f'api.{function_str}')
|
||||||
function_kwargs = { kwarg : resources[ kwarg ] for kwarg in kwargs }
|
function_kwargs = {kwarg: resources[kwarg] for kwarg in kwargs}
|
||||||
|
|
||||||
function( **function_kwargs )
|
function(**function_kwargs)
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
@@ -19,38 +19,35 @@ from polyphemus import base
|
|||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class TestOdyseeChannel:
|
class TestOdyseeChannelScraper:
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def test_simple_init(self, resources):
|
def test_simple_init(self, resources):
|
||||||
self.channel = base.OdyseeChannel(channel_name = resources['channel_name'])
|
self.scraper = base.OdyseeChannelScraper(channel_name = resources['channel_name'])
|
||||||
|
|
||||||
|
def test_get_entity(self):
|
||||||
|
self.scraper.get_entity()
|
||||||
|
|
||||||
def test_get_all_videos(self):
|
def test_get_all_videos(self):
|
||||||
self.channel.get_all_videos()
|
self.scraper.get_all_videos()
|
||||||
|
|
||||||
def test_get_all_videos_and_comments(self):
|
def test_get_all_videos_and_comments(self):
|
||||||
self.channel.get_all_videos_and_comments()
|
self.scraper.get_all_videos_and_comments()
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class TestOdyseeVideo:
|
def test_process_raw_video_info(resources):
|
||||||
|
video = base.process_raw_video_info(raw_video_info = resources['full_video_info'], auth_token = resources['auth_token'])
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
|
||||||
def test_simple_init(self, resources):
|
|
||||||
self.video = base.OdyseeVideo(full_video_info = resources['full_video_info'])
|
|
||||||
|
|
||||||
def test_get_all_comments(self):
|
|
||||||
self.video.get_all_comments()
|
|
||||||
|
|
||||||
def test_get_recommended(self):
|
|
||||||
self.video.get_recommended()
|
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
|
|
||||||
class TestOdyseeComment:
|
def test_get_recommended(resources):
|
||||||
|
video = base.process_raw_video_info(raw_video_info = resources['full_video_info'], auth_token = resources['auth_token'])
|
||||||
|
base.get_recommended(video = video)
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
def test_simple_init(self, resources):
|
|
||||||
self.comment = base.OdyseeComment(full_comment_info = resources['full_comment_info'])
|
def test_process_raw_comment_info(resources):
|
||||||
|
base.process_raw_comment_info(raw_comment_info = resources['full_comment_info'])
|
||||||
|
|
||||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||||
@@ -91,7 +91,7 @@ def resources():
|
|||||||
normalized_name = NORMALIZED_NAME,
|
normalized_name = NORMALIZED_NAME,
|
||||||
canonical_url = CANONICAL_URL,
|
canonical_url = CANONICAL_URL,
|
||||||
full_video_info = FULL_VIDEO_INFO,
|
full_video_info = FULL_VIDEO_INFO,
|
||||||
full_comment_info = {**COMMENT_INFO_LIST[0], **{'likes' : 8, 'dislikes' : 0}},
|
full_comment_info = {**COMMENT_INFO_LIST[0], **{'likes': 8, 'dislikes': 0}},
|
||||||
comment_info_list = COMMENT_INFO_LIST,
|
comment_info_list = COMMENT_INFO_LIST,
|
||||||
auth_token = get_auth_token())
|
auth_token = get_auth_token())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user