polyphemus/polyphemus/api.py

# -*- coding: UTF-8 -*-

"""Functions to request and process information from Odysee APIs
"""

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

import json
import time
from typing import Callable, List, Optional, Tuple
from urllib.parse import quote

import requests

# API endpoints for Odysee data
#-----------------------------------------------------------------------------#

# JSON-RPC proxy; the functions in this module POST `resolve`, `claim_search`
# and `get` calls to it.
BACKEND_API_URL = 'https://api.na-backend.odysee.com/api/v1/proxy'
# Queried by `get_subscribers` (POST with `auth_token` and `claim_id`).
SUBSCRIBER_API_URL = 'https://api.odysee.com/subscription/sub_count'
# Queried by `get_views` (GET with `auth_token` and `claim_id`).
VIEW_API_URL = 'https://api.odysee.com/file/view_count'
# Queried by `get_video_reactions` (POST with `auth_token` and `claim_ids`).
REACTION_API_URL = 'https://api.odysee.com/reaction/list'
# JSON-RPC endpoint; receives the `comment.List` and `reaction.List` calls.
COMMENT_API_URL = 'https://comments.odysee.com/api/v2'
# Queried by `get_recommended` (GET with search parameters).
RECOMMENDATION_API_URL = 'https://recsys.odysee.com/search'
# POSTed to by `get_auth_token` to obtain a fresh `auth_token`.
NEW_USER_API_URL = 'https://api.odysee.com/user/new'

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def make_request(request: Callable[..., requests.Response], kwargs: dict) -> requests.Response:

    """Send an HTTP request, retrying with increasing delays until it succeeds.

    Up to 5 attempts are made. Before attempt ``i`` (counting from 0) the
    function sleeps for ``2 ** i - 1`` seconds, i.e. 0, 1, 3, 7 and 15 seconds.
    An attempt counts as failed when it raises or when the response status
    code is not 200.

    Parameters
    ----------
    request: callable
        Either ``requests.get`` or ``requests.post``.
    kwargs: dict
        Keyword arguments forwarded to ``request``. When no ``'timeout'`` key
        is present, a timeout of 15 seconds is used. The dict passed in by the
        caller is left unmodified.

    Returns
    -------
    response: requests.Response
        The first response received with status code 200.

    Raises
    ------
    ValueError
        If ``request`` is neither ``requests.get`` nor ``requests.post``, or
        if no attempt produced a 200 response. In the latter case, when the
        final attempt raised, that exception is chained as the cause.
    """

    if request not in [requests.get, requests.post]:
        msg = f'`request` argument must be either `requests.get` or `requests.post`, not {type(request)}'
        raise ValueError(msg)

    # Work on a copy so the default timeout never leaks into the caller's dict;
    # an explicit 'timeout' supplied by the caller still takes precedence.
    kwargs = {'timeout': 15, **kwargs}

    max_attempts = 5

    # Placeholder status reported when no attempt ever produced a response.
    status_code = 418
    last_error = None

    for attempt in range(max_attempts):
        time.sleep(2 ** attempt - 1)
        try:
            response = request(**kwargs)
        except Exception as error:
            # Best-effort retry: any failure of the call counts as one attempt.
            # Remember it so the final error can explain what went wrong.
            last_error = error
            continue

        if response.status_code == 200:
            return response

        status_code = response.status_code
        last_error = None

    msg = f'Maximum number of retries reached for request {request} with kwargs {kwargs}: status code {status_code}'
    raise ValueError(msg) from last_error

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_auth_token() -> str:

    """Request a new authorization token for API calls that need one.

    A POST is sent to ``NEW_USER_API_URL`` and the ``auth_token`` entry of the
    ``data`` object in the JSON response is returned.

    Note: calling this function many times in quick succession may result in a
    503 error.
    """

    request_kwargs = {'url' : NEW_USER_API_URL}

    response = make_request(request = requests.post, kwargs = request_kwargs)

    payload = json.loads(response.text)

    return payload['data']['auth_token']

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_channel_info(channel_name: str) -> dict:

    """Resolve a channel name and return a summary of the channel.

    Parameters
    ----------
    channel_name: str
        Channel name, inserted into the URL ``lbry://@{channel_name}``.

    Returns
    -------
    info: dict
        Dictionary with the keys ``'channel_id'``, ``'title'``, ``'created'``,
        ``'description'``, ``'cover_image'``, ``'thumbnail_image'`` and
        ``'raw'`` (the unparsed response text). Optional fields missing from
        the response are returned as ``None``.
    """

    channel_url = f'lbry://@{channel_name}'

    response = make_request(
        request = requests.post,
        kwargs = {
            'url' : BACKEND_API_URL,
            'json': {
                "jsonrpc":"2.0",
                "method":"resolve",
                "params":{
                    "urls":[channel_url]}}})

    # The resolve result is keyed by the URL that was requested
    claim = json.loads(response.text)['result'][channel_url]

    return {
        'channel_id' : claim['claim_id'],
        'title' : claim['value'].get('title'),
        'created': claim['timestamp'],
        'description': claim['value'].get('description'),
        'cover_image': claim['value'].get('cover',{}).get('url'),
        'thumbnail_image': claim['value'].get('thumbnail',{}).get('url'),
        'raw' : response.text}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_subscribers(channel_id: str, auth_token: str = None) -> int:

    """Return the subscriber count reported for a channel.

    Parameters
    ----------
    channel_id: str
        Claim ID of the channel.
    auth_token: str, optional
        Authorization token; a fresh one is requested via ``get_auth_token``
        when omitted.

    Returns
    -------
    subscribers: int
        First element of the ``data`` list in the API response.
    """

    token = get_auth_token() if auth_token is None else auth_token

    response = make_request(
        request = requests.post,
        kwargs = {
            'url' : SUBSCRIBER_API_URL,
            'data': {
                'auth_token': token,
                'claim_id': channel_id }})

    payload = json.loads(response.text)

    return payload['data'][0]

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_raw_video_info_list(channel_id: str) -> List[dict]:

    """Get a list of all claims posted by the channel with the given ID.

    Pages through the ``claim_search`` method of the backend API, 30 items at
    a time ordered by ``release_time``, until a page comes back empty.

    Parameters
    ----------
    channel_id: str
        Claim ID of the channel whose claims are to be listed.

    Returns
    -------
    raw_video_info_list: list<dict>
        List of dictionaries, with each dict corresponding to a JSON response
        containing data about a single video.

    """

    raw_video_info_list = []

    page = 1

    while True:

        json_data = {
            "jsonrpc":"2.0",
            "method":"claim_search",
            "params":{
                "page_size":30,
                "page":page,
                "order_by":["release_time"],
                "channel_ids":[channel_id]}}

        response = make_request(
            request = requests.post,
            kwargs = {
                'url' : BACKEND_API_URL,
                'json': json_data})

        videos = json.loads(response.text)['result']['items']

        # An empty page means every claim has been collected
        if not videos:
            break

        raw_video_info_list.extend(videos)
        page += 1

    return raw_video_info_list

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_views(video_id: str, auth_token: str = None) -> int:

    """Return the view count reported for a video.

    Parameters
    ----------
    video_id: str
        Claim ID of the video.
    auth_token: str, optional
        Authorization token; a fresh one is requested via ``get_auth_token``
        when omitted.

    Returns
    -------
    views: int
        First element of the ``data`` list in the API response.
    """

    token = get_auth_token() if auth_token is None else auth_token

    response = make_request(
        request = requests.get,
        kwargs = {
            'url' : VIEW_API_URL,
            'params': {
                'auth_token': token,
                'claim_id': video_id }})

    payload = json.loads(response.text)

    return payload['data'][0]

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_video_reactions(video_id: str, auth_token: str = None) -> Tuple[Optional[int], Optional[int]]:

    """Fetch the like and dislike counts for a single video.

    Parameters
    ----------
    video_id: str
        Claim ID of the video.
    auth_token: str, optional
        Authorization token; a fresh one is requested via ``get_auth_token``
        when omitted.

    Returns
    -------
    likes, dislikes: tuple
        The ``'like'`` and ``'dislike'`` entries listed for the video under
        ``others_reactions``, or ``(None, None)`` when the ``'success'`` field
        of the response is falsy.
    """

    token = get_auth_token() if auth_token is None else auth_token

    response = make_request(
        request = requests.post,
        kwargs = {
            'url' : REACTION_API_URL,
            'data': {
                'auth_token': token,
                'claim_ids': video_id }})

    payload = json.loads(response.text)

    if not payload['success']:
        return None, None

    counts = payload['data']['others_reactions'][video_id]

    return counts['like'], counts['dislike']

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_all_comments(video_id: str) -> List[dict]:

    """Get a list of all comments for a single video.

    Pages through the ``comment.List`` method of the comment API, 10 comments
    at a time, and attaches reaction counts to every page of comments via
    ``append_comment_reactions``. Paging stops at the first page that has no
    ``'items'`` entry or whose ``'items'`` list is empty.

    Parameters
    ----------
    video_id: str
        Claim ID for the video whose comments are to be scraped
        e.g. ``'84d2a91e910bee523af5422439a639f677b9c78f'``

    Returns
    -------
    all_comments: list<dict>
        List of dictionaries, with each dict corresponding to a JSON response
        containing data about a single comment for the specified video.
    """

    all_comments = []

    page = 1

    while True:

        json_data = {
            "jsonrpc":"2.0",
            "id":1,
            "method":"comment.List",
            "params":{
                "page":page,
                "claim_id":video_id,
                "page_size":10,
                "top_level":False,
                "sort_by":3}}

        response = make_request(
            request = requests.post,
            kwargs = {
                'url' : COMMENT_API_URL,
                'json': json_data})

        result = json.loads(response.text)

        # Treat an empty page like a missing one: there is nothing to fetch
        # reactions for, and continuing would page forever.
        page_items = result['result'].get('items')
        if not page_items:
            break

        comments = append_comment_reactions(comment_info_list = page_items)
        all_comments.extend(comments)
        page += 1

    return all_comments

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def append_comment_reactions(comment_info_list: List[dict]) -> List[dict]:

    """Get reaction data for each comment and insert ``'likes'`` and
    ``'dislikes'`` keys into the dict for each comment.

    The reactions of all comments are fetched with a single ``reaction.List``
    call. The dictionaries are updated in place and the same list object is
    returned. An empty list is returned as is, without contacting the API.

    Parameters
    ----------
    comment_info_list: list<dict>
        List of dictionaries, with each dict corresponding to a JSON response
        containing data about a single comment for the specified video. Each
        dict must have a ``'comment_id'`` key.

    Returns
    -------
    comments: list<dict>
        The input list, with additional ``'likes'`` and ``'dislikes'`` fields
        on each comment taken from the ``others_reactions`` section of the
        response.

    """

    # Nothing to look up: skip the API round trip
    if not comment_info_list:
        return comment_info_list

    comment_ids = ','.join(c['comment_id'] for c in comment_info_list)

    json_data = {
        "jsonrpc":"2.0",
        "id":1,
        "method":"reaction.List",
        "params":{
            "comment_ids":comment_ids}}

    response = make_request(
        request = requests.post,
        kwargs = {
            'url' : COMMENT_API_URL,
            'json': json_data})

    result = json.loads(response.text)

    reactions = result['result']['others_reactions']

    for comment in comment_info_list:
        comment_reactions = reactions[comment['comment_id']]
        comment['likes'] = comment_reactions['like']
        comment['dislikes'] = comment_reactions['dislike']

    return comment_info_list

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_recommended(video_title: str, video_id: str) -> List[dict]:

    """Get resolved claim data for the recommendations related to a video.

    The recommendation API is searched with the video title and ID (up to 20
    results requested). Every returned name is resolved through
    ``normalized_name_to_video_info``, and only stream claims whose ``value``
    contains a ``'video'`` or ``'audio'`` entry are kept.

    Parameters
    ----------
    video_title: str
        Title of the video, used as the search string.
    video_id: str
        Claim ID of the video, passed as ``related_to``.

    Returns
    -------
    recommended_video_info: list<dict>
        Resolved claim dictionaries for the recommended video/audio streams.
    """

    # NOTE(review): `requests` URL-encodes `params` itself, so the title
    # appears to be encoded twice here - confirm the API expects that.
    name = quote(video_title)

    params = {
        's':name,
        'size':'20',
        'from':'0',
        'related_to':video_id}

    response = make_request(
        request = requests.get,
        kwargs = {
            'url' : RECOMMENDATION_API_URL,
            'params': params})

    result = json.loads(response.text)

    recommended_video_info = []

    for recommendation in result:
        video_info = normalized_name_to_video_info(recommendation['name'])
        is_stream = video_info.get('value_type') == 'stream'
        # Logical `and` short-circuits, so `value` is only inspected for streams
        if is_stream and any(key in video_info.get('value', []) for key in ('video', 'audio')):
            recommended_video_info.append(video_info)

    return recommended_video_info

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def normalized_name_to_video_info(normalized_name: str) -> dict:

    """Resolve a normalized claim name through the backend API.

    Parameters
    ----------
    normalized_name: str
        Name inserted into the URL ``lbry://{normalized_name}``.

    Returns
    -------
    info: dict
        The entry of the ``resolve`` result stored under the requested URL.
    """

    video_url = f"lbry://{normalized_name}"

    response = make_request(
        request = requests.post,
        kwargs = {
            'url' : BACKEND_API_URL,
            'json': {
                "jsonrpc":"2.0",
                "method":"resolve",
                "params":{
                    "urls":[video_url]}}})

    resolved = json.loads(response.text)['result']

    return resolved[video_url]

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

def get_streaming_url(canonical_url: str) -> str:

    """Look up the streaming URL for a claim via the backend ``get`` method.

    Parameters
    ----------
    canonical_url: str
        URI of the claim, sent as the ``uri`` parameter.

    Returns
    -------
    video_url: str
        The ``'streaming_url'`` entry of the result, or ``None`` when the
        result has no such entry.
    """

    rpc_payload = {
        "jsonrpc":"2.0",
        "method":"get",
        "params":{
            "uri":canonical_url}}

    request_kwargs = {
        'url' : BACKEND_API_URL,
        'json': rpc_payload}

    response = make_request(request = requests.post, kwargs = request_kwargs)

    result = json.loads(response.text)['result']

    return result.get('streaming_url')

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#