Files
auto-archiver/archivers/base_archiver.py
2022-02-23 16:07:58 +01:00

107 lines
3.0 KiB
Python

import datetime
import html
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from urllib.parse import urlparse

import ffmpeg

from storages import Storage


@dataclass
class ArchiveResult:
    """Outcome of a single archive attempt.

    Only ``status`` is required; every other field defaults to ``None`` and
    is therefore annotated as ``Optional``.
    """

    status: str
    # NOTE(review): presumably the CDN URL of the archived media -- confirm
    # against the concrete archivers.
    cdn_url: Optional[str] = None
    # NOTE(review): thumbnail / thumbnail_index look like the two values
    # returned by Archiver.get_thumbnails (key thumbnail URL and index page
    # URL) -- confirm against callers.
    thumbnail: Optional[str] = None
    thumbnail_index: Optional[str] = None
    duration: Optional[float] = None
    title: Optional[str] = None
    timestamp: Optional[datetime.datetime] = None
class Archiver(ABC):
    """Abstract base class for archivers.

    Subclasses implement :meth:`download`. This base class supplies helpers
    for deriving storage keys and for extracting, uploading and indexing
    video thumbnails through the injected storage object.
    """

    # Prefix that get_key() prepends to every storage key.
    name = "default"

    def __init__(self, storage: "Storage"):
        """Keep a reference to the storage object used for uploads."""
        self.storage = storage

    def __str__(self):
        """Return the name of the concrete class."""
        return self.__class__.__name__

    @abstractmethod
    def download(self, url, check_if_exists=False):
        """Archive ``url``; every concrete archiver must implement this."""

    def get_netloc(self, url):
        """Return the network-location part of ``url`` (e.g. ``example.com``)."""
        return urlparse(url).netloc

    def get_key(self, filename):
        """Return the storage key ``"<name>_<basename>"`` for ``filename``.

        Directory components are dropped and the extension is kept, so with
        ``name = "default"`` the path ``/tmp/abc.mp4`` maps to
        ``default_abc.mp4``.
        """
        stem, extension = os.path.splitext(os.path.basename(filename))
        # Every 'unknown_video' placeholder in the stem is rewritten to 'jpg'.
        # NOTE(review): presumably marks image-only downloads -- confirm.
        stem = stem.replace('unknown_video', 'jpg')
        return f'{self.name}_{stem}{extension}'

    @staticmethod
    def _thumbnail_fps(duration):
        """Return the ffmpeg sampling rate to use for a clip of ``duration`` seconds."""
        default_fps = 0.5
        if duration is None:
            return default_fps
        duration = float(duration)
        # `not > 0` (rather than `<= 0`) also catches NaN; zero would
        # otherwise raise ZeroDivisionError and a negative value would
        # produce a negative frame rate.
        if not duration > 0:
            return default_fps
        # Aim for a fixed number of frames per clip: 10 below one minute,
        # 20 below two minutes and 40 for anything longer.
        if duration < 60:
            target_frames = 10.0
        elif duration < 120:
            target_frames = 20.0
        else:
            target_frames = 40.0
        return target_frames / duration

    @staticmethod
    def _render_index_page(title, image_urls):
        """Return the HTML index page showing one ``<img>`` per thumbnail URL."""
        # Both values end up in a page that is uploaded as public-read, so
        # they are escaped before being embedded in the markup.
        images = ''.join(
            f'<img src="{html.escape(url)}" />' for url in image_urls
        )
        return (
            f'<html><head><title>{html.escape(title)}</title></head>\n'
            f'<body>{images}</body></html>'
        )

    def get_thumbnails(self, filename, key, duration=None):
        """Extract thumbnails from a video, upload them and publish an index.

        Frames are sampled with ffmpeg into a local folder named after
        ``filename`` without its extension, scaled to a width of 512 pixels,
        uploaded below a storage folder derived from ``key`` and then removed
        locally. An ``index.html`` listing all thumbnails is uploaded to the
        same storage folder.

        Args:
            filename: Path of the local video file.
            key: Storage key of the video; the text before its first ``.``
                names the storage folder that receives the thumbnails.
            duration: Length of the video in seconds (number or numeric
                string), used to pick the sampling rate. ``None`` or a
                non-positive value falls back to 0.5 frames per second.

        Returns:
            A tuple ``(key_thumb, thumb_index_cdn_url)``: the CDN URL of the
            thumbnail about 10% of the way through the frame sequence and the
            CDN URL of the index page. When ffmpeg produced no thumbnail the
            tuple of strings ``('None', 'None')`` is returned instead.
        """
        # splitext only strips the final extension; splitting on the first
        # '.' would mangle paths such as './tmp/video.mp4'.
        thumbnails_folder = os.path.splitext(filename)[0] + '/'
        # The storage folder keeps its historical derivation so that keys of
        # newly archived items stay consistent with existing ones.
        key_folder = key.split('.')[0] + '/'
        os.makedirs(thumbnails_folder, exist_ok=True)

        fps = self._thumbnail_fps(duration)
        stream = ffmpeg.input(filename)
        stream = ffmpeg.filter(stream, 'fps', fps=fps).filter('scale', 512, -1)
        stream.output(thumbnails_folder + 'out%d.jpg').run()

        # os.listdir returns entries in arbitrary order. Sorting by
        # (length, name) puts out1.jpg, out2.jpg, ..., out10.jpg in numeric
        # order, so the key thumbnail and the index follow the video timeline.
        frames = sorted(
            (entry for entry in os.listdir(thumbnails_folder)
             if entry.endswith('.jpg')),
            key=lambda entry: (len(entry), entry),
        )

        cdn_urls = []
        for fname in frames:
            thumbnail_filename = thumbnails_folder + fname
            thumb_key = key_folder + fname
            cdn_url = self.storage.get_cdn_url(thumb_key)
            self.storage.upload(thumbnail_filename, thumb_key)
            cdn_urls.append(cdn_url)
            os.remove(thumbnail_filename)

        if not cdn_urls:
            return ('None', 'None')

        # Use the frame roughly 10% into the sequence as the key thumbnail.
        key_thumb = cdn_urls[int(len(cdn_urls) * 0.1)]

        index_fname = thumbnails_folder + 'index.html'
        with open(index_fname, 'w', encoding='utf-8') as f:
            f.write(self._render_index_page(filename, cdn_urls))

        thumb_index = key_folder + 'index.html'
        self.storage.upload(index_fname, thumb_index, extra_args={'ACL': 'public-read', 'ContentType': 'text/html'})
        thumb_index_cdn_url = self.storage.get_cdn_url(thumb_index)
        return (key_thumb, thumb_index_cdn_url)