Add hash and screenshot methods; switch to more recent ytdl fork

This commit is contained in:
Logan Williams
2022-02-25 13:54:40 +01:00
parent d76e3bc7ec
commit 1eb17e4de5
7 changed files with 444 additions and 37 deletions

View File

@@ -5,6 +5,10 @@ import shutil
from dataclasses import dataclass
from abc import ABC, abstractmethod
from urllib.parse import urlparse
import hashlib
from selenium.common.exceptions import TimeoutException
from loguru import logger
import time
from storages import Storage
from utils import mkdir_if_not_exists
@@ -19,13 +23,16 @@ class ArchiveResult:
duration: float = None
title: str = None
timestamp: datetime.datetime = None
screenshot: str = None
hash: str = None
class Archiver(ABC):
name = "default"
def __init__(self, storage: Storage):
def __init__(self, storage: Storage, driver):
    """Keep references to the storage backend and the browser driver.

    storage: object later used by get_screenshot for ``upload`` and
        ``get_cdn_url`` calls.
    driver: object later used by get_screenshot to load a URL and save a
        screenshot (presumably a Selenium WebDriver -- confirm with caller).
    """
    self.storage, self.driver = storage, driver
def __str__(self):
return self.__class__.__name__
@@ -46,6 +53,26 @@ class Archiver(ABC):
_id = _id.replace('unknown_video', 'jpg')
return f'{self.name}_{_id}{extension}'
def get_hash(self, filename):
    """Return the SHA-256 hex digest of the file at ``filename``.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so a large file is never held in memory in one piece, and the
    handle is closed even when a read fails.

    filename: path of the file to hash.
    Returns the digest as a hexadecimal string.
    Raises OSError if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as stream:
        # iter() with a sentinel stops on the empty bytes read() returns at EOF.
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def get_screenshot(self, url):
    """Capture a screenshot of ``url``, upload it, and return its CDN URL.

    The storage key is built by passing the URL path (slashes replaced by
    underscores), the current UTC timestamp in ISO format and a ``.png``
    suffix to ``self.get_key``. The image is written to ``tmp/<key>``
    before being uploaded through ``self.storage``.

    url: address of the page to capture.
    Returns whatever ``self.storage.get_cdn_url`` yields for the key.
    """
    url_path = urlparse(url).path.replace("/", "_")
    stamp = datetime.datetime.utcnow().isoformat().replace(" ", "_")
    key = self.get_key(f"{url_path}{stamp}.png")
    filename = 'tmp/' + key

    self.driver.get(url)
    # Fixed pause before capturing; presumably lets the page finish rendering.
    time.sleep(6)
    self.driver.save_screenshot(filename)

    upload_args = {'ACL': 'public-read', 'ContentType': 'image/png'}
    self.storage.upload(filename, key, extra_args=upload_args)
    return self.storage.get_cdn_url(key)
def get_thumbnails(self, filename, key, duration=None):
thumbnails_folder = filename.split('.')[0] + '/'
key_folder = key.split('.')[0] + '/'