WIP refactor logic

This commit is contained in:
msramalho
2022-11-15 15:00:52 +00:00
parent 6a0ce5ced1
commit 65dd155c90
12 changed files with 320 additions and 39 deletions

View File

@@ -0,0 +1,2 @@
from .enricher import Enricher
from .enricher_screenshot import ScreenshotEnricher

20
src/enrichers/enricher.py Normal file
View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from dataclasses import dataclass
from abc import abstractmethod, ABC
from metadata import Metadata
from step import Step
@dataclass
class Enricher(Step, ABC):
    """Abstract base class for steps that enrich a ``Metadata`` item.

    Concrete enrichers override ``name`` and implement :meth:`enrich`.
    """

    # Identifier of this step type; concrete enrichers override it.
    name = "enricher"

    def __init__(self, config: dict) -> None:
        """Initialise the underlying ``Step``.

        Args:
            config: Configuration mapping handed to the enricher.
        """
        # NOTE(review): ``config`` is accepted but not forwarded to
        # ``Step.__init__`` -- confirm this is intended for the WIP refactor.
        Step.__init__(self)

    # only for typing...
    @staticmethod
    def init(name: str, config: dict) -> Enricher:
        """Delegate to ``Step.init``, passing ``Enricher`` as the third argument.

        Declared static because it takes no ``self``: without the decorator
        the call only works on the class and fails on an instance, where
        the instance would be bound as ``name``.

        Args:
            name: Forwarded unchanged to ``Step.init``.
            config: Forwarded unchanged to ``Step.init``.

        Returns:
            Whatever ``Step.init`` returns (annotated as an ``Enricher``).
        """
        return Step.init(name, config, Enricher)

    @abstractmethod
    def enrich(self, item: Metadata) -> Metadata:
        """Enrich ``item``; every concrete enricher must implement this."""

View File

@@ -0,0 +1,53 @@
from . import Enricher
from metadata import Metadata
from loguru import logger
class ScreenshotEnricher(Enricher):
    """Enricher meant to capture a screenshot of an item's URL.

    Work in progress: the capture logic is not implemented yet. The previous
    implementation is kept as commented-out reference code in :meth:`enrich`.
    """

    name = "screenshot"

    @staticmethod
    def configs() -> dict:
        """Return the configuration options of this enricher.

        Returns:
            A mapping of option name to a dict with its ``default`` value
            and a ``help`` description.
        """
        return {
            "width": {"default": 1280, "help": "width of the screenshots"},
            "height": {"default": 720, "help": "height of the screenshots"},
        }

    def enrich(self, item: Metadata) -> Metadata:
        """Placeholder for the screenshot enrichment of ``item``.

        Currently this only resolves the URL through ``self.get_url`` and
        logs it; no screenshot is taken yet.

        Args:
            item: The metadata item to enrich.

        Returns:
            The same ``item``, so the result matches the declared
            ``Metadata`` return type instead of an implicit ``None``.
        """
        url = self.get_url(item)
        # Use the module logger rather than a bare print for diagnostics.
        logger.debug(f"screenshot enricher called for url={url}")

        # TODO: port the previous implementation below to the new Step API.
        # driver = config.webdriver
        # with driver as Webdriver(): # TODO: make a util
        #     #TODO: take screenshot
        #     pass

        # logger.debug(f"getting screenshot for {url=}")
        # key = self._get_key_from_url(url, ".png", append_datetime=True)
        # filename = os.path.join(Storage.TMP_FOLDER, key)

        # # Accept cookies popup dismiss for ytdlp video
        # if 'facebook.com' in url:
        #     try:
        #         logger.debug(f'Trying fb click accept cookie popup for {url}')
        #         self.driver.get("http://www.facebook.com")
        #         foo = self.driver.find_element(By.XPATH, "//button[@data-cookiebanner='accept_only_essential_button']")
        #         foo.click()
        #         logger.debug(f'fb click worked')
        #         # linux server needs a sleep otherwise facebook cookie won't have worked and we'll get a popup on next page
        #         time.sleep(2)
        #     except:
        #         logger.warning(f'Failed on fb accept cookies for url {url}')

        # try:
        #     self.driver.get(url)
        #     time.sleep(6)
        # except TimeoutException:
        #     logger.info("TimeoutException loading page for screenshot")

        # self.driver.save_screenshot(filename)
        # self.storage.upload(filename, key, extra_args={'ACL': 'public-read', 'ContentType': 'image/png'})

        # cdn_url = self.storage.get_cdn_url(key)
        # self.add_to_media(cdn_url, key)

        # return cdn_url
        return item