from contextlib import suppress

from loguru import logger

from auto_archiver.core.metadata import Metadata
from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin


class RedditDropin(Dropin):
    """
    Reddit drop-in for the antibot extractor enricher module.

    Provides the CSS selectors used to locate post media, logs in when the
    "blocked" page is shown, and hands streaming-manifest URLs found on the
    page to the yt-dlp based downloader of the base class.
    """

    # Page text this drop-in treats as the sign that it must log in to proceed.
    _BLOCKED_TEXT = "You've been blocked by network security."

    @staticmethod
    def suitable(url: str) -> bool:
        """Return True when ``url`` contains ``reddit.com`` (plain substring match)."""
        return "reddit.com" in url

    @staticmethod
    def images_selectors() -> str:
        """Return the CSS selector matching images inside a ``shreddit-post`` element."""
        return "shreddit-post img"

    @staticmethod
    def video_selectors() -> str:
        """Return the CSS selector matching ``video``/``source`` tags inside a ``shreddit-post``."""
        return "shreddit-post video, shreddit-post source"

    def open_page(self, url: str) -> bool:
        """
        Make sure the browser ends up on ``url``, logging in first if blocked.

        :param url: the page that should be open once this method returns.
        :return: always True.
        """
        if self.sb.is_text_visible(self._BLOCKED_TEXT):
            self._login()
        # Logging in may have navigated elsewhere; only reload when needed.
        if url != self.sb.get_current_url():
            self.sb.open(url)
        return True

    def _login(self) -> None:
        """Fill in and submit the Reddit login form with the configured credentials."""
        self.sb.click_link_text("Log in")
        self.sb.wait_for_ready_state_complete()
        # The cookie banner is dismissed before the form is filled in.
        self._close_cookies_banner()

        username, password = self._get_username_password("reddit.com")
        logger.debug("RedditDropin Logging in to Reddit with username: {}", username)

        self.sb.type("#login-username", username)
        self.sb.type("#login-password", password)

        # Scroll the submit button into view before clicking it.
        login_button = self.sb.find_element("button.login")
        self.sb.execute_script("arguments[0].scrollIntoView(true);", login_button)
        self.sb.slow_click("button.login")
        self.sb.wait_for_ready_state_complete()

        # Still on the login URL: wait a little longer for the page to move on.
        if "https://www.reddit.com/login/" in self.sb.get_current_url():
            self.sb.sleep(5)
            self.sb.wait_for_ready_state_complete()

        # The block page can show up again after submitting; follow its link once more.
        if self.sb.is_text_visible(self._BLOCKED_TEXT):
            self.sb.click_link_text("Log in")
            self.sb.wait_for_ready_state_complete()
        if self.sb.is_text_visible("Welcome back"):
            logger.debug("RedditDropin Login successful")
        # NOTE(review): "this link" is passed as a selector, not as link text;
        # presumably it is meant to follow a link labelled "this link" - confirm.
        self.sb.click_if_visible("this link")

    def _close_cookies_banner(self) -> None:
        """Best-effort click on the "accept all cookies" button of the cookie banner."""
        # The script throws when the banner (or any node on the path) is
        # missing; that simply means there is nothing to close.
        with suppress(Exception):  # selenium.common.exceptions.JavascriptException
            self.sb.execute_script("""
                document
                    .querySelector("reddit-cookie-banner")
                    .shadowRoot.querySelector("faceplate-dialog")
                    .querySelector("#accept-all-cookies-button button")
                    .click()
            """)

    @logger.catch
    def add_extra_media(self, to_enrich: Metadata) -> tuple[int, int]:
        """
        Download streaming videos referenced by the post's video elements.

        Collects the ``src``/``href`` of every element matched by
        ``video_selectors()``, keeps those pointing at an ``.m3u8``, ``.mpd``
        or ``.ism`` manifest (optionally followed by a query string or
        fragment) and passes them to ``_download_videos_with_ytdlp``.

        :param to_enrich: metadata object handed on to the downloader.
        :return: ``(0, <result of _download_videos_with_ytdlp>)`` - presumably
            the number of images and videos added; confirm against the base class.
        """
        # Raw f-string so the backslash of the JavaScript regex reaches the
        # browser untouched instead of being an invalid Python escape sequence.
        filtered_urls = self.sb.execute_script(rf"""
            return [...document.querySelectorAll("{self.video_selectors()}")]
                .map(el => el.src || el.href)
                .filter(url => url && /\.(m3u8|mpd|ism)(?:[?#]|$)/.test(url));
        """)
        # Normalise a missing script result so len() below cannot fail.
        filtered_urls = filtered_urls or []
        logger.debug("RedditDropin Found {} video URLs", len(filtered_urls))
        return 0, self._download_videos_with_ytdlp(filtered_urls, to_enrich)