diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py
new file mode 100644
index 0000000..82b4f21
--- /dev/null
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py
@@ -0,0 +1,83 @@
+from contextlib import suppress
+from typing import Mapping, Optional
+from urllib.parse import urlparse
+
+from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin
+
+
+class TikTokDropin(Dropin):
+    """
+    Drop-in that adapts the antibot extractor enricher to TikTok pages.
+
+    Images are located through `images_selectors`; `video_selectors` returns
+    None so that videos are left to the generic extractor.
+    """
+
+    @staticmethod
+    def documentation() -> Mapping[str, str]:
+        """Return the name, description and site describing this drop-in."""
+        return {
+            "name": "TikTok Dropin",
+            "description": "Handles TikTok posts and works without authentication.",
+            "site": "tiktok.com",
+        }
+
+    @staticmethod
+    def suitable(url: str) -> bool:
+        """
+        Return True when the host of `url` is tiktok.com or one of its subdomains.
+
+        The host is parsed out of the URL rather than substring-matching the whole
+        string, so a URL that merely mentions "tiktok.com" in its path or query, or
+        a host such as "nottiktok.com", is rejected.
+        """
+        try:
+            parts = urlparse(url)
+            if not parts.netloc:
+                # Scheme-less input ("www.tiktok.com/@user"): re-parse as a network location.
+                parts = urlparse(f"//{url}")
+            host = (parts.hostname or "").lower()
+        except ValueError:  # urlparse rejects malformed hosts (e.g. unbalanced IPv6 brackets)
+            return False
+        return host == "tiktok.com" or host.endswith(".tiktok.com")
+
+    @staticmethod
+    def images_selectors() -> str:
+        """Return the CSS selector used to find the images of a post."""
+        return '[data-e2e="detail-photo"] img'
+
+    @staticmethod
+    def video_selectors() -> Optional[str]:
+        """Return None: this drop-in provides no video selector."""
+        return None  # TikTok videos should be handled by the generic extractor
+
+    def open_page(self, url: str) -> bool:
+        """
+        Wait for the page to finish loading and dismiss the cookie banner.
+
+        Returns True only if the browser's current URL is exactly `url` afterwards.
+        """
+        self.sb.wait_for_ready_state_complete()
+        self._close_cookies_banner()
+        # TODO: implement login logic
+        return url == self.sb.get_current_url()
+
+    def hit_auth_wall(self) -> bool:
+        """Return False: this drop-in never reports an authentication wall."""
+        return False  # TikTok does not require authentication for public posts
+
+    def _close_cookies_banner(self) -> None:
+        """Try to dismiss the cookie banner, then click "Skip" if it is visible."""
+        # Best effort: if the banner or any node on the path is missing the script
+        # throws, and that error is deliberately ignored.
+        # NOTE(review): confirm these element names against TikTok's banner markup.
+        with suppress(Exception):  # selenium.common.exceptions.JavascriptException
+            self.sb.execute_script("""
+                document
+                    .querySelector("tiktok-cookie-banner")
+                    .shadowRoot.querySelector("faceplate-dialog")
+                    .querySelector("button")
+                    .click()
+            """)
+        # NOTE(review): "Skip" is passed as a bare selector; confirm it targets the intended element.
+        self.sb.click_if_visible("Skip")
diff --git a/tests/extractors/test_antibot_extractor_enricher.py b/tests/extractors/test_antibot_extractor_enricher.py
index a8a51dd..3ec34f8 100644
--- a/tests/extractors/test_antibot_extractor_enricher.py
+++ b/tests/extractors/test_antibot_extractor_enricher.py
@@ -88,6 +88,13 @@ class
TestAntibotExtractorEnricher(TestExtractorBase): 5, 0, ), + ( + "https://www.tiktok.com/@tracy_2424/photo/7418200173953830162", + "TikTok", + "Dito ko lang", + 1, + 0, + ), ], ) def test_download_pages_with_media(self, setup_module, make_item, url, in_title, in_text, image_count, video_count):