adds tests and minor improvements

This commit is contained in:
msramalho
2025-06-07 19:58:18 +01:00
parent 48c1ab3c1f
commit d13a5ef003
5 changed files with 111 additions and 18 deletions

View File

@@ -93,7 +93,6 @@ class AntibotExtractorEnricher(Extractor, Enricher):
def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool:
using_user_data_dir = self.user_data_dir if custom_data_dir else None
url = to_enrich.get_url()
# TODO: implement cookies auth = self.auth_for_site(url) and combine with if UrlUtil.is_auth_wall(url) like in ScreenshotEnricher
url_sample = url[:75]
try:

View File

@@ -24,7 +24,6 @@ class Dropin:
def suitable(url: str) -> bool:
"""
Check if the URL is suitable for processing with this dropin.
:param url: The URL to check.
:return: True if the URL is suitable for processing, False otherwise.
"""
@@ -33,7 +32,7 @@ class Dropin:
@staticmethod
def sanitize_url(url: str) -> str:
"""
Used to clean unnecessary URL parameters OR unfurl redirect links
Used to clean URLs before processing them.
"""
return url
@@ -48,8 +47,6 @@ class Dropin:
def add_extra_media(self, to_enrich: Metadata) -> tuple[int, int]:
"""
Extract image and/or video data from the currently open post with SeleniumBase. Media is added to the `to_enrich` Metadata object.
:return: A tuple (number of Images added, number of Videos added).
"""
raise NotImplementedError("This method should be implemented in the subclass")

View File

@@ -21,19 +21,14 @@ class VkDropin(Dropin):
@staticmethod
def suitable(url: str) -> bool:
"""
Only suitable for VK URLs that match the wall, photo, or video patterns.
Otherwise (for example, on page URLs) a large amount of media may be downloaded.
"""
return "vk.com" in url
@staticmethod
def sanitize_url(url: str) -> str:
# TODO: test method
"""
Transforms modal URLs like 'https://vk.com/page_name?w=wall-123456_7890' to 'https://vk.com/wall-123456_7890'
"""
for pattern in [VkDropin.WALL_PATTERN, VkDropin.PHOTO_PATTERN, VkDropin.VIDEO_PATTERN]:
for pattern in [VkDropin.WALL_PATTERN, VkDropin.VIDEO_PATTERN, VkDropin.PHOTO_PATTERN]:
match = pattern.search(url)
if match:
return f"https://vk.com/{match.group(1)}"
@@ -49,6 +44,7 @@ class VkDropin(Dropin):
return True
def _login(self) -> bool:
# TODO: test method
self.sb.activate_cdp_mode("https://vk.com")
self.sb.wait_for_ready_state_complete()
if "/feed" in self.sb.get_current_url():
@@ -91,8 +87,10 @@ class VkDropin(Dropin):
:return: A tuple (number of Images added, number of Videos added).
"""
max_videos = self.extractor.max_download_videos
video_urls = [v.get_attribute("href") for v in self.sb.find_elements('a[href*="/video-"]')][:max_videos]
video_urls = [v.get_attribute("href") for v in self.sb.find_elements('a[href*="/video-"]')]
if type(self.extractor.max_download_videos) is int:
video_urls = video_urls[: self.extractor.max_download_videos]
if not video_urls:
return 0, 0
@@ -100,7 +98,7 @@ class VkDropin(Dropin):
ydl_options = [
"-o",
os.path.join(self.extractor.tmp_dir, "%(id)s.%(ext)s"),
# "--quiet",
"--quiet",
"--no-playlist",
"--no-write-subs",
"--no-write-auto-subs",