REMOVES vk_extractor until further notice

2026-06-12 05:08:28 +03:00 · 2025-06-02 12:06:02 +01:00
parent ac24fd8f49
commit 2d7206f99d
8 changed files with 618 additions and 872 deletions
--- a/src/auto_archiver/modules/vk_extractor/__init__.py
+++ b/src/auto_archiver/modules/vk_extractor/__init__.py
@@ -1 +0,0 @@
-from .vk_extractor import VkExtractor
--- a/src/auto_archiver/modules/vk_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/vk_extractor/__manifest__.py
@@ -1,37 +0,0 @@
-{
-    "name": "VKontakte Extractor",
-    "type": ["extractor"],
-    "requires_setup": True,
-    "depends": ["core", "utils"],
-    "dependencies": {
-        "python": ["loguru", "vk_url_scraper"],
-    },
-    "configs": {
-        "username": {"required": True, "help": "valid VKontakte username"},
-        "password": {"required": True, "help": "valid VKontakte password"},
-        "session_file": {
-            "default": "secrets/vk_config.v2.json",
-            "help": "valid VKontakte password",
-        },
-    },
-    "description": """
-The `VkExtractor` fetches posts, text, and images from VK (VKontakte) social media pages. 
-This archiver is specialized for `/wall` posts and uses the `VkScraper` library to extract 
-and download content. Note that VK videos are handled separately by the `YTDownloader`.
-
-### Features
- Extracts text, timestamps, and metadata from VK `/wall` posts.
- Downloads associated images and attaches them to the resulting `Metadata` object.
- Processes multiple segments of VK URLs that contain mixed content (e.g., wall, photo).
- Outputs structured metadata and media using `Metadata` and `Media` objects.
-
-### Setup
-To use the `VkArchiver`, you must provide valid VKontakte login credentials and session information:
- **Username**: A valid VKontakte account username.
- **Password**: The corresponding password for the VKontakte account.
- **Session File**: Optional. Path to a session configuration file (`.json`) for persistent VK login.
-
-Credentials can be set in the configuration file or directly via environment variables. Ensure you 
-have access to the VKontakte API by creating an account at [VKontakte](https://vk.com/).
-""",
-}
--- a/src/auto_archiver/modules/vk_extractor/vk_extractor.py
+++ b/src/auto_archiver/modules/vk_extractor/vk_extractor.py
@@ -1,43 +0,0 @@
-from loguru import logger
-from vk_url_scraper import VkScraper
-
-from auto_archiver.utils.misc import dump_payload
-from auto_archiver.core import Extractor
-from auto_archiver.core import Metadata, Media
-
-
-class VkExtractor(Extractor):
-    """ "
-    VK videos are handled by YTDownloader, this archiver gets posts text and images.
-    Currently only works for /wall posts
-    """
-
-    def setup(self) -> None:
-        self.vks = VkScraper(self.username, self.password, session_file=self.session_file)
-
-    def download(self, item: Metadata) -> Metadata:
-        url = item.get_url()
-
-        if "vk.com" not in item.netloc:
-            return False
-
-        # some urls can contain multiple wall/photo/... parts and all will be fetched
-        vk_scrapes = self.vks.scrape(url)
-        if not len(vk_scrapes):
-            return False
-        logger.debug(f"VK: got {len(vk_scrapes)} scraped instances")
-
-        result = Metadata()
-        for scrape in vk_scrapes:
-            if not result.get_title():
-                result.set_title(scrape["text"])
-            if not result.get_timestamp():
-                result.set_timestamp(scrape["datetime"])
-
-        result.set_content(dump_payload(vk_scrapes))
-
-        filenames = self.vks.download_media(vk_scrapes, self.tmp_dir)
-        for filename in filenames:
-            result.add_media(Media(filename))
-
-        return result.success("vk")