From 176ce7e8da2d4c853053bdfa6d38290af6999317 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 18 Jan 2023 21:37:29 +0000 Subject: [PATCH] vk cleanup --- src/archivers/vk_archiverv2.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/archivers/vk_archiverv2.py b/src/archivers/vk_archiverv2.py index 147424d..ec06808 100644 --- a/src/archivers/vk_archiverv2.py +++ b/src/archivers/vk_archiverv2.py @@ -1,7 +1,5 @@ -import re, json, mimetypes, os - from loguru import logger -from vk_url_scraper import VkScraper, DateTimeEncoder +from vk_url_scraper import VkScraper from metadata import Metadata from media import Media @@ -15,8 +13,6 @@ class VkArchiver(Archiverv2): Currently only works for /wall posts """ name = "vk_archiver" - wall_pattern = re.compile(r"(wall.{0,1}\d+_\d+)") - photo_pattern = re.compile(r"(photo.{0,1}\d+_\d+)") def __init__(self, config: dict) -> None: super().__init__(config) @@ -40,6 +36,7 @@ class VkArchiver(Archiverv2): # some urls can contain multiple wall/photo/... parts and all will be fetched vk_scrapes = self.vks.scrape(url) if not len(vk_scrapes): return False + logger.debug(f"VK: got {len(vk_scrapes)} scraped instances") result = Metadata() for scrape in vk_scrapes: @@ -50,16 +47,6 @@ class VkArchiver(Archiverv2): result.set_content(dump_payload(vk_scrapes)) - textual_output = "" - title, datetime = vk_scrapes[0]["text"], vk_scrapes[0]["datetime"] - urls_found = [] - for scrape in vk_scrapes: - textual_output += f"id: {scrape['id']}
time utc: {scrape['datetime']}
text: {scrape['text']}
payload: {dump_payload(scrape['payload'])}


" - title = scrape["text"] if len(title) == 0 else title - datetime = scrape["datetime"] if not datetime else datetime - for attachments in scrape["attachments"].values(): - urls_found.extend(attachments) - filenames = self.vks.download_media(vk_scrapes, item.get_tmp_dir()) for filename in filenames: result.add_media(Media(filename))