mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
check if exists
This commit is contained in:
@@ -225,7 +225,6 @@ class Archiver(ABC):
|
|||||||
key = key_folder + fname
|
key = key_folder + fname
|
||||||
|
|
||||||
self.storage.upload(thumbnail_filename, key)
|
self.storage.upload(thumbnail_filename, key)
|
||||||
|
|
||||||
cdn_url = self.storage.get_cdn_url(key)
|
cdn_url = self.storage.get_cdn_url(key)
|
||||||
cdn_urls.append(cdn_url)
|
cdn_urls.append(cdn_url)
|
||||||
|
|
||||||
|
|||||||
@@ -28,12 +28,21 @@ class VkArchiver(Archiver):
|
|||||||
# detect URLs that this archiver can handle
|
# detect URLs that this archiver can handle
|
||||||
has_wall = self.wall_pattern.search(url)
|
has_wall = self.wall_pattern.search(url)
|
||||||
if has_wall:
|
if has_wall:
|
||||||
wall_url = f'https://vk.com/{has_wall[0]}'
|
wall_id = has_wall[0]
|
||||||
logger.info(f"found valid wall id from {url=} : {wall_url=}")
|
wall_url = f'https://vk.com/{wall_id}'
|
||||||
return self.archive_wall(wall_url, check_if_exists)
|
logger.info(f"found valid wall id from {url=} : {wall_id=}")
|
||||||
|
key = self.get_html_key(wall_url)
|
||||||
|
|
||||||
|
# if check if exists will not download again
|
||||||
|
if check_if_exists and self.storage.exists(key):
|
||||||
|
screenshot = self.get_screenshot(wall_url)
|
||||||
|
cdn_url = self.storage.get_cdn_url(key)
|
||||||
|
return ArchiveResult(status="already archived", cdn_url=cdn_url, screenshot=screenshot)
|
||||||
|
|
||||||
|
return self.archive_wall(wall_url)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def archive_wall(self, wall_url, check_if_exists):
|
def archive_wall(self, wall_url):
|
||||||
res = self.vk_session.http.get(wall_url).text
|
res = self.vk_session.http.get(wall_url).text
|
||||||
soup = BeautifulSoup(res, "html.parser")
|
soup = BeautifulSoup(res, "html.parser")
|
||||||
image_urls = []
|
image_urls = []
|
||||||
|
|||||||
Reference in New Issue
Block a user