mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
removes rearchiving logic
This commit is contained in:
@@ -27,11 +27,6 @@ class Archiver(Step):
|
||||
# used to clean unnecessary URL parameters OR unfurl redirect links
|
||||
return url
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# archivers can signal if it does not make sense to rearchive a piece of content
|
||||
# default is rearchiving
|
||||
return True
|
||||
|
||||
def _guess_file_type(self, path: str) -> str:
|
||||
"""
|
||||
Receives a URL or filename and returns global mimetype like 'image' or 'video'
|
||||
|
||||
@@ -19,10 +19,6 @@ class TelegramArchiver(Archiver):
|
||||
def configs() -> dict:
|
||||
return {}
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# telegram posts are static
|
||||
return False
|
||||
|
||||
def download(self, item: Metadata) -> Metadata:
|
||||
url = item.get_url()
|
||||
# detect URLs that we definitely cannot handle
|
||||
|
||||
@@ -38,10 +38,6 @@ class TelethonArchiver(Archiver):
|
||||
}
|
||||
}
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# telegram posts are static
|
||||
return False
|
||||
|
||||
def setup(self) -> None:
|
||||
"""
|
||||
1. trigger login process for telegram or proceed if already saved in a session file
|
||||
|
||||
@@ -16,10 +16,6 @@ class TiktokArchiver(Archiver):
|
||||
def configs() -> dict:
|
||||
return {}
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# TikTok posts are static
|
||||
return False
|
||||
|
||||
def download(self, item: Metadata) -> Metadata:
|
||||
url = item.get_url()
|
||||
if 'tiktok.com' not in url:
|
||||
|
||||
@@ -37,9 +37,8 @@ class TwitterArchiver(Archiver):
|
||||
# https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w
|
||||
return self.link_clean_pattern.sub("\\1", url)
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# Twitter posts are static (for now)
|
||||
return False
|
||||
def best_quality_url(self, url: str) -> str:
|
||||
return re.sub(r"name=(\w+)", "name=orig", url, 1)
|
||||
|
||||
def download(self, item: Metadata) -> Metadata:
|
||||
"""
|
||||
@@ -78,7 +77,7 @@ class TwitterArchiver(Archiver):
|
||||
media.set("src", variant.url)
|
||||
mimetype = variant.contentType
|
||||
elif type(tweet_media) == Photo:
|
||||
media.set("src", tweet_media.fullUrl.replace('name=large', 'name=orig').replace('name=small', 'name=orig'))
|
||||
media.set("src", self.best_quality_url(tweet_media.fullUrl))
|
||||
mimetype = "image/jpeg"
|
||||
else:
|
||||
logger.warning(f"Could not get media URL of {tweet_media}")
|
||||
@@ -118,6 +117,7 @@ class TwitterArchiver(Archiver):
|
||||
|
||||
for i, u in enumerate(urls):
|
||||
media = Media(filename="")
|
||||
u = self.best_quality_url(u)
|
||||
media.set("src", u)
|
||||
ext = ""
|
||||
if (mtype := mimetypes.guess_type(UrlUtil.remove_get_parameters(u))[0]):
|
||||
|
||||
@@ -27,10 +27,6 @@ class VkArchiver(Archiver):
|
||||
"session_file": {"default": "secrets/vk_config.v2.json", "help": "valid VKontakte password"},
|
||||
}
|
||||
|
||||
def is_rearchivable(self, url: str) -> bool:
|
||||
# VK content is static
|
||||
return False
|
||||
|
||||
def download(self, item: Metadata) -> Metadata:
|
||||
url = item.get_url()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user