From 363a8ef67a39a55d0386c53a097c6d3a8f1d15a1 Mon Sep 17 00:00:00 2001 From: Dave Mateer Date: Mon, 18 Jul 2022 13:15:48 +0100 Subject: [PATCH 1/6] Added hash_algorithm to config to choose between SHA256 and SHA3_512 --- .gitignore | 4 +++- archivers/base_archiver.py | 14 +++++++++----- archivers/telegram_archiver.py | 3 +++ archivers/telethon_archiver.py | 4 ++-- archivers/tiktok_archiver.py | 3 +++ archivers/twitter_api_archiver.py | 6 +++--- archivers/twitter_archiver.py | 4 ++++ archivers/vk_archiver.py | 4 ++-- archivers/wayback_archiver.py | 4 ++-- archivers/youtubedl_archiver.py | 4 ++-- auto_archive.py | 16 ++++++++-------- configs/config.py | 2 ++ example.config.yaml | 5 +++++ 13 files changed, 48 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 2885782..62a5815 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ config.yaml config-*.yaml logs/* local_archive/ -vk_config*.json \ No newline at end of file +vk_config*.json + +secrets/* \ No newline at end of file diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py index 815d31e..8951115 100644 --- a/archivers/base_archiver.py +++ b/archivers/base_archiver.py @@ -31,9 +31,10 @@ class Archiver(ABC): name = "default" retry_regex = r"retrying at (\d+)$" - def __init__(self, storage: Storage, driver): + def __init__(self, storage: Storage, driver, hash_algorithm): self.storage = storage self.driver = driver + self.hash_algorithm = hash_algorithm def __str__(self): return self.__class__.__name__ @@ -163,10 +164,13 @@ class Archiver(ABC): def get_hash(self, filename): with open(filename, "rb") as f: bytes = f.read() # read entire file as bytes - # TODO: customizable hash - hash = hashlib.sha256(bytes) - # option to use SHA3_512 instead - # hash = hashlib.sha3_512(bytes) + ha = self.hash_algorithm + logger.debug(f'Hash algorithm is {ha}') + + if ha == "SHA3_512": hash = hashlib.sha3_512(bytes) + elif ha == "SHA256": hash = hashlib.sha256(bytes) + else: raise Exception("Unknown Hash Algorithm of {ha}") + return hash.hexdigest() def get_screenshot(self, url): diff --git a/archivers/telegram_archiver.py b/archivers/telegram_archiver.py index 0b6e777..c38dd30 100644 --- a/archivers/telegram_archiver.py +++ b/archivers/telegram_archiver.py @@ -11,6 +11,9 @@ from storages import Storage class TelegramArchiver(Archiver): name = "telegram" + def __init__(self, storage: Storage, driver, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) + def download(self, url, check_if_exists=False): # detect URLs that we definitely cannot handle if 't.me' != self.get_netloc(url): diff --git a/archivers/telethon_archiver.py b/archivers/telethon_archiver.py index f35e323..bce34d2 100644 --- a/archivers/telethon_archiver.py +++ b/archivers/telethon_archiver.py @@ -15,8 +15,8 @@ class TelethonArchiver(Archiver): name = "telethon" link_pattern = re.compile(r"https:\/\/t\.me(\/c){0,1}\/(.+)\/(\d+)") - def __init__(self, storage: Storage, driver, config: TelethonConfig): - super().__init__(storage, driver) + def __init__(self, storage: Storage, driver, config: TelethonConfig, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) if config: self.client = TelegramClient("./anon", config.api_id, config.api_hash) self.bot_token = config.bot_token diff --git a/archivers/tiktok_archiver.py b/archivers/tiktok_archiver.py index 8100bb1..30b8c7c 100644 --- a/archivers/tiktok_archiver.py +++ b/archivers/tiktok_archiver.py @@ -15,6 +15,9 @@ class TiktokArchiver(Archiver): status = 'success' + def __init__(self, storage: Storage, driver, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) + try: info = tiktok_downloader.info_post(url) key = self.get_key(f'{info.id}.mp4') diff --git a/archivers/twitter_api_archiver.py b/archivers/twitter_api_archiver.py index ef2bf40..99fb8f1 100644 --- a/archivers/twitter_api_archiver.py +++ b/archivers/twitter_api_archiver.py @@ -13,8 +13,8 @@ from .twitter_archiver import TwitterArchiver class TwitterApiArchiver(TwitterArchiver): name = "twitter_api" - def __init__(self, storage: Storage, driver, config: TwitterApiConfig): - super().__init__(storage, driver) + def __init__(self, storage: Storage, driver, config: TwitterApiConfig, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) if config.bearer_token: self.api = Api(bearer_token=config.bearer_token) @@ -54,7 +54,7 @@ class TwitterApiArchiver(TwitterArchiver): for u in urls: if u is None: - logger.error(f"Should not have gotten None url for {tweet.includes.media=}") + logger.debug(f"Should not have gotten None url for {tweet.includes.media=} so going to download_alternative in twitter_archiver") return self.download_alternative(url, tweet_id) logger.debug(f"found {urls=}") diff --git a/archivers/twitter_archiver.py b/archivers/twitter_archiver.py index 1c1b173..750d2c4 100644 --- a/archivers/twitter_archiver.py +++ b/archivers/twitter_archiver.py @@ -5,12 +5,16 @@ from snscrape.modules.twitter import TwitterTweetScraper, Video, Gif, Photo from .base_archiver import Archiver, ArchiveResult +from storages import Storage class TwitterArchiver(Archiver): """ This Twitter Archiver uses unofficial scraping methods, and it works as an alternative to TwitterApiArchiver when no API credentials are provided. """ + def __init__(self, storage: Storage, driver, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) + name = "twitter" link_pattern = re.compile(r"twitter.com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") diff --git a/archivers/vk_archiver.py b/archivers/vk_archiver.py index c448367..6ddba10 100644 --- a/archivers/vk_archiver.py +++ b/archivers/vk_archiver.py @@ -17,8 +17,8 @@ class VkArchiver(Archiver): wall_pattern = re.compile(r"(wall.{0,1}\d+_\d+)") photo_pattern = re.compile(r"(photo.{0,1}\d+_\d+)") - def __init__(self, storage: Storage, driver, config: VkConfig): - super().__init__(storage, driver) + def __init__(self, storage: Storage, driver, config: VkConfig, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) if config != None: self.vks = VkScraper(config.username, config.password) diff --git a/archivers/wayback_archiver.py b/archivers/wayback_archiver.py index f46d1cb..c19ca4f 100644 --- a/archivers/wayback_archiver.py +++ b/archivers/wayback_archiver.py @@ -15,8 +15,8 @@ class WaybackArchiver(Archiver): """ name = "wayback" - def __init__(self, storage: Storage, driver, config: WaybackConfig): - super(WaybackArchiver, self).__init__(storage, driver) + def __init__(self, storage: Storage, driver, config: WaybackConfig, hash_algorithm): + super(WaybackArchiver, self).__init__(storage, driver, hash_algorithm) self.config = config self.seen_urls = {} diff --git a/archivers/youtubedl_archiver.py b/archivers/youtubedl_archiver.py index 7990131..a41b6c6 100644 --- a/archivers/youtubedl_archiver.py +++ b/archivers/youtubedl_archiver.py @@ -12,8 +12,8 @@ class YoutubeDLArchiver(Archiver): name = "youtube_dl" ydl_opts = {'outtmpl': f'{Storage.TMP_FOLDER}%(id)s.%(ext)s', 'quiet': False} - def __init__(self, storage: Storage, driver, fb_cookie): - super().__init__(storage, driver) + def __init__(self, storage: Storage, driver, fb_cookie, hash_algorithm): + super().__init__(storage, driver, hash_algorithm) self.fb_cookie = fb_cookie def download(self, url, check_if_exists=False): diff --git a/auto_archive.py b/auto_archive.py index f12b9c4..72cb748 100644 --- a/auto_archive.py +++ b/auto_archive.py @@ -104,14 +104,14 @@ def process_sheet(c: Config): # order matters, first to succeed excludes remaining active_archivers = [ - TelethonArchiver(storage, c.webdriver, c.telegram_config), - TiktokArchiver(storage, c.webdriver), - TwitterApiArchiver(storage, c.webdriver, c.twitter_config), - YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie), - TelegramArchiver(storage, c.webdriver), - TwitterArchiver(storage, c.webdriver), - VkArchiver(storage, c.webdriver, c.vk_config), - WaybackArchiver(storage, c.webdriver, c.wayback_config) + TelethonArchiver(storage, c.webdriver, c.telegram_config, c.hash_algorithm), + TiktokArchiver(storage, c.webdriver, c.hash_algorithm), + TwitterApiArchiver(storage, c.webdriver, c.twitter_config, c.hash_algorithm), + YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie,c.hash_algorithm), + TelegramArchiver(storage, c.webdriver, c.hash_algorithm), + TwitterArchiver(storage, c.webdriver, c.hash_algorithm), + VkArchiver(storage, c.webdriver, c.vk_config, c.hash_algorithm), + WaybackArchiver(storage, c.webdriver, c.wayback_config, c.hash_algorithm) ] for archiver in active_archivers: diff --git a/configs/config.py b/configs/config.py index 41b531a..2d134da 100644 --- a/configs/config.py +++ b/configs/config.py @@ -81,6 +81,8 @@ class Config: ) self.webdriver = "not initialized" + self.hash_algorithm = execution.get("hash_algorithm") + # ---------------------- SECRETS - APIs and service configurations secrets = self.config.get("secrets", {}) diff --git a/example.config.yaml b/example.config.yaml index 3092efc..f823c47 100644 --- a/example.config.yaml +++ b/example.config.yaml @@ -104,3 +104,8 @@ execution: duration: duration screenshot: screenshot hash: hash + + # Must be either SHA256 or SHA3_512 + hash_algorithm: SHA3_512 + # hash_algorithm: SHA256 + From 7b8be95e250dc3f0c42ca8840972b08048eacadf Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:12:14 +0100 Subject: [PATCH 2/6] removing empty line --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 62a5815..2059faa 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,4 @@ config-*.yaml logs/* local_archive/ vk_config*.json - secrets/* \ No newline at end of file From 2d7d8c4e0803095a967ff78aeba42933a1a8f835 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:12:43 +0100 Subject: [PATCH 3/6] renaming and making default SHA-256 --- archivers/base_archiver.py | 4 ++-- configs/config.py | 3 ++- example.config.yaml | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py index 8951115..b377d31 100644 --- a/archivers/base_archiver.py +++ b/archivers/base_archiver.py @@ -167,8 +167,8 @@ class Archiver(ABC): ha = self.hash_algorithm logger.debug(f'Hash algorithm is {ha}') - if ha == "SHA3_512": hash = hashlib.sha3_512(bytes) - elif ha == "SHA256": hash = hashlib.sha256(bytes) + if ha == "SHA3-512": hash = hashlib.sha3_512(bytes) + elif ha == "SHA-256": hash = hashlib.sha256(bytes) else: raise Exception("Unknown Hash Algorithm of {ha}") return hash.hexdigest() diff --git a/configs/config.py b/configs/config.py index 2d134da..4003282 100644 --- a/configs/config.py +++ b/configs/config.py @@ -81,7 +81,7 @@ class Config: ) self.webdriver = "not initialized" - self.hash_algorithm = execution.get("hash_algorithm") + self.hash_algorithm = execution.get("hash_algorithm", "SHA-256") # ---------------------- SECRETS - APIs and service configurations secrets = self.config.get("secrets", {}) @@ -261,6 +261,7 @@ class Config: "storage": self.storage, "header": self.header, "check_if_exists": self.check_if_exists, + "hash_algorithm": self.hash_algorithm, "save_logs": self.save_logs, "selenium_config": asdict(self.selenium_config), "selenium_webdriver": self.webdriver != None, diff --git a/example.config.yaml b/example.config.yaml index f823c47..2cded09 100644 --- a/example.config.yaml +++ b/example.config.yaml @@ -80,6 +80,10 @@ execution: storage: s3 # defaults to false, when true will try to avoid duplicate URL archives check_if_exists: true + + # choose a hash algorithm (either SHA-256 or SHA3-512, defaults to SHA-256) + # hash_algorithm: SHA-256 + # optional configurations for the selenium browser that takes screenshots, these are the defaults selenium: # values under 10s might mean screenshots fail to grab screenshot @@ -105,7 +109,3 @@ execution: screenshot: screenshot hash: hash - # Must be either SHA256 or SHA3_512 - hash_algorithm: SHA3_512 - # hash_algorithm: SHA256 - From 9317b5e03582a6a27d7eb3318fcbe8ea870fc091 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:27:50 +0100 Subject: [PATCH 4/6] turning HASH_ALGORITHM into global archiver prop --- archivers/base_archiver.py | 15 ++++++--------- archivers/telegram_archiver.py | 4 ++-- archivers/telethon_archiver.py | 4 ++-- archivers/tiktok_archiver.py | 4 ++-- archivers/twitter_api_archiver.py | 4 ++-- archivers/twitter_archiver.py | 4 ++-- archivers/vk_archiver.py | 4 ++-- archivers/wayback_archiver.py | 4 ++-- archivers/youtubedl_archiver.py | 4 ++-- auto_archive.py | 16 ++++++++-------- configs/config.py | 5 +++-- 11 files changed, 33 insertions(+), 35 deletions(-) diff --git a/archivers/base_archiver.py b/archivers/base_archiver.py index b377d31..902f626 100644 --- a/archivers/base_archiver.py +++ b/archivers/base_archiver.py @@ -26,15 +26,14 @@ class ArchiveResult: screenshot: str = None hash: str = None - class Archiver(ABC): + HASH_ALGORITHM="SHA-256" # can be overwritten by user configs name = "default" retry_regex = r"retrying at (\d+)$" - def __init__(self, storage: Storage, driver, hash_algorithm): + def __init__(self, storage: Storage, driver): self.storage = storage self.driver = driver - self.hash_algorithm = hash_algorithm def __str__(self): return self.__class__.__name__ @@ -48,7 +47,6 @@ class Archiver(ABC): def get_netloc(self, url): return urlparse(url).netloc - # generate the html page eg SM3013/twitter__minmyatnaing13_status_1499415562937503751.html def generate_media_page_html(self, url, urls_info: dict, object, thumbnail=None): """ Generates an index.html page where each @urls_info is displayed @@ -164,12 +162,11 @@ class Archiver(ABC): def get_hash(self, filename): with open(filename, "rb") as f: bytes = f.read() # read entire file as bytes - ha = self.hash_algorithm - logger.debug(f'Hash algorithm is {ha}') + logger.debug(f'Hash algorithm is {self.HASH_ALGORITHM}') - if ha == "SHA3-512": hash = hashlib.sha3_512(bytes) - elif ha == "SHA-256": hash = hashlib.sha256(bytes) - else: raise Exception("Unknown Hash Algorithm of {ha}") + if self.HASH_ALGORITHM == "SHA-256": hash = hashlib.sha256(bytes) + elif self.HASH_ALGORITHM == "SHA3-512": hash = hashlib.sha3_512(bytes) + else: raise Exception(f"Unknown Hash Algorithm of {self.HASH_ALGORITHM}") return hash.hexdigest() diff --git a/archivers/telegram_archiver.py b/archivers/telegram_archiver.py index c38dd30..4b2e59c 100644 --- a/archivers/telegram_archiver.py +++ b/archivers/telegram_archiver.py @@ -11,8 +11,8 @@ from storages import Storage class TelegramArchiver(Archiver): name = "telegram" - def __init__(self, storage: Storage, driver, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver): + super().__init__(storage, driver) def download(self, url, check_if_exists=False): # detect URLs that we definitely cannot handle diff --git a/archivers/telethon_archiver.py b/archivers/telethon_archiver.py index bce34d2..f35e323 100644 --- a/archivers/telethon_archiver.py +++ b/archivers/telethon_archiver.py @@ -15,8 +15,8 @@ class TelethonArchiver(Archiver): name = "telethon" link_pattern = re.compile(r"https:\/\/t\.me(\/c){0,1}\/(.+)\/(\d+)") - def __init__(self, storage: Storage, driver, config: TelethonConfig, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver, config: TelethonConfig): + super().__init__(storage, driver) if config: self.client = TelegramClient("./anon", config.api_id, config.api_hash) self.bot_token = config.bot_token diff --git a/archivers/tiktok_archiver.py b/archivers/tiktok_archiver.py index 30b8c7c..771a7f4 100644 --- a/archivers/tiktok_archiver.py +++ b/archivers/tiktok_archiver.py @@ -15,8 +15,8 @@ class TiktokArchiver(Archiver): status = 'success' - def __init__(self, storage: Storage, driver, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver): + super().__init__(storage, driver) try: info = tiktok_downloader.info_post(url) diff --git a/archivers/twitter_api_archiver.py b/archivers/twitter_api_archiver.py index 99fb8f1..6aa1742 100644 --- a/archivers/twitter_api_archiver.py +++ b/archivers/twitter_api_archiver.py @@ -13,8 +13,8 @@ from .twitter_archiver import TwitterArchiver class TwitterApiArchiver(TwitterArchiver): name = "twitter_api" - def __init__(self, storage: Storage, driver, config: TwitterApiConfig, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver, config: TwitterApiConfig): + super().__init__(storage, driver) if config.bearer_token: self.api = Api(bearer_token=config.bearer_token) diff --git a/archivers/twitter_archiver.py b/archivers/twitter_archiver.py index 750d2c4..6fe5901 100644 --- a/archivers/twitter_archiver.py +++ b/archivers/twitter_archiver.py @@ -12,8 +12,8 @@ class TwitterArchiver(Archiver): This Twitter Archiver uses unofficial scraping methods, and it works as an alternative to TwitterApiArchiver when no API credentials are provided. """ - def __init__(self, storage: Storage, driver, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver): + super().__init__(storage, driver) name = "twitter" link_pattern = re.compile(r"twitter.com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") diff --git a/archivers/vk_archiver.py b/archivers/vk_archiver.py index 6ddba10..c448367 100644 --- a/archivers/vk_archiver.py +++ b/archivers/vk_archiver.py @@ -17,8 +17,8 @@ class VkArchiver(Archiver): wall_pattern = re.compile(r"(wall.{0,1}\d+_\d+)") photo_pattern = re.compile(r"(photo.{0,1}\d+_\d+)") - def __init__(self, storage: Storage, driver, config: VkConfig, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver, config: VkConfig): + super().__init__(storage, driver) if config != None: self.vks = VkScraper(config.username, config.password) diff --git a/archivers/wayback_archiver.py b/archivers/wayback_archiver.py index c19ca4f..f46d1cb 100644 --- a/archivers/wayback_archiver.py +++ b/archivers/wayback_archiver.py @@ -15,8 +15,8 @@ class WaybackArchiver(Archiver): """ name = "wayback" - def __init__(self, storage: Storage, driver, config: WaybackConfig, hash_algorithm): - super(WaybackArchiver, self).__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver, config: WaybackConfig): + super(WaybackArchiver, self).__init__(storage, driver) self.config = config self.seen_urls = {} diff --git a/archivers/youtubedl_archiver.py b/archivers/youtubedl_archiver.py index a41b6c6..7990131 100644 --- a/archivers/youtubedl_archiver.py +++ b/archivers/youtubedl_archiver.py @@ -12,8 +12,8 @@ class YoutubeDLArchiver(Archiver): name = "youtube_dl" ydl_opts = {'outtmpl': f'{Storage.TMP_FOLDER}%(id)s.%(ext)s', 'quiet': False} - def __init__(self, storage: Storage, driver, fb_cookie, hash_algorithm): - super().__init__(storage, driver, hash_algorithm) + def __init__(self, storage: Storage, driver, fb_cookie): + super().__init__(storage, driver) self.fb_cookie = fb_cookie def download(self, url, check_if_exists=False): diff --git a/auto_archive.py b/auto_archive.py index 72cb748..c9a6b08 100644 --- a/auto_archive.py +++ b/auto_archive.py @@ -104,14 +104,14 @@ def process_sheet(c: Config): # order matters, first to succeed excludes remaining active_archivers = [ - TelethonArchiver(storage, c.webdriver, c.telegram_config, c.hash_algorithm), - TiktokArchiver(storage, c.webdriver, c.hash_algorithm), - TwitterApiArchiver(storage, c.webdriver, c.twitter_config, c.hash_algorithm), - YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie,c.hash_algorithm), - TelegramArchiver(storage, c.webdriver, c.hash_algorithm), - TwitterArchiver(storage, c.webdriver, c.hash_algorithm), - VkArchiver(storage, c.webdriver, c.vk_config, c.hash_algorithm), - WaybackArchiver(storage, c.webdriver, c.wayback_config, c.hash_algorithm) + TelethonArchiver(storage, c.webdriver, c.telegram_config), + TiktokArchiver(storage, c.webdriver), + TwitterApiArchiver(storage, c.webdriver, c.twitter_config,), + YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie), + TelegramArchiver(storage, c.webdriver), + TwitterArchiver(storage, c.webdriver), + VkArchiver(storage, c.webdriver, c.vk_config), + WaybackArchiver(storage, c.webdriver, c.wayback_config) ] for archiver in active_archivers: diff --git a/configs/config.py b/configs/config.py index 4003282..063c4d7 100644 --- a/configs/config.py +++ b/configs/config.py @@ -1,5 +1,6 @@ import argparse, yaml, json +from archivers.base_archiver import Archiver import gspread from loguru import logger from selenium import webdriver @@ -81,7 +82,7 @@ class Config: ) self.webdriver = "not initialized" - self.hash_algorithm = execution.get("hash_algorithm", "SHA-256") + Archiver.HASH_ALGORITHM = execution.get("hash_algorithm", Archiver.HASH_ALGORITHM) # ---------------------- SECRETS - APIs and service configurations secrets = self.config.get("secrets", {}) @@ -261,7 +262,7 @@ class Config: "storage": self.storage, "header": self.header, "check_if_exists": self.check_if_exists, - "hash_algorithm": self.hash_algorithm, + "hash_algorithm": Archiver.HASH_ALGORITHM, "save_logs": self.save_logs, "selenium_config": asdict(self.selenium_config), "selenium_webdriver": self.webdriver != None, From e180b82b0d7e7f0168d5771fcbc8f8bf385b4cca Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:29:42 +0100 Subject: [PATCH 5/6] removing useless constructors --- archivers/telegram_archiver.py | 3 --- archivers/tiktok_archiver.py | 3 --- archivers/twitter_archiver.py | 4 ---- 3 files changed, 10 deletions(-) diff --git a/archivers/telegram_archiver.py b/archivers/telegram_archiver.py index 4b2e59c..0b6e777 100644 --- a/archivers/telegram_archiver.py +++ b/archivers/telegram_archiver.py @@ -11,9 +11,6 @@ from storages import Storage class TelegramArchiver(Archiver): name = "telegram" - def __init__(self, storage: Storage, driver): - super().__init__(storage, driver) - def download(self, url, check_if_exists=False): # detect URLs that we definitely cannot handle if 't.me' != self.get_netloc(url): diff --git a/archivers/tiktok_archiver.py b/archivers/tiktok_archiver.py index 771a7f4..8100bb1 100644 --- a/archivers/tiktok_archiver.py +++ b/archivers/tiktok_archiver.py @@ -15,9 +15,6 @@ class TiktokArchiver(Archiver): status = 'success' - def __init__(self, storage: Storage, driver): - super().__init__(storage, driver) - try: info = tiktok_downloader.info_post(url) key = self.get_key(f'{info.id}.mp4') diff --git a/archivers/twitter_archiver.py b/archivers/twitter_archiver.py index 6fe5901..8f646fd 100644 --- a/archivers/twitter_archiver.py +++ b/archivers/twitter_archiver.py @@ -5,15 +5,11 @@ from snscrape.modules.twitter import TwitterTweetScraper, Video, Gif, Photo from .base_archiver import Archiver, ArchiveResult -from storages import Storage - class TwitterArchiver(Archiver): """ This Twitter Archiver uses unofficial scraping methods, and it works as an alternative to TwitterApiArchiver when no API credentials are provided. """ - def __init__(self, storage: Storage, driver): - super().__init__(storage, driver) name = "twitter" link_pattern = re.compile(r"twitter.com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)") From 63140d69c145adb44c22081e7742f1bdc6ca633f Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:35:27 +0100 Subject: [PATCH 6/6] format --- auto_archive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/auto_archive.py b/auto_archive.py index c9a6b08..f12b9c4 100644 --- a/auto_archive.py +++ b/auto_archive.py @@ -106,11 +106,11 @@ def process_sheet(c: Config): active_archivers = [ TelethonArchiver(storage, c.webdriver, c.telegram_config), TiktokArchiver(storage, c.webdriver), - TwitterApiArchiver(storage, c.webdriver, c.twitter_config,), + TwitterApiArchiver(storage, c.webdriver, c.twitter_config), YoutubeDLArchiver(storage, c.webdriver, c.facebook_cookie), TelegramArchiver(storage, c.webdriver), TwitterArchiver(storage, c.webdriver), - VkArchiver(storage, c.webdriver, c.vk_config), + VkArchiver(storage, c.webdriver, c.vk_config), WaybackArchiver(storage, c.webdriver, c.wayback_config) ]