From 1970fa3c822e2be4bbb6f3bdf85e19b1de4c092f Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Fri, 17 Feb 2023 16:15:25 +0000 Subject: [PATCH] new instagram archiver via telegram bot --- example.orchestration.yaml | 3 +- .../archivers/instagram_tbot_archiver.py | 67 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 src/auto_archiver/archivers/instagram_tbot_archiver.py diff --git a/example.orchestration.yaml b/example.orchestration.yaml index f13b3dc..ce7332b 100644 --- a/example.orchestration.yaml +++ b/example.orchestration.yaml @@ -8,7 +8,8 @@ steps: # - telegram_archiver # - twitter_archiver # - twitter_api_archiver - # # - instagram_archiver + # - instagram_archiver + # - instagram_tbot_archiver # - tiktok_archiver - youtubedl_archiver # - wayback_archiver_enricher diff --git a/src/auto_archiver/archivers/instagram_tbot_archiver.py b/src/auto_archiver/archivers/instagram_tbot_archiver.py new file mode 100644 index 0000000..7909cf2 --- /dev/null +++ b/src/auto_archiver/archivers/instagram_tbot_archiver.py @@ -0,0 +1,67 @@ + +from telethon.sync import TelegramClient +from loguru import logger +import time, os + +from . import Archiver +from ..core import Metadata, Media + + +class InstagramTbotArchiver(Archiver): + """ + calls a telegram bot to fetch instagram posts/stories... + https://github.com/adw0rd/instagrapi + https://t.me/instagram_load_bot + """ + name = "instagram_tbot_archiver" + + def __init__(self, config: dict) -> None: + super().__init__(config) + self.assert_valid_string("api_id") + self.assert_valid_string("api_hash") + self.timeout = int(self.timeout) + self.client = TelegramClient(self.session_file, self.api_id, self.api_hash) + + @staticmethod + def configs() -> dict: + return { + "api_id": {"default": None, "help": "telegram API_ID value, go to https://my.telegram.org/apps"}, + "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"}, + "session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value."}, + "timeout": {"default": 15, "help": "timeout to fetch the instagram content in seconds."}, + } + + def setup(self) -> None: + logger.info(f"SETUP {self.name} checking login...") + with self.client.start(): + logger.success(f"SETUP {self.name} login works.") + + def download(self, item: Metadata) -> Metadata: + url = item.get_url() + if not "instagram.com" in url: return False + + result = Metadata() + tmp_dir = item.get_tmp_dir() + with self.client.start(): + chat = self.client.get_entity("instagram_load_bot") + since_id = self.client.send_message(entity=chat, message=url).id + + attempts = 0 + media = None + message = "" + time.sleep(4) + while attempts < self.timeout and (not message or not media): + attempts += 1 + time.sleep(1) + for post in self.client.iter_messages(chat, min_id=since_id): + since_id = max(since_id, post.id) + if post.media and not media: + filename_dest = os.path.join(tmp_dir, f'{chat.id}_{post.id}') + media = self.client.download_media(post.media, filename_dest) + if media: result.add_media(Media(media)) + if post.message: message += post.message + + if message: + result.set_content(message).set_title(message[:128]) + + return result.success("insta-via-bot")