diff --git a/src/auto_archiver/archivers/telethon_archiver.py b/src/auto_archiver/archivers/telethon_archiver.py index 2fd556b..e0e2bed 100644 --- a/src/auto_archiver/archivers/telethon_archiver.py +++ b/src/auto_archiver/archivers/telethon_archiver.py @@ -32,6 +32,7 @@ class TelethonArchiver(Archiver): "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"}, # "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"}, "session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage"}, + "join_channels": {"default": True, "help": "disables the initial setup with channel_invites config, useful if you have a lot and get stuck"}, "channel_invites": { "default": {}, "help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup", @@ -51,7 +52,7 @@ class TelethonArchiver(Archiver): logger.info(f"SETUP {self.name} checking login...") with self.client.start(): pass - if len(self.channel_invites): + if self.join_channels and len(self.channel_invites): logger.info(f"SETUP {self.name} joining channels...") with self.client.start(): # get currently joined channels diff --git a/src/auto_archiver/core/metadata.py b/src/auto_archiver/core/metadata.py index 09587e5..d666843 100644 --- a/src/auto_archiver/core/metadata.py +++ b/src/auto_archiver/core/metadata.py @@ -21,8 +21,7 @@ class Metadata: metadata: Dict[str, Any] = field(default_factory=dict) tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata={"exclude": True}) # keys that are not to be saved in DBs media: List[Media] = field(default_factory=list) - final_media: Media = None # can be overwritten by 
formatters - rearchivable: bool = True # defaults to true, archivers can overwrite + rearchivable: bool = True # defaults to true, archivers can overwrite def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata: """ @@ -73,7 +72,6 @@ class Metadata: # custom getter/setters - def set_url(self, url: str) -> Metadata: assert type(url) is str and len(url) > 0, "invalid URL" return self.set("url", url) @@ -115,30 +113,28 @@ class Metadata: def add_media(self, media: Media, id: str = None) -> Metadata: # adds a new media, optionally including an id if media is None: return - if id is not None: media.set("id", id) + if id is not None: + assert not len([1 for m in self.media if m.get("id") == id]), f"cannot add 2 pieces of media with the same id {id}" + media.set("id", id) self.media.append(media) return media - def get_media_by_id(self, id: str) -> Media: + def get_media_by_id(self, id: str, default=None) -> Media: for m in self.media: if m.get("id") == id: return m - return None + return default def set_final_media(self, final: Media) -> Metadata: - if final: - if self.final_media: - logger.warning(f"overwriting final media value :{self.final_media} with {final}") - self.final_media = final - return self + """final media is a special type of media: if you can show only 1 this is it, it's useful for some DBs like GsheetDb""" + self.add_media(final, "_final_media") + return self # keep the fluent return that the Metadata annotation promises and the previous implementation provided - def get_single_media(self) -> Media: - # TODO: could be refactored to use a custom media.id or metadata - if self.final_media: - return self.final_media - return self.media[0] + def get_final_media(self) -> Media: + _default = self.media[0] if len(self.media) else None + return self.get_media_by_id("_final_media", _default) def get_clean_metadata(self) -> Metadata: return dict( {k: v for k, v in self.metadata.items() if k not in self.tmp_keys}, **{"processed_at": self._processed_at} - ) \ No newline at end of file + ) diff --git a/src/auto_archiver/databases/gsheet_db.py
b/src/auto_archiver/databases/gsheet_db.py index 557570e..de1548e 100644 --- a/src/auto_archiver/databases/gsheet_db.py +++ b/src/auto_archiver/databases/gsheet_db.py @@ -60,7 +60,8 @@ class GsheetsDb(Database): cell_updates.append((row, 'status', item.status)) - media: Media = item.get_single_media() - batch_if_valid('archive', "\n".join(media.urls)) + media: Media = item.get_final_media() + if media is not None: # get_final_media() yields None when the item holds no media at all + batch_if_valid('archive', "\n".join(media.urls)) batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())