From 0765640bff96a1557adcb67042330dfbbaa3aab1 Mon Sep 17 00:00:00 2001
From: Patrick Robertson
Date: Mon, 17 Mar 2025 10:31:22 +0000
Subject: [PATCH] Fix up tiktok dropin for slightly modified generic_extractor format

---
Review notes (this section is ignored by `git am`):
 * The `timestamp` / `upload_date` guards now parenthesize the walrus
   assignment. Unparenthesized, `:=` captures the whole `... and not ...`
   expression, so the name would hold a bool instead of the popped value
   (fromtimestamp(True) -> 1970-01-01T00:00:01Z, strptime(True, ...) -> TypeError).
 * Hunk indentation was reconstructed from a whitespace-collapsed copy of
   this patch; verify context lines against the tree before applying.

 .../modules/generic_extractor/generic_extractor.py | 14 +++++++++-----
 .../modules/generic_extractor/tiktok.py            | 11 +++++++----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
index 481f4ea..2f44ba8 100644
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -198,9 +198,13 @@ class GenericExtractor(Extractor):
         result = self.download_additional_media(video_data, info_extractor, result)
 
         # keep both 'title' and 'fulltitle', but prefer 'title', falling back to 'fulltitle' if it doesn't exist
-        result.set_title(video_data.pop("title", video_data.pop("fulltitle", "")))
-        result.set_url(url)
-        if "description" in video_data:
+        if not result.get_title():
+            result.set_title(video_data.pop("title", video_data.pop("fulltitle", "")))
+
+        if not result.get("url"):
+            result.set_url(url)
+
+        if "description" in video_data and not result.get_content():
             result.set_content(video_data["description"])
         # extract comments if enabled
         if self.comments:
@@ -217,10 +221,10 @@ class GenericExtractor(Extractor):
             )
 
         # then add the common metadata
-        if timestamp := video_data.pop("timestamp", None):
+        if (timestamp := video_data.pop("timestamp", None)) and not result.get("timestamp"):
             timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
             result.set_timestamp(timestamp)
-        if upload_date := video_data.pop("upload_date", None):
+        if (upload_date := video_data.pop("upload_date", None)) and not result.get("upload_date"):
             upload_date = datetime.datetime.strptime(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
             result.set("upload_date", upload_date)
 
diff --git a/src/auto_archiver/modules/generic_extractor/tiktok.py b/src/auto_archiver/modules/generic_extractor/tiktok.py
index e05d298..b25abca 100644
--- a/src/auto_archiver/modules/generic_extractor/tiktok.py
+++ b/src/auto_archiver/modules/generic_extractor/tiktok.py
@@ -38,6 +38,9 @@ class Tiktok(GenericDropin):
         api_data["video_url"] = video_url
         return api_data
 
+    def keys_to_clean(self, video_data: dict, info_extractor):
+        return ["video_url", "title", "create_time", "author", "cover", "origin_cover", "ai_dynamic_cover", "duration"]
+
     def create_metadata(self, post: dict, ie_instance, archiver, url):
         # prepare result, start by downloading video
         result = Metadata()
@@ -54,17 +57,17 @@ class Tiktok(GenericDropin):
             logger.error(f"failed to download video from {video_url}")
             return False
         video_media = Media(video_downloaded)
-        if duration := post.pop("duration", None):
+        if duration := post.get("duration", None):
             video_media.set("duration", duration)
         result.add_media(video_media)
 
         # add remaining metadata
-        result.set_title(post.pop("title", ""))
+        result.set_title(post.get("title", ""))
 
-        if created_at := post.pop("create_time", None):
+        if created_at := post.get("create_time", None):
             result.set_timestamp(datetime.fromtimestamp(created_at, tz=timezone.utc))
 
-        if author := post.pop("author", None):
+        if author := post.get("author", None):
             result.set("author", author)
 
         result.set("api_data", post)