From 69bcfea2eb96fe3619073cee9975b7ad99d6fff9 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 18 Apr 2023 18:48:51 +0100 Subject: [PATCH] to_json fix --- src/auto_archiver/core/media.py | 2 +- src/auto_archiver/enrichers/whisper_enricher.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/auto_archiver/core/media.py b/src/auto_archiver/core/media.py index 772a5a8..d16ca64 100644 --- a/src/auto_archiver/core/media.py +++ b/src/auto_archiver/core/media.py @@ -19,7 +19,7 @@ class Media: urls: List[str] = field(default_factory=list) properties: dict = field(default_factory=dict) _mimetype: str = None # eg: image/jpeg - _stored: bool = field(default=False, repr=False, metadata=config(exclude=True)) + _stored: bool = field(default=False, repr=False, metadata=config(exclude=lambda _: True)) # always exclude def store(self: Media, override_storages: List = None, url: str = "url-not-available"): # stores the media into the provided/available storages [Storage] diff --git a/src/auto_archiver/enrichers/whisper_enricher.py b/src/auto_archiver/enrichers/whisper_enricher.py index d32de18..9a3fd88 100644 --- a/src/auto_archiver/enrichers/whisper_enricher.py +++ b/src/auto_archiver/enrichers/whisper_enricher.py @@ -100,7 +100,19 @@ class WhisperEnricher(Enricher): r_res = requests.get(f'{self.api_endpoint}/jobs/{job_id}/artifacts', headers={'Authorization': f'Bearer {self.api_key}'}) assert r_res.status_code == 200, f"Job artifacts did not respond with 200, instead with: {r_res.status_code}" logger.success(r_res.json()) - return [artifact.get("data").get("text", "") for artifact in r_res.json()] + result = [] + for artifact in r_res.json(): + subtitle = [] + full_text = [] + for i, d in enumerate(artifact.get("data")): + subtitle.append(f"{i+1}\n{d.get('start')} --> {d.get('end')}\n{d.get('text').strip()}") + full_text.append(d.get('text').strip()) + if not len(subtitle): continue + result.append({ + "subtitle": "\n".join(subtitle), + "full_text": "\n".join(full_text), + }) + return result return False def _get_s3_storage(self) -> S3Storage: