mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-10 20:28:28 +03:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d2d6db162b | ||
|
|
5cfbcc0137 | ||
|
|
5fdaa6c739 | ||
|
|
3d389ee05b |
@@ -114,7 +114,7 @@ class ArchivingOrchestrator:
|
||||
# 6 - format and store formatted if needed
|
||||
# enrichers typically need access to already stored URLs etc
|
||||
if (final_media := self.formatter.format(result)):
|
||||
final_media.store()
|
||||
final_media.store(url=url)
|
||||
result.set_final_media(final_media)
|
||||
|
||||
if result.is_empty():
|
||||
|
||||
@@ -42,7 +42,7 @@ class WhisperEnricher(Enricher):
|
||||
job_results = {}
|
||||
for i, m in enumerate(to_enrich.media):
|
||||
if m.is_video() or m.is_audio():
|
||||
m.store()
|
||||
m.store(url=url)
|
||||
try:
|
||||
job_id = self.submit_job(m)
|
||||
job_results[job_id] = False
|
||||
@@ -58,7 +58,7 @@ class WhisperEnricher(Enricher):
|
||||
job_id = to_enrich.media[i].get("whisper_model")["job_id"]
|
||||
to_enrich.media[i].set("whisper_model", {
|
||||
"job_id": job_id,
|
||||
self.action: job_results[job_id]
|
||||
**job_results[job_id]
|
||||
})
|
||||
|
||||
def submit_job(self, media: Media):
|
||||
@@ -100,18 +100,16 @@ class WhisperEnricher(Enricher):
|
||||
r_res = requests.get(f'{self.api_endpoint}/jobs/{job_id}/artifacts', headers={'Authorization': f'Bearer {self.api_key}'})
|
||||
assert r_res.status_code == 200, f"Job artifacts did not respond with 200, instead with: {r_res.status_code}"
|
||||
logger.success(r_res.json())
|
||||
result = []
|
||||
for artifact in r_res.json():
|
||||
result = {}
|
||||
for art_id, artifact in enumerate(r_res.json()):
|
||||
subtitle = []
|
||||
full_text = []
|
||||
for i, d in enumerate(artifact.get("data")):
|
||||
subtitle.append(f"{i+1}\n{d.get('start')} --> {d.get('end')}\n{d.get('text').strip()}")
|
||||
full_text.append(d.get('text').strip())
|
||||
if not len(subtitle): continue
|
||||
result.append({
|
||||
"subtitle": "\n".join(subtitle),
|
||||
"full_text": "\n".join(full_text),
|
||||
})
|
||||
result[f"artifact_{art_id}_subtitle"] = "\n".join(subtitle)
|
||||
result[f"artifact_{art_id}_text"] = "\n".join(full_text)
|
||||
return result
|
||||
return False
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
}
|
||||
|
||||
.copy:hover {
|
||||
font-weight: 600;
|
||||
background: aliceblue;
|
||||
cursor: copy;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ _MAJOR = "0"
|
||||
_MINOR = "5"
|
||||
# On main and in a nightly release the patch should be one ahead of the last
|
||||
# released build.
|
||||
_PATCH = "6"
|
||||
_PATCH = "7"
|
||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||
_SUFFIX = ""
|
||||
|
||||
Reference in New Issue
Block a user