mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
567edfc35e | ||
|
|
8c22a9df72 | ||
|
|
d2d6db162b | ||
|
|
5cfbcc0137 | ||
|
|
5fdaa6c739 | ||
|
|
3d389ee05b |
@@ -47,7 +47,7 @@ class Metadata:
|
|||||||
# calls .store for all contained media. storages [Storage]
|
# calls .store for all contained media. storages [Storage]
|
||||||
storages = override_storages or ArchivingContext.get("storages")
|
storages = override_storages or ArchivingContext.get("storages")
|
||||||
for media in self.media:
|
for media in self.media:
|
||||||
media.store(override_storages=storages)
|
media.store(override_storages=storages, url=self.get_url())
|
||||||
|
|
||||||
def set(self, key: str, val: Any) -> Metadata:
|
def set(self, key: str, val: Any) -> Metadata:
|
||||||
self.metadata[key] = val
|
self.metadata[key] = val
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ class ArchivingOrchestrator:
|
|||||||
# 6 - format and store formatted if needed
|
# 6 - format and store formatted if needed
|
||||||
# enrichers typically need access to already stored URLs etc
|
# enrichers typically need access to already stored URLs etc
|
||||||
if (final_media := self.formatter.format(result)):
|
if (final_media := self.formatter.format(result)):
|
||||||
final_media.store()
|
final_media.store(url=url)
|
||||||
result.set_final_media(final_media)
|
result.set_final_media(final_media)
|
||||||
|
|
||||||
if result.is_empty():
|
if result.is_empty():
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ class WhisperEnricher(Enricher):
|
|||||||
job_results = {}
|
job_results = {}
|
||||||
for i, m in enumerate(to_enrich.media):
|
for i, m in enumerate(to_enrich.media):
|
||||||
if m.is_video() or m.is_audio():
|
if m.is_video() or m.is_audio():
|
||||||
m.store()
|
m.store(url=url)
|
||||||
try:
|
try:
|
||||||
job_id = self.submit_job(m)
|
job_id = self.submit_job(m)
|
||||||
job_results[job_id] = False
|
job_results[job_id] = False
|
||||||
@@ -58,7 +58,7 @@ class WhisperEnricher(Enricher):
|
|||||||
job_id = to_enrich.media[i].get("whisper_model")["job_id"]
|
job_id = to_enrich.media[i].get("whisper_model")["job_id"]
|
||||||
to_enrich.media[i].set("whisper_model", {
|
to_enrich.media[i].set("whisper_model", {
|
||||||
"job_id": job_id,
|
"job_id": job_id,
|
||||||
self.action: job_results[job_id]
|
**job_results[job_id]
|
||||||
})
|
})
|
||||||
|
|
||||||
def submit_job(self, media: Media):
|
def submit_job(self, media: Media):
|
||||||
@@ -100,18 +100,16 @@ class WhisperEnricher(Enricher):
|
|||||||
r_res = requests.get(f'{self.api_endpoint}/jobs/{job_id}/artifacts', headers={'Authorization': f'Bearer {self.api_key}'})
|
r_res = requests.get(f'{self.api_endpoint}/jobs/{job_id}/artifacts', headers={'Authorization': f'Bearer {self.api_key}'})
|
||||||
assert r_res.status_code == 200, f"Job artifacts did not respond with 200, instead with: {r_res.status_code}"
|
assert r_res.status_code == 200, f"Job artifacts did not respond with 200, instead with: {r_res.status_code}"
|
||||||
logger.success(r_res.json())
|
logger.success(r_res.json())
|
||||||
result = []
|
result = {}
|
||||||
for artifact in r_res.json():
|
for art_id, artifact in enumerate(r_res.json()):
|
||||||
subtitle = []
|
subtitle = []
|
||||||
full_text = []
|
full_text = []
|
||||||
for i, d in enumerate(artifact.get("data")):
|
for i, d in enumerate(artifact.get("data")):
|
||||||
subtitle.append(f"{i+1}\n{d.get('start')} --> {d.get('end')}\n{d.get('text').strip()}")
|
subtitle.append(f"{i+1}\n{d.get('start')} --> {d.get('end')}\n{d.get('text').strip()}")
|
||||||
full_text.append(d.get('text').strip())
|
full_text.append(d.get('text').strip())
|
||||||
if not len(subtitle): continue
|
if not len(subtitle): continue
|
||||||
result.append({
|
result[f"artifact_{art_id}_subtitle"] = "\n".join(subtitle)
|
||||||
"subtitle": "\n".join(subtitle),
|
result[f"artifact_{art_id}_text"] = "\n".join(full_text)
|
||||||
"full_text": "\n".join(full_text),
|
|
||||||
})
|
|
||||||
return result
|
return result
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.copy:hover {
|
.copy:hover {
|
||||||
font-weight: 600;
|
background: aliceblue;
|
||||||
cursor: copy;
|
cursor: copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ _MAJOR = "0"
|
|||||||
_MINOR = "5"
|
_MINOR = "5"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "6"
|
_PATCH = "8"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user