mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 05:08:28 +03:00
s3 storaging + WIP gsheets DB
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from typing import Union, Tuple
|
||||
import gspread
|
||||
import gspread, datetime
|
||||
|
||||
# from metadata import Metadata
|
||||
from loguru import logger
|
||||
@@ -7,6 +7,7 @@ from loguru import logger
|
||||
# from . import Enricher
|
||||
from databases import Database
|
||||
from metadata import Metadata
|
||||
from media import Media
|
||||
from steps.gsheet import Gsheets
|
||||
from utils import GWorksheet
|
||||
|
||||
@@ -48,8 +49,37 @@ class GsheetsDb(Database):
|
||||
"""archival result ready - should be saved to DB"""
|
||||
logger.success(f"DONE {item}")
|
||||
gw, row = self._retrieve_gsheet(item)
|
||||
self._safe_status_update(item, 'done')
|
||||
pass
|
||||
# self._safe_status_update(item, 'done')
|
||||
|
||||
cell_updates = []
|
||||
row_values = gw.get_row(row)
|
||||
|
||||
def batch_if_valid(col, val, final_value=None):
|
||||
final_value = final_value or val
|
||||
if val and gw.col_exists(col) and gw.get_cell(row_values, col) == '':
|
||||
cell_updates.append((row, col, final_value))
|
||||
|
||||
cell_updates.append((row, 'status', item.status))
|
||||
|
||||
media: Media = item.get_single_media()
|
||||
|
||||
batch_if_valid('archive', media.cdn_url)
|
||||
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
|
||||
batch_if_valid('title', item.get_title())
|
||||
batch_if_valid('text', item.get("content", "")[:500])
|
||||
batch_if_valid('timestamp', item.get_timestamp())
|
||||
|
||||
# TODO: AFTER ENRICHMENTS
|
||||
# batch_if_valid('hash', media.hash)
|
||||
# batch_if_valid('thumbnail', result.thumbnail, f'=IMAGE("{result.thumbnail}")')
|
||||
# batch_if_valid('thumbnail_index', result.thumbnail_index)
|
||||
# batch_if_valid('duration', result.duration, str(result.duration))
|
||||
# batch_if_valid('screenshot', result.screenshot)
|
||||
# if result.wacz is not None:
|
||||
# batch_if_valid('wacz', result.wacz)
|
||||
# batch_if_valid('replaywebpage', f'https://replayweb.page/?source={quote(result.wacz)}#view=pages&url={quote(url)}')
|
||||
|
||||
gw.batch_set_cell(cell_updates)
|
||||
|
||||
def _safe_status_update(self, item: Metadata, new_status: str) -> None:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user