s3 storaging + WIP gsheets DB

This commit is contained in:
msramalho
2023-01-04 18:02:44 +00:00
parent bb512b36c9
commit 1cdc006b27
9 changed files with 110 additions and 37 deletions

View File

@@ -1,5 +1,5 @@
from typing import Union, Tuple
import gspread
import gspread, datetime
# from metadata import Metadata
from loguru import logger
@@ -7,6 +7,7 @@ from loguru import logger
# from . import Enricher
from databases import Database
from metadata import Metadata
from media import Media
from steps.gsheet import Gsheets
from utils import GWorksheet
@@ -48,8 +49,37 @@ class GsheetsDb(Database):
"""archival result ready - should be saved to DB"""
logger.success(f"DONE {item}")
gw, row = self._retrieve_gsheet(item)
self._safe_status_update(item, 'done')
pass
# self._safe_status_update(item, 'done')
cell_updates = []
row_values = gw.get_row(row)
def batch_if_valid(col, val, final_value=None):
final_value = final_value or val
if val and gw.col_exists(col) and gw.get_cell(row_values, col) == '':
cell_updates.append((row, col, final_value))
cell_updates.append((row, 'status', item.status))
media: Media = item.get_single_media()
batch_if_valid('archive', media.cdn_url)
batch_if_valid('date', True, datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat())
batch_if_valid('title', item.get_title())
batch_if_valid('text', item.get("content", "")[:500])
batch_if_valid('timestamp', item.get_timestamp())
# TODO: AFTER ENRICHMENTS
# batch_if_valid('hash', media.hash)
# batch_if_valid('thumbnail', result.thumbnail, f'=IMAGE("{result.thumbnail}")')
# batch_if_valid('thumbnail_index', result.thumbnail_index)
# batch_if_valid('duration', result.duration, str(result.duration))
# batch_if_valid('screenshot', result.screenshot)
# if result.wacz is not None:
# batch_if_valid('wacz', result.wacz)
# batch_if_valid('replaywebpage', f'https://replayweb.page/?source={quote(result.wacz)}#view=pages&url={quote(url)}')
gw.batch_set_cell(cell_updates)
def _safe_status_update(self, item: Metadata, new_status: str) -> None:
try: