From 1942e8b819135128993b149e32702f459afaa0b3 Mon Sep 17 00:00:00 2001
From: erinhmclark
Date: Fri, 24 Jan 2025 13:34:30 +0000
Subject: [PATCH] Gsheets utility revert

---
 src/auto_archiver/utils/gsheet.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/auto_archiver/utils/gsheet.py b/src/auto_archiver/utils/gsheet.py
index 78f01c5..485344f 100644
--- a/src/auto_archiver/utils/gsheet.py
+++ b/src/auto_archiver/utils/gsheet.py
@@ -16,6 +16,36 @@ class Gsheets(Step):
         assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}"
         assert self.sheet is not None or self.sheet_id is not None, "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
 
+    # Option schema (defaults + help text) for the Gsheets step. TODO merge this into gsheets processors manifest
+    @staticmethod
+    def configs() -> dict:  # maps option name -> {"default": ..., "help": ...[, "cli_set": callable]}
+        return {  # a fresh dict is built on every call, so callers may mutate the result
+            "sheet": {"default": None, "help": "name of the sheet to archive"},
+            "sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
+            "header": {"default": 1, "help": "index of the header row (starts at 1)"},
+            "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
+            "columns": {
+                "default": {  # per the help text below, values are the column names used in the sheet
+                    'url': 'link',
+                    'status': 'archive status',
+                    'folder': 'destination folder',
+                    'archive': 'archive location',
+                    'date': 'archive date',
+                    'thumbnail': 'thumbnail',
+                    'timestamp': 'upload timestamp',
+                    'title': 'upload title',
+                    'text': 'text content',
+                    'screenshot': 'screenshot',
+                    'hash': 'hash',
+                    'pdq_hash': 'perceptual hashes',
+                    'wacz': 'wacz',
+                    'replaywebpage': 'replaywebpage',
+                },
+                "help": "names of columns in the google sheet (stringified JSON object)",
+                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))  # overlay parsed JSON on a copy of cur_val (JSON wins on key clashes); assumes `json` is imported at module top - not visible in this hunk
+            },
+        }
+
     def open_sheet(self):
         if self.sheet:
             return self.gsheets_client.open(self.sheet)