mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
WIP feeder
This commit is contained in:
42
src/steps/gsheet.py
Normal file
42
src/steps/gsheet.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import json, gspread
|
||||
|
||||
from loguru import logger
|
||||
from steps.step import Step
|
||||
|
||||
|
||||
class Gsheets(Step):
    """Step that reads its settings from the ``"gsheets"`` config section and
    opens a gspread client with a service account.

    The options this step understands (and their defaults) are listed by
    :meth:`configs`.
    """

    name = "gsheets"

    def __init__(self, config: dict) -> None:
        """Load the ``"gsheets"`` options onto the instance and build the client.

        Args:
            config: full configuration mapping; ``config["gsheets"]`` is read
                by ``Step.__init__`` and must provide at least ``header`` and
                ``service_account``.

        Raises:
            AssertionError: if ``header`` is not an integer.
        """
        # Must be called explicitly: it is Step.__init__ that turns the config
        # entries into attributes such as self.header / self.service_account.
        super().__init__(config)

        # Validate before touching the credentials file so a bad config fails
        # fast. Raised explicitly (rather than via `assert`) so the check is
        # not stripped under `python -O`; bool is rejected like before.
        header = self.header
        if not isinstance(header, int) or isinstance(header, bool):
            raise AssertionError(
                f"header ({self.header}) value must be an integer not {type(self.header)}"
            )

        self.gsheets_client = gspread.service_account(filename=self.service_account)

    @staticmethod
    def configs() -> dict:
        """Return the option definitions for this step.

        Each entry maps an option name to a dict with its ``default`` value and
        a ``help`` text. ``columns`` additionally carries a ``cli_set``
        callable that merges a JSON object string into the current mapping
        (presumably invoked by the CLI layer -- confirm against the caller).
        """
        default_columns = {
            'url': 'link',
            'status': 'archive status',
            'folder': 'destination folder',
            'archive': 'archive location',
            'date': 'archive date',
            'thumbnail': 'thumbnail',
            'thumbnail_index': 'thumbnail index',
            'timestamp': 'upload timestamp',
            'title': 'upload title',
            'duration': 'duration',
            'screenshot': 'screenshot',
            'hash': 'hash',
            'wacz': 'wacz',
            'replaywebpage': 'replaywebpage',
        }
        return {
            "sheet": {"default": None, "help": "name of the sheet to archive"},
            "header": {"default": 1, "help": "index of the header row (starts at 1)"},
            "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
            "columns": {
                "default": default_columns,
                "help": "names of columns in the google sheet",
                # keys given on the command line override the current ones,
                # all other keys are kept
                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val)),
            },
        }
|
||||
36
src/steps/step.py
Normal file
36
src/steps/step.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from inspect import ClassFoundException
|
||||
from typing import Type
|
||||
from metadata import Metadata
|
||||
from abc import ABC
|
||||
# from collections.abc import Iterable
|
||||
|
||||
|
||||
@dataclass
class Step(ABC):
    """Base class for steps that are configured from a per-step config section.

    A subclass sets ``name``; on construction every entry found under
    ``config[name]`` becomes an attribute of the instance.
    """

    # key of this step's section in the config mapping; also what init()
    # compares against when selecting a subclass
    name: str = None

    def __init__(self, config: dict) -> None:
        """Copy every key/value of ``config[self.name]`` onto the instance.

        Raises:
            KeyError: if ``config`` has no section for ``self.name``.
        """
        for key, value in config[self.name].items():
            setattr(self, key, value)

    @staticmethod
    def configs() -> dict:
        """Return the option definitions of this step; the base class has none."""
        return {}

    @staticmethod
    def init(name: str, config: dict, child: Type[Step]) -> Step:
        """Instantiate the direct subclass of ``child`` whose ``name`` matches.

        Only ``child.__subclasses__()`` is searched, so classes further down
        the hierarchy (subclasses of subclasses) are not found.

        Args:
            name: value compared against each candidate's ``name`` attribute.
            config: configuration mapping passed to the selected class.
            child: class whose direct subclasses are the candidates.

        Returns:
            A new instance of the first matching subclass.

        Raises:
            ClassFoundException: if no direct subclass has that name.
        """
        for sub in child.__subclasses__():
            if sub.name == name:
                return sub(config)
        # NOTE(review): ClassFoundException is an undocumented helper of the
        # inspect module -- confirm it exists on the targeted Python versions.
        raise ClassFoundException(f"Unable to initialize STEP with {name=}")

    def get_url(self, item: Metadata) -> str:
        """Return the non-empty ``"url"`` string stored in ``item``.

        Raises:
            AssertionError: if the value is missing, not a string, or empty.
        """
        url = item.get("url")
        # Raised explicitly so the check survives `python -O`.
        if not isinstance(url, str) or not url:
            raise AssertionError(f"item url must be a non-empty string, got {url!r}")
        return url
|
||||
Reference in New Issue
Block a user