Compare commits

...

7 Commits

Author SHA1 Message Date
msramalho
3e44b9b577 Bump version to v0.5.16 for release 2023-05-23 18:12:56 +01:00
msramalho
1a5797d0f8 feat: orchestrator fed returns archive result 2023-05-23 18:12:04 +01:00
msramalho
768b8fce9f Bump version to v0.5.15 for release 2023-05-19 12:35:26 +01:00
msramalho
613b1f1e50 properly overwrite configs 2023-05-19 12:35:19 +01:00
msramalho
919c37bfb6 Bump version to v0.5.14 for release 2023-05-19 12:18:02 +01:00
msramalho
a655b3c987 gsheet accepts ID too 2023-05-19 12:17:34 +01:00
msramalho
d645b840ee disable duplicate GH actions 2023-05-19 12:17:03 +01:00
9 changed files with 28 additions and 11 deletions

View File

@@ -9,7 +9,7 @@ on:
release:
types: [published]
push:
branches: [ "main" ]
# branches: [ "main" ]
tags: [ "v*.*.*" ]
env:

View File

@@ -12,7 +12,7 @@ on:
release:
types: [published]
push:
branches: [ "main" ]
# branches: [ "main" ]
tags: [ "v*.*.*" ]
permissions:

View File

@@ -13,6 +13,7 @@ from ..formatters import Formatter
from ..storages import Storage
from ..enrichers import Enricher
from . import Step
from ..utils import update_nested_dict
@dataclass
@@ -38,7 +39,7 @@ class Config:
self.cli_ops = {}
self.config = {}
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs:str={}):
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
"""
if yaml_config_filename is provided, the --config argument is ignored,
useful for library usage when the config values are preloaded
@@ -81,7 +82,7 @@ class Config:
# 2. read YAML config file (or use provided value)
self.yaml_config = self.read_yaml(yaml_config_filename)
self.yaml_config.update(overwrite_configs) # optional override programmatically
update_nested_dict(self.yaml_config, overwrite_configs)
# 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
self.config = defaultdict(dict)

View File

@@ -31,7 +31,7 @@ class ArchivingOrchestrator:
def feed(self) -> None:
for item in self.feeder:
self.feed_item(item)
yield self.feed_item(item)
def feed_item(self, item: Metadata) -> Metadata:
try:

View File

@@ -21,7 +21,7 @@ class Step(ABC):
def init(name: str, config: dict, child: Type[Step]) -> Step:
"""
looks into direct subclasses of child for name and returns such ab object
looks into direct subclasses of child for name and returns such an object
TODO: cannot find subclasses of child.subclasses
"""
for sub in child.__subclasses__():

View File

@@ -39,7 +39,7 @@ class GsheetsFeeder(Gsheets, Feeder):
})
def __iter__(self) -> Metadata:
sh = self.gsheets_client.open(self.sheet)
sh = self.open_sheet()
for ii, wks in enumerate(sh.worksheets()):
if not self.should_process_sheet(wks.title):
logger.debug(f"SKIPPED worksheet '{wks.title}' due to allow/block rules")

View File

@@ -10,16 +10,17 @@ class Gsheets(Step):
# without this STEP.__init__ is not called
super().__init__(config)
self.gsheets_client = gspread.service_account(filename=self.service_account)
#TODO: config should be responsible for conversions
# TODO: config should be responsible for conversions
try: self.header = int(self.header)
except: pass
assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}"
assert self.sheet is not None, "You need to define a sheet name in your orchestration file when using gsheets."
assert self.sheet is not None or self.sheet_id is not None, "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
@staticmethod
def configs() -> dict:
return {
"sheet": {"default": None, "help": "name of the sheet to archive"},
"sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
"header": {"default": 1, "help": "index of the header row (starts at 1)"},
"service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
"columns": {
@@ -41,4 +42,10 @@ class Gsheets(Step):
"help": "names of columns in the google sheet (stringified JSON object)",
"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
},
}
}
def open_sheet(self):
    """
    Open the configured spreadsheet through the gspread client.

    A truthy `self.sheet` (the sheet name) takes precedence; when it is
    missing or empty the spreadsheet is opened by key using `self.sheet_id`.
    """
    if not self.sheet:
        # no usable name configured: fall back to the spreadsheet id
        return self.gsheets_client.open_by_key(self.sheet_id)
    return self.gsheets_client.open(self.sheet)

View File

@@ -40,3 +40,12 @@ class DateTimeEncoder(json.JSONEncoder):
def dump_payload(p):
    """Serialise `p` to a 4-space indented JSON string, keeping non-ASCII characters and encoding through DateTimeEncoder."""
    json_options = {"ensure_ascii": False, "indent": 4, "cls": DateTimeEncoder}
    return json.dumps(p, **json_options)
def update_nested_dict(dictionary, update_dict):
    """
    Recursively merge `update_dict` into `dictionary`, in place.

    Only the keys present in `update_dict` are touched: nested dicts are
    merged key by key, every other value overwrites the existing one.

    Nested dicts coming from `update_dict` are copied into `dictionary`
    instead of being stored by reference, so a later merge into `dictionary`
    can never mutate the caller's `update_dict`.

    Returns None; `dictionary` is modified in place.
    """
    for key, value in update_dict.items():
        if not isinstance(value, dict):
            # plain value: overwrite whatever was there before
            dictionary[key] = value
            continue
        target = dictionary.get(key)
        if not isinstance(target, dict):
            # missing key or non-dict value: start from a fresh dict so the
            # nested dict of `update_dict` is not shared with `dictionary`
            target = dictionary[key] = {}
        update_nested_dict(target, value)

View File

@@ -3,7 +3,7 @@ _MAJOR = "0"
_MINOR = "5"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "13"
_PATCH = "16"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""