bump to patch 23

Fix ValueError
fix: on missing col
2026-06-12 21:28:29 +03:00 · 2023-06-06 12:24:43 -06:00 · 2023-06-06 12:13:08 -06:00 · 2023-05-24 20:25:30 +01:00 · 2023-05-24 20:24:35 +01:00 · 2023-05-24 20:24:15 +01:00
14 changed files with 80 additions and 22 deletions
--- a/.github/workflows/docker-publish.yaml
+++ b/.github/workflows/docker-publish.yaml
@@ -9,7 +9,7 @@ on:
  release:
    types: [published]
  push:
-    branches: [ "main" ]
+    # branches: [ "main" ]
    tags: [ "v*.*.*" ]
 env:
--- a/.github/workflows/python-publish.yaml
+++ b/.github/workflows/python-publish.yaml
@@ -12,7 +12,7 @@ on:
  release:
    types: [published]
  push:
-    branches: [ "main" ]
+    # branches: [ "main" ]
    tags: [ "v*.*.*" ]
 permissions:
--- a/src/auto_archiver/main.py
+++ b/src/auto_archiver/main.py
@@ -5,7 +5,7 @@ def main():
    config = Config()
    config.parse()
    orchestrator = ArchivingOrchestrator(config)
-    orchestrator.feed()
+    for r in orchestrator.feed(): pass
 if __name__ == "__main__":
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -13,6 +13,7 @@ from ..formatters import Formatter
 from ..storages import Storage
 from ..enrichers import Enricher
 from . import Step
 from ..utils import update_nested_dict
@dataclass
@@ -38,7 +39,7 @@ class Config:
        self.cli_ops = {}
        self.config = {}
-    def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs:str={}):
+    def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
        """
        if yaml_config_filename is provided, the --config argument is ignored, 
        useful for library usage when the config values are preloaded
@@ -81,7 +82,7 @@ class Config:
        # 2. read YAML config file (or use provided value)
        self.yaml_config = self.read_yaml(yaml_config_filename)
-        self.yaml_config.update(overwrite_configs) # optional override programmatically
+        update_nested_dict(self.yaml_config, overwrite_configs)
        # 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
        self.config = defaultdict(dict)
--- a/src/auto_archiver/core/media.py
+++ b/src/auto_archiver/core/media.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
-from ast import List
+from typing import Any, List
 from typing import Any
 from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json, config
 import mimetypes
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
-from ast import List, Set
+from typing import Any, List, Union, Dict
 from typing import Any, Union, Dict
 from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json, config
 import datetime
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
-from ast import List
+from typing import Generator, Union, List
 from typing import Union
 from .context import ArchivingContext
@@ -10,7 +9,6 @@ from ..formatters import Formatter
 from ..storages import Storage
 from ..enrichers import Enricher
 from ..databases import Database
 from .media import Media
 from .metadata import Metadata
 import tempfile, traceback
@@ -29,9 +27,9 @@ class ArchivingOrchestrator:
        for a in self.archivers: a.setup()
-    def feed(self) -> None:
+    def feed(self) -> Generator[Metadata]:
        for item in self.feeder:
-            self.feed_item(item)
+            yield self.feed_item(item)
    def feed_item(self, item: Metadata) -> Metadata:
        try:
--- a/src/auto_archiver/core/step.py
+++ b/src/auto_archiver/core/step.py
@@ -21,7 +21,7 @@ class Step(ABC):
    def init(name: str, config: dict, child: Type[Step]) -> Step:
        """
-        looks into direct subclasses of child for name and returns such ab object
+        looks into direct subclasses of child for name and returns such an object
        TODO: cannot find subclasses of child.subclasses
        """
        for sub in child.__subclasses__():
--- a/src/auto_archiver/databases/init.py
+++ b/src/auto_archiver/databases/init.py
@@ -2,3 +2,4 @@ from .database import Database
 from .gsheet_db import GsheetsDb
 from .console_db import ConsoleDb
 from .csv_db import CSVDb
 from .api_db import AAApiDb
--- a/src/auto_archiver/databases/api_db.py
+++ b/src/auto_archiver/databases/api_db.py
@@ -0,0 +1,41 @@
 import requests, os
 from loguru import logger
 from . import Database
 from ..core import Metadata
 class AAApiDb(Database):
    """
        Connects to auto-archiver-api instance

        Each finished archive is POSTed to `<api_endpoint>/submit-archive`
        together with the visibility/ownership options declared in configs().
    """
    name = "auto_archiver_api_db"

    def __init__(self, config: dict) -> None:
        """Initialise the step and check that the two mandatory settings are present."""
        # without this STEP.__init__ is not called
        super().__init__(config)
        # NOTE(review): assert_valid_string is inherited and not visible here;
        # presumably it rejects missing/empty values - confirm.
        self.assert_valid_string("api_endpoint")
        self.assert_valid_string("api_secret")

    @staticmethod
    def configs() -> dict:
        """Return the options this database accepts, with their defaults and help texts."""
        return {
            "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
            "api_secret": {"default": None, "help": "API authentication secret"},
            "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
            "author_id": {"default": None, "help": "which email to assign as author"},
            "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
            # on the CLI, tags arrive as one comma-separated string and are turned into a set
            "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
        }

    def done(self, item: Metadata) -> None:
        """archival result ready - should be saved to DB

        Sends the serialised item to the API and logs the outcome. A non-200
        status is logged as an error; it is not raised.
        """
        logger.info(f"saving archive of {item.get_url()} to the AA API.")
        payload = {'result': item.to_json(), 'public': self.public, 'author_id': self.author_id, 'group_id': self.group_id, 'tags': list(self.tags)}
        # URLs always use "/" as separator: build the address explicitly rather than
        # with os.path.join, which would insert a backslash on Windows.
        submit_url = f"{self.api_endpoint.rstrip('/')}/submit-archive"
        # NOTE(review): the basic-auth username "abc" looks like a placeholder and
        # only api_secret seems to matter - confirm against the API.
        response = requests.post(submit_url, json=payload, auth=("abc", self.api_secret))
        try:
            body = response.json()
        except ValueError:
            # the body is not JSON (e.g. an HTML error page from a proxy):
            # log the raw text instead of crashing while reporting the result
            body = response.text
        if response.status_code == 200:
            logger.success(f"AA API: {body}")
        else:
            logger.error(f"AA API FAIL ({response.status_code}): {body}")
--- a/src/auto_archiver/feeders/gsheet_feeder.py
+++ b/src/auto_archiver/feeders/gsheet_feeder.py
@@ -39,7 +39,7 @@ class GsheetsFeeder(Gsheets, Feeder):
            })
    def __iter__(self) -> Metadata:
-        sh = self.gsheets_client.open(self.sheet)
+        sh = self.open_sheet()
        for ii, wks in enumerate(sh.worksheets()):
            if not self.should_process_sheet(wks.title):
                logger.debug(f"SKIPPED worksheet '{wks.title}' due to allow/block rules")
@@ -64,7 +64,10 @@ class GsheetsFeeder(Gsheets, Feeder):
                # All checks done - archival process starts here
                m = Metadata().set_url(url)
                ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
-                folder = slugify(gw.get_cell(row, 'folder').strip())
+                if gw.get_cell_or_default(row, 'folder', "") is None:
                    folder = ''
                else:
                    folder = slugify(gw.get_cell_or_default(row, 'folder', "").strip())
                if len(folder):
                    if self.use_sheet_names_in_stored_paths:
                        ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
--- a/src/auto_archiver/utils/gsheet.py
+++ b/src/auto_archiver/utils/gsheet.py
@@ -10,16 +10,17 @@ class Gsheets(Step):
        # without this STEP.__init__ is not called
        super().__init__(config)
        self.gsheets_client = gspread.service_account(filename=self.service_account)
-        #TODO: config should be responsible for conversions
+        # TODO: config should be responsible for conversions
        try: self.header = int(self.header)
        except: pass
        assert type(self.header) == int, f"header ({self.header}) value must be an integer not {type(self.header)}"
-        assert self.sheet is not None, "You need to define a sheet name in your orchestration file when using gsheets."
+        assert self.sheet is not None or self.sheet_id is not None, "You need to define either a 'sheet' name or a 'sheet_id' in your orchestration file when using gsheets."
    @staticmethod
    def configs() -> dict:
        return {
            "sheet": {"default": None, "help": "name of the sheet to archive"},
            "sheet_id": {"default": None, "help": "(alternative to sheet name) the id of the sheet to archive"},
            "header": {"default": 1, "help": "index of the header row (starts at 1)"},
            "service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path"},
            "columns": {
@@ -42,3 +43,9 @@ class Gsheets(Step):
                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
            },
        }
    def open_sheet(self):
        """
        Open the configured spreadsheet through the gspread client.

        A truthy `sheet` (name) takes precedence; otherwise the spreadsheet
        is looked up by `sheet_id`.
        """
        sheet_name = self.sheet
        if not sheet_name:
            # no usable name configured: fall back to the spreadsheet key
            return self.gsheets_client.open_by_key(self.sheet_id)
        return self.gsheets_client.open(sheet_name)
--- a/src/auto_archiver/utils/misc.py
+++ b/src/auto_archiver/utils/misc.py
@@ -40,3 +40,12 @@ class DateTimeEncoder(json.JSONEncoder):
 def dump_payload(p):
    return json.dumps(p, ensure_ascii=False, indent=4, cls=DateTimeEncoder)
 def update_nested_dict(dictionary, update_dict):
    """
    Recursively merge update_dict into dictionary, in place.

    When both sides hold a dict under the same key the merge descends into
    it, so entries present only in dictionary are kept. Any other value from
    update_dict replaces (or adds) the entry. Nothing is returned.
    """
    for name, incoming in update_dict.items():
        both_are_dicts = (
            isinstance(incoming, dict)
            and name in dictionary
            and isinstance(dictionary[name], dict)
        )
        if not both_are_dicts:
            # plain value, new key, or a type mismatch: the update simply wins
            dictionary[name] = incoming
            continue
        update_nested_dict(dictionary[name], incoming)
--- a/src/auto_archiver/version.py
+++ b/src/auto_archiver/version.py
@@ -3,7 +3,7 @@ _MAJOR = "0"
 _MINOR = "5"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "13"
+_PATCH = "23"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
Author	SHA1	Message	Date
Logan Williams	cc66ee3fd4	bump to patch 23	2023-06-06 12:24:43 -06:00
Logan Williams	b3b727b005	Fix ValueError	2023-06-06 12:13:08 -06:00
msramalho	ee37b20e6c	fix: on missing col	2023-05-24 20:25:30 +01:00
msramalho	a184bf7b97	Bump version to v0.5.20 for release	2023-05-24 20:24:35 +01:00
msramalho	e535f44a88	optional folder	2023-05-24 20:24:15 +01:00
msramalho	0f28bf0e35	Bump version to v0.5.19 for release	2023-05-24 19:57:51 +01:00
msramalho	18a8636552	feat: new DB for auto-archiver-api	2023-05-24 19:24:53 +01:00
msramalho	81be65c828	Bump version to v0.5.18 for release	2023-05-24 11:19:02 +01:00
msramalho	0a91863212	typing fixes	2023-05-24 11:18:39 +01:00
msramalho	3ad8349e3f	Bump version to v0.5.17 for release	2023-05-23 19:05:53 +01:00
msramalho	2768225cd1	fix: generator not called	2023-05-23 19:05:47 +01:00
msramalho	3e44b9b577	Bump version to v0.5.16 for release	2023-05-23 18:12:56 +01:00
msramalho	1a5797d0f8	feat: orchestrator fed returns archive result	2023-05-23 18:12:04 +01:00
msramalho	768b8fce9f	Bump version to v0.5.15 for release	2023-05-19 12:35:26 +01:00
msramalho	613b1f1e50	properly overwrite configs	2023-05-19 12:35:19 +01:00
msramalho	919c37bfb6	Bump version to v0.5.14 for release	2023-05-19 12:18:02 +01:00
msramalho	a655b3c987	gsheet accepts ID too	2023-05-19 12:17:34 +01:00
msramalho	d645b840ee	disable duplicate GH actions	2023-05-19 12:17:03 +01:00