Compare commits

..

14 Commits

Author SHA1 Message Date
Logan Williams
b3b727b005 Fix ValueError 2023-06-06 12:13:08 -06:00
msramalho
ee37b20e6c fix: on missing col 2023-05-24 20:25:30 +01:00
msramalho
a184bf7b97 Bump version to v0.5.20 for release 2023-05-24 20:24:35 +01:00
msramalho
e535f44a88 optional folder 2023-05-24 20:24:15 +01:00
msramalho
0f28bf0e35 Bump version to v0.5.19 for release 2023-05-24 19:57:51 +01:00
msramalho
18a8636552 feat: new DB for auto-archiver-api 2023-05-24 19:24:53 +01:00
msramalho
81be65c828 Bump version to v0.5.18 for release 2023-05-24 11:19:02 +01:00
msramalho
0a91863212 typing fixes 2023-05-24 11:18:39 +01:00
msramalho
3ad8349e3f Bump version to v0.5.17 for release 2023-05-23 19:05:53 +01:00
msramalho
2768225cd1 fix: generator not called 2023-05-23 19:05:47 +01:00
msramalho
3e44b9b577 Bump version to v0.5.16 for release 2023-05-23 18:12:56 +01:00
msramalho
1a5797d0f8 feat: orchestrator fed returns archive result 2023-05-23 18:12:04 +01:00
msramalho
768b8fce9f Bump version to v0.5.15 for release 2023-05-19 12:35:26 +01:00
msramalho
613b1f1e50 properly overwrite configs 2023-05-19 12:35:19 +01:00
10 changed files with 66 additions and 15 deletions

View File

@@ -5,7 +5,7 @@ def main():
config = Config()
config.parse()
orchestrator = ArchivingOrchestrator(config)
orchestrator.feed()
for r in orchestrator.feed(): pass
if __name__ == "__main__":

View File

@@ -13,6 +13,7 @@ from ..formatters import Formatter
from ..storages import Storage
from ..enrichers import Enricher
from . import Step
from ..utils import update_nested_dict
@dataclass
@@ -38,7 +39,7 @@ class Config:
self.cli_ops = {}
self.config = {}
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs:str={}):
def parse(self, use_cli=True, yaml_config_filename: str = None, overwrite_configs: str = {}):
"""
if yaml_config_filename is provided, the --config argument is ignored,
useful for library usage when the config values are preloaded
@@ -81,7 +82,7 @@ class Config:
# 2. read YAML config file (or use provided value)
self.yaml_config = self.read_yaml(yaml_config_filename)
self.yaml_config.update(overwrite_configs) # optional override programmatically
update_nested_dict(self.yaml_config, overwrite_configs)
# 3. CONFIGS: decide value with priority: CLI >> config.yaml >> default
self.config = defaultdict(dict)

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from ast import List
from typing import Any
from typing import Any, List
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import mimetypes

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
from ast import List, Set
from typing import Any, Union, Dict
from typing import Any, List, Union, Dict
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import datetime

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
from ast import List
from typing import Union
from typing import Generator, Union, List
from .context import ArchivingContext
@@ -10,7 +9,6 @@ from ..formatters import Formatter
from ..storages import Storage
from ..enrichers import Enricher
from ..databases import Database
from .media import Media
from .metadata import Metadata
import tempfile, traceback
@@ -29,9 +27,9 @@ class ArchivingOrchestrator:
for a in self.archivers: a.setup()
def feed(self) -> None:
def feed(self) -> Generator[Metadata]:
for item in self.feeder:
self.feed_item(item)
yield self.feed_item(item)
def feed_item(self, item: Metadata) -> Metadata:
try:

View File

@@ -1,4 +1,5 @@
from .database import Database
from .gsheet_db import GsheetsDb
from .console_db import ConsoleDb
from .csv_db import CSVDb
from .csv_db import CSVDb
from .api_db import AAApiDb

View File

@@ -0,0 +1,41 @@
import requests, os
from loguru import logger
from . import Database
from ..core import Metadata
class AAApiDb(Database):
    """
    Connects to auto-archiver-api instance

    Database step that POSTs each finished archive result to the
    ``submit-archive`` route of the configured API endpoint.
    """
    name = "auto_archiver_api_db"

    def __init__(self, config: dict) -> None:
        """Initialise the step and validate the two mandatory settings.

        NOTE(review): presumably the parent initialiser copies the values
        declared in ``configs()`` onto ``self`` -- confirm against Step.
        """
        # without this STEP.__init__ is not called
        super().__init__(config)
        self.assert_valid_string("api_endpoint")
        self.assert_valid_string("api_secret")

    @staticmethod
    def configs() -> dict:
        """Return the configuration options of this step with defaults and help texts."""
        return {
            "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
            "api_secret": {"default": None, "help": "API authentication secret"},
            "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
            "author_id": {"default": None, "help": "which email to assign as author"},
            "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
            # a comma-separated CLI value is split into a set of tags
            "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
        }

    def done(self, item: Metadata) -> None:
        """archival result ready - should be saved to DB"""
        logger.info(f"saving archive of {item.get_url()} to the AA API.")
        payload = {'result': item.to_json(), 'public': self.public, 'author_id': self.author_id, 'group_id': self.group_id, 'tags': list(self.tags)}
        # Join URL segments with "/" explicitly: os.path.join is a filesystem
        # helper and would insert the OS path separator ("\\" on Windows).
        submit_url = f"{self.api_endpoint.rstrip('/')}/submit-archive"
        # NOTE(review): the basic-auth username is a fixed literal; presumably
        # only the secret is checked by the API -- confirm.
        response = requests.post(submit_url, json=payload, auth=("abc", self.api_secret))
        if response.status_code == 200:
            logger.success(f"AA API: {self._response_body(response)}")
        else:
            logger.error(f"AA API FAIL ({response.status_code}): {self._response_body(response)}")

    @staticmethod
    def _response_body(response):
        """Return the decoded JSON body, or the raw text when the body is not JSON."""
        try:
            return response.json()
        except ValueError:
            # Non-JSON bodies (e.g. an HTML error page from a proxy) must not
            # raise while we are only trying to log the outcome.
            return response.text

View File

@@ -64,7 +64,10 @@ class GsheetsFeeder(Gsheets, Feeder):
# All checks done - archival process starts here
m = Metadata().set_url(url)
ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
folder = slugify(gw.get_cell(row, 'folder').strip())
if gw.get_cell_or_default(row, 'folder', "") is None:
folder = ''
else:
folder = slugify(gw.get_cell_or_default(row, 'folder', "").strip())
if len(folder):
if self.use_sheet_names_in_stored_paths:
ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)

View File

@@ -40,3 +40,12 @@ class DateTimeEncoder(json.JSONEncoder):
def dump_payload(p):
return json.dumps(p, ensure_ascii=False, indent=4, cls=DateTimeEncoder)
def update_nested_dict(dictionary, update_dict):
    """Recursively merge ``update_dict`` into ``dictionary``, in place.

    Takes two dicts and overwrites the first with the second only on the
    changed values: when a key exists in both and both values are dicts,
    the nested dicts are merged recursively; in every other case the value
    from ``update_dict`` replaces (or adds) the entry in ``dictionary``.

    Args:
        dictionary: the dict that is modified.
        update_dict: the dict providing the overriding values; not modified.

    Returns:
        None. The result is visible through ``dictionary``.
    """
    for key, value in update_dict.items():
        if key in dictionary and isinstance(value, dict) and isinstance(dictionary[key], dict):
            # both sides are dicts: descend instead of replacing the subtree
            update_nested_dict(dictionary[key], value)
        else:
            dictionary[key] = value

View File

@@ -3,7 +3,7 @@ _MAJOR = "0"
_MINOR = "5"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "14"
_PATCH = "20"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""