Move storage configs into individual manifests, assert format on useage.

This commit is contained in:
erinhmclark
2025-01-23 17:01:30 +00:00
parent c3403ced26
commit 50f4ebcdc3
5 changed files with 52 additions and 43 deletions

View File

@@ -15,29 +15,6 @@ from slugify import slugify
@dataclass
class Storage(Step):
name = "storage"
PATH_GENERATOR_OPTIONS = ["flat", "url", "random"]
FILENAME_GENERATOR_CHOICES = ["random", "static"]
def __init__(self, config: dict) -> None:
# without this STEP.__init__ is not called
super().__init__(config)
assert self.path_generator in Storage.PATH_GENERATOR_OPTIONS, f"path_generator must be one of {Storage.PATH_GENERATOR_OPTIONS}"
assert self.filename_generator in Storage.FILENAME_GENERATOR_CHOICES, f"filename_generator must be one of {Storage.FILENAME_GENERATOR_CHOICES}"
@staticmethod
def configs() -> dict:
return {
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": Storage.PATH_GENERATOR_OPTIONS
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": Storage.FILENAME_GENERATOR_CHOICES
}
}
def init(name: str, config: dict) -> Storage:
# only for typing...
@@ -68,19 +45,27 @@ class Storage(Step):
folder = ArchivingContext.get("folder", "")
filename, ext = os.path.splitext(media.filename)
# path_generator logic
if self.path_generator == "flat":
# Handle path_generator logic
path_generator = ArchivingContext.get("path_generator", "url")
if path_generator == "flat":
path = ""
filename = slugify(filename) # in case it comes with os.sep
elif self.path_generator == "url": path = slugify(url)
elif self.path_generator == "random":
filename = slugify(filename) # Ensure filename is slugified
elif path_generator == "url":
path = slugify(url)
elif path_generator == "random":
path = ArchivingContext.get("random_path", random_str(24), True)
else:
raise ValueError(f"Invalid path_generator: {path_generator}")
# filename_generator logic
if self.filename_generator == "random": filename = random_str(24)
elif self.filename_generator == "static":
# Handle filename_generator logic
filename_generator = ArchivingContext.get("filename_generator", "random")
if filename_generator == "random":
filename = random_str(24)
elif filename_generator == "static":
he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}})
hd = he.calculate_hash(media.filename)
filename = hd[:24]
else:
raise ValueError(f"Invalid filename_generator: {filename_generator}")
media.key = os.path.join(folder, path, f"{filename}{ext}")

View File

@@ -2,23 +2,25 @@
"name": "atlos_storage",
"type": ["storage"],
"requires_setup": True,
"external_dependencies": {
"python": ["loguru", "requests"],
"bin": [""]
},
"external_dependencies": {"python": ["loguru", "requests"], "bin": [""]},
"configs": {
# TODO: get base storage configs
# TODO also? get_atlos_config_options()
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
"api_token": {
"default": None,
"help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
"cli_set": lambda cli_val, _: cli_val
"cli_set": lambda cli_val, _: cli_val,
},
"atlos_url": {
"default": "https://platform.atlos.org",
"help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
"cli_set": lambda cli_val, _: cli_val
"cli_set": lambda cli_val, _: cli_val,
},
},
"description": """
@@ -34,5 +36,5 @@
### Notes
- Requires Atlos API configuration, including `atlos_url` and `api_token`.
- Files are linked to an `atlos_id` in the metadata, ensuring proper association with Atlos source materials.
"""
""",
}

View File

@@ -12,6 +12,14 @@ m = {
],
},
"configs": {
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
# TODO: get base storage configs
"root_folder_id": {"default": None, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"},
"oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."},

View File

@@ -6,7 +6,14 @@ m = {
"python": ["loguru"],
},
"configs": {
# TODO: get base storage configs
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
"save_to": {"default": "./archived", "help": "folder where to save archived content"},
"save_absolute": {"default": False, "help": "whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)"},
},

View File

@@ -6,7 +6,14 @@ m = {
"python": ["boto3", "loguru"],
},
"configs": {
# TODO: get base storage configs
"path_generator": {
"default": "url",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
},
"filename_generator": {
"default": "random",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
},
"bucket": {"default": None, "help": "S3 bucket name"},
"region": {"default": None, "help": "S3 region name"},
"key": {"default": None, "help": "S3 API key"},