diff --git a/src/auto_archiver/base_modules/storage.py b/src/auto_archiver/base_modules/storage.py index 147da1f..da6b2ef 100644 --- a/src/auto_archiver/base_modules/storage.py +++ b/src/auto_archiver/base_modules/storage.py @@ -15,29 +15,6 @@ from slugify import slugify @dataclass class Storage(Step): name = "storage" - PATH_GENERATOR_OPTIONS = ["flat", "url", "random"] - FILENAME_GENERATOR_CHOICES = ["random", "static"] - - def __init__(self, config: dict) -> None: - # without this STEP.__init__ is not called - super().__init__(config) - assert self.path_generator in Storage.PATH_GENERATOR_OPTIONS, f"path_generator must be one of {Storage.PATH_GENERATOR_OPTIONS}" - assert self.filename_generator in Storage.FILENAME_GENERATOR_CHOICES, f"filename_generator must be one of {Storage.FILENAME_GENERATOR_CHOICES}" - - @staticmethod - def configs() -> dict: - return { - "path_generator": { - "default": "url", - "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.", - "choices": Storage.PATH_GENERATOR_OPTIONS - }, - "filename_generator": { - "default": "random", - "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", - "choices": Storage.FILENAME_GENERATOR_CHOICES - } - } def init(name: str, config: dict) -> Storage: # only for typing... @@ -68,19 +45,27 @@ class Storage(Step): folder = ArchivingContext.get("folder", "") filename, ext = os.path.splitext(media.filename) - # path_generator logic - if self.path_generator == "flat": + # Handle path_generator logic + path_generator = ArchivingContext.get("path_generator", "url") + if path_generator == "flat": path = "" - filename = slugify(filename) # in case it comes with os.sep - elif self.path_generator == "url": path = slugify(url) - elif self.path_generator == "random": + filename = slugify(filename) # Ensure filename is slugified + elif path_generator == "url": + path = slugify(url) + elif path_generator == "random": path = ArchivingContext.get("random_path", random_str(24), True) + else: + raise ValueError(f"Invalid path_generator: {path_generator}") - # filename_generator logic - if self.filename_generator == "random": filename = random_str(24) - elif self.filename_generator == "static": + # Handle filename_generator logic + filename_generator = ArchivingContext.get("filename_generator", "random") + if filename_generator == "random": + filename = random_str(24) + elif filename_generator == "static": he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}}) hd = he.calculate_hash(media.filename) filename = hd[:24] + else: + raise ValueError(f"Invalid filename_generator: {filename_generator}") media.key = os.path.join(folder, path, f"{filename}{ext}") diff --git a/src/auto_archiver/modules/atlos/__manifest__.py b/src/auto_archiver/modules/atlos/__manifest__.py index cc357e3..c600e43 100644 --- a/src/auto_archiver/modules/atlos/__manifest__.py +++ b/src/auto_archiver/modules/atlos/__manifest__.py @@ -2,23 +2,25 @@ "name": "atlos_storage", "type": ["storage"], "requires_setup": True, - "external_dependencies": { - "python": ["loguru", "requests"], - "bin": [""] - }, + "external_dependencies": {"python": ["loguru", "requests"], "bin": [""]}, "configs": { - # TODO: get base storage configs - # TODO also? get_atlos_config_options() - + "path_generator": { + "default": "url", + "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.", + }, + "filename_generator": { + "default": "random", + "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", + }, "api_token": { "default": None, "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/", - "cli_set": lambda cli_val, _: cli_val + "cli_set": lambda cli_val, _: cli_val, }, "atlos_url": { "default": "https://platform.atlos.org", "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.", - "cli_set": lambda cli_val, _: cli_val + "cli_set": lambda cli_val, _: cli_val, }, }, "description": """ @@ -34,5 +36,5 @@ ### Notes - Requires Atlos API configuration, including `atlos_url` and `api_token`. - Files are linked to an `atlos_id` in the metadata, ensuring proper association with Atlos source materials. - """ + """, } diff --git a/src/auto_archiver/modules/gdrive_storage/__manifest__.py b/src/auto_archiver/modules/gdrive_storage/__manifest__.py index cc598e2..e7e4650 100644 --- a/src/auto_archiver/modules/gdrive_storage/__manifest__.py +++ b/src/auto_archiver/modules/gdrive_storage/__manifest__.py @@ -12,6 +12,14 @@ m = { ], }, "configs": { + "path_generator": { + "default": "url", + "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.", + }, + "filename_generator": { + "default": "random", + "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", + }, # TODO: get base storage configs "root_folder_id": {"default": None, "help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"}, "oauth_token": {"default": None, "help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."}, diff --git a/src/auto_archiver/modules/local_storage/__manifest__.py b/src/auto_archiver/modules/local_storage/__manifest__.py index 5220555..7247885 100644 --- a/src/auto_archiver/modules/local_storage/__manifest__.py +++ b/src/auto_archiver/modules/local_storage/__manifest__.py @@ -6,7 +6,14 @@ m = { "python": ["loguru"], }, "configs": { - # TODO: get base storage configs + "path_generator": { + "default": "url", + "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.", + }, + "filename_generator": { + "default": "random", + "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", + }, "save_to": {"default": "./archived", "help": "folder where to save archived content"}, "save_absolute": {"default": False, "help": "whether the path to the stored file is absolute or relative in the output result inc. formatters (WARN: leaks the file structure)"}, }, diff --git a/src/auto_archiver/modules/s3_storage/__manifest__.py b/src/auto_archiver/modules/s3_storage/__manifest__.py index 239e0fe..210eefa 100644 --- a/src/auto_archiver/modules/s3_storage/__manifest__.py +++ b/src/auto_archiver/modules/s3_storage/__manifest__.py @@ -6,7 +6,14 @@ m = { "python": ["boto3", "loguru"], }, "configs": { - # TODO: get base storage configs + "path_generator": { + "default": "url", + "help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.", + }, + "filename_generator": { + "default": "random", + "help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.", + }, "bucket": {"default": None, "help": "S3 bucket name"}, "region": {"default": None, "help": "S3 region name"}, "key": {"default": None, "help": "S3 API key"},