Update modules for new core structure.

This commit is contained in:
erinhmclark
2025-01-30 08:42:23 +00:00
parent 18ff36ce15
commit cddae65a90
35 changed files with 307 additions and 233 deletions

View File

@@ -1 +1 @@
from .s3 import S3Storage
from .s3_storage import S3Storage

View File

@@ -7,12 +7,12 @@
},
"configs": {
"path_generator": {
"default": "url",
"default": "flat",
"help": "how to store the file in terms of directory structure: 'flat' sets to root; 'url' creates a directory based on the provided URL; 'random' creates a random directory.",
"choices": ["flat", "url", "random"],
},
"filename_generator": {
"default": "random",
"default": "static",
"help": "how to name stored files: 'random' creates a random string; 'static' uses a replicable strategy such as a hash.",
"choices": ["random", "static"],
},
@@ -20,7 +20,9 @@
"region": {"default": None, "help": "S3 region name"},
"key": {"default": None, "help": "S3 API key"},
"secret": {"default": None, "help": "S3 API secret"},
"random_no_duplicate": {"default": False, "help": "if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `no-dups/`"},
"random_no_duplicate": {"default": False,
"type": "bool",
"help": "if set, it will override `path_generator`, `filename_generator` and `folder`. It will check if the file already exists and if so it will not upload it again. Creates a new root folder path `no-dups/`"},
"endpoint_url": {
"default": 'https://{region}.digitaloceanspaces.com',
"help": "S3 bucket endpoint, {region} are inserted at runtime"
@@ -29,7 +31,9 @@
"default": 'https://{bucket}.{region}.cdn.digitaloceanspaces.com/{key}',
"help": "S3 CDN url, {bucket}, {region} and {key} are inserted at runtime"
},
"private": {"default": False, "help": "if true S3 files will not be readable online"},
"private": {"default": False,
"type": "bool",
"help": "if true S3 files will not be readable online"},
},
"description": """
S3Storage: A storage module for saving media files to an S3-compatible object storage.

View File

@@ -1,19 +1,21 @@
from typing import IO
import boto3, os
from auto_archiver.utils.misc import random_str
from auto_archiver.core import Media
from auto_archiver.core import Storage
from auto_archiver.modules.hash_enricher import HashEnricher
import boto3
import os
from loguru import logger
NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage):
from auto_archiver.core import Media
from auto_archiver.core import Storage
from auto_archiver.modules.hash_enricher import HashEnricher
from auto_archiver.utils.misc import random_str
def __init__(self, config: dict) -> None:
super().__init__(config)
NO_DUPLICATES_FOLDER = "no-dups/"
class S3Storage(Storage, HashEnricher):
def setup(self, config: dict) -> None:
super().setup(config)
self.s3 = boto3.client(
's3',
region_name=self.region,
@@ -21,7 +23,6 @@ class S3Storage(Storage):
aws_access_key_id=self.key,
aws_secret_access_key=self.secret
)
self.random_no_duplicate = bool(self.random_no_duplicate)
if self.random_no_duplicate:
logger.warning("random_no_duplicate is set to True, this will override `path_generator`, `filename_generator` and `folder`.")
@@ -48,8 +49,7 @@ class S3Storage(Storage):
def is_upload_needed(self, media: Media) -> bool:
if self.random_no_duplicate:
# checks if a folder with the hash already exists, if so it skips the upload
he = HashEnricher({"hash_enricher": {"algorithm": "SHA-256", "chunksize": 1.6e7}})
hd = he.calculate_hash(media.filename)
hd = self.calculate_hash(media.filename)
path = os.path.join(NO_DUPLICATES_FOLDER, hd[:24])
if existing_key:=self.file_in_folder(path):
@@ -61,8 +61,7 @@ class S3Storage(Storage):
_, ext = os.path.splitext(media.key)
media.key = os.path.join(path, f"{random_str(24)}{ext}")
return True
def file_in_folder(self, path:str) -> str:
# checks if path exists and is not an empty folder
if not path.endswith('/'):