refactoring filenumber into subfolder

This commit is contained in:
msramalho
2022-05-26 19:18:29 +02:00
parent 03aa02e88b
commit 159adf9afe
13 changed files with 144 additions and 179 deletions

View File

@@ -1,5 +1,6 @@
from loguru import logger
from abc import ABC, abstractmethod
from pathlib import Path
class Storage(ABC):
@@ -19,3 +20,25 @@ class Storage(ABC):
logger.debug(f'[{self.__class__.__name__}] uploading file {filename} with key {key}')
with open(filename, 'rb') as f:
self.uploadf(f, key, **kwargs)
def update_properties(self, **kwargs):
    """
    method used to update general properties that some children may use
    and others not, but that all can call

    only names returned by get_allowed_properties() are assigned (via setattr),
    any other name is reported with a warning and ignored

    :param kwargs: property names mapped to the values to assign
    """
    # the allowed set cannot change while we iterate, so look it up once
    allowed = self.get_allowed_properties()
    for k, v in kwargs.items():
        if k in allowed:
            setattr(self, k, v)
        else:
            logger.warning(f'[{self.__class__.__name__}] does not accept dynamic property "{k}"')
def get_allowed_properties(self):
    """
    names of the properties that may be set dynamically on this storage,
    child classes should override this to declare the ones they accept
    """
    return {"subfolder"}
def clean_path(self, folder, default="", add_forward_slash=True):
    """
    normalise a folder name so it can be used as a prefix when building keys

    :param folder: folder name/path to normalise
    :param default: value returned when folder is None, not a string, or blank
    :param add_forward_slash: when True the result is terminated with a single "/"
    :return: the normalised path using "/" separators, or default
    """
    if not isinstance(folder, str) or not folder.strip():
        return default
    # as_posix() keeps "/" separators on every OS, str(Path) would emit "\" on Windows
    cleaned = Path(folder).as_posix()
    # a root path already ends with "/", do not turn it into "//"
    if add_forward_slash and not cleaned.endswith("/"):
        cleaned += "/"
    return cleaned

View File

@@ -15,6 +15,7 @@ class GDConfig:
class GDStorage(Storage):
DEFAULT_UPLOAD_FOLDER_NAME = "default"
def __init__(self, config: GDConfig):
self.root_folder_id = config.root_folder_id
@@ -22,19 +23,14 @@ class GDStorage(Storage):
creds = service_account.Credentials.from_service_account_file('service_account.json', scopes=SCOPES)
self.service = build('drive', 'v3', credentials=creds)
def _get_path(self, key):
    """
    return the storage path for key, built by prefixing it with self.folder

    NOTE(review): self.folder is not assigned in the part of __init__ visible
    here - confirm it exists on GDStorage before relying on this method
    """
    return self.folder + key
def get_cdn_url(self, key):
# only support files saved in a folders for GD
# S3 supports folder and all stored in the root
# key will be SM0002/twitter__media_ExeUSW2UcAE6RbN.jpg
foldername = key.split('/', 1)[0]
# eg twitter__media_asdf.jpg
filename = key.split('/', 1)[1]
logger.debug(f'Looking for {foldername} and filename: {filename} on GD')
"""
only support files saved in a folder for GD
S3 supports folder and all stored in the root
"""
self.subfolder = self.clean_path(self.subfolder, GDStorage.DEFAULT_UPLOAD_FOLDER_NAME, False)
filename = key
logger.debug(f'Looking for {self.subfolder} and filename: {filename} on GD')
# retry policy on Google Drive
try_again = True
@@ -42,11 +38,11 @@ class GDStorage(Storage):
folder_id = None
while try_again:
# need to lookup the id of folder eg SM0002 which should be there already as this is get_cdn_url
results = self.service.files().list(q=f"'{self.root_folder_id}' in parents \
and name = '{foldername}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
results = self.service.files().list(
q=f"'{self.root_folder_id}' in parents and name = '{self.subfolder}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
items = results.get('files', [])
for item in items:
@@ -55,11 +51,11 @@ class GDStorage(Storage):
try_again = False
if folder_id is None:
logger.debug(f'Cant find {foldername=} waiting and trying again {counter=}')
logger.debug(f'Cannot find {self.subfolder=} waiting and trying again {counter=}')
counter += 1
time.sleep(10)
if counter > 18:
raise ValueError(f'Cant find {foldername} and retried 18 times pausing 10seconds at a time which is 3 minutes')
raise ValueError(f'Cannot find {self.subfolder} and retried 18 times pausing 10s at a time which is 3 minutes')
# check for sub folder in file eg youtube_dl_sDE-qZdi8p8/index.html'
# happens doing thumbnails
@@ -71,12 +67,11 @@ class GDStorage(Storage):
logger.debug(f'get_cdn_url: Found a subfolder so need to split on a: {a} and {b}')
# get id of the sub folder
results = self.service.files().list(q=f"'{folder_id}' in parents \
and mimeType='application/vnd.google-apps.folder' \
and name = '{a}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
results = self.service.files().list(
q=f"'{folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{a}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
items = results.get('files', [])
filename = None
@@ -87,11 +82,11 @@ class GDStorage(Storage):
raise ValueError(f'Problem finding sub folder {a}')
# get id of file inside folder (or sub folder)
results = self.service.files().list(q=f"'{folder_id}' in parents \
and name = '{filename}' ",
spaces='drive',
fields='files(id, name)'
).execute()
results = self.service.files().list(
q=f"'{folder_id}' in parents and name = '{filename}' ",
spaces='drive',
fields='files(id, name)'
).execute()
items = results.get('files', [])
file_id = None
@@ -110,41 +105,36 @@ class GDStorage(Storage):
return False
def uploadf(self, file, key, **_kwargs):
# split on first occurance of /
# eg SM0005
foldername = key.split('/', 1)[0]
# eg twitter__media_asdf.jpg
filename = key.split('/', 1)[1]
logger.debug(f"before {self.subfolder=}")
self.subfolder = self.clean_path(self.subfolder, GDStorage.DEFAULT_UPLOAD_FOLDER_NAME, False)
filename = key
logger.debug(f"after {self.subfolder=}")
# does folder eg SM0005 exist already inside parent of Files auto-archiver
results = self.service.files().list(q=f"'{self.root_folder_id}' in parents \
and mimeType='application/vnd.google-apps.folder' \
and name = '{foldername}' ",
spaces='drive',
fields='files(id, name)'
).execute()
results = self.service.files().list(
q=f"'{self.root_folder_id}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{self.subfolder}' ",
spaces='drive',
fields='files(id, name)'
).execute()
items = results.get('files', [])
folder_id_to_upload_to = None
if len(items) > 1:
logger.error(f'Duplicate folder name of {foldername} which should never happen, but continuing anyway')
logger.error(f'Duplicate folder name of {self.subfolder} which should never happen, but continuing anyway')
for item in items:
logger.debug(f"Found existing folder of {item['name']}")
folder_id_to_upload_to = item['id']
if folder_id_to_upload_to is None:
logger.debug(f'Creating new folder {foldername}')
logger.debug(f'Creating new folder {self.subfolder}')
file_metadata = {
'name': [foldername],
'name': [self.subfolder],
'mimeType': 'application/vnd.google-apps.folder',
'parents': [self.root_folder_id]
}
gd_file = self.service.files().create(body=file_metadata, fields='id').execute()
folder_id_to_upload_to = gd_file.get('id')
# check for subfolder nema in file eg youtube_dl_sDE-qZdi8p8/out1.jpg'
# happens doing thumbnails
# check for subfolder name in file eg youtube_dl_sDE-qZdi8p8/out1.jpg', eg: thumbnails
# will always return a and a blank b even if there is nothing to split
# https://stackoverflow.com/a/38149500/26086
a, _, b = filename.partition('/')
@@ -155,12 +145,11 @@ class GDStorage(Storage):
logger.debug(f'uploadf: Found a subfolder so need to split on a: {a} and {b}')
# does the 'a' folder exist already in folder_id_to_upload_to eg SM0005
results = self.service.files().list(q=f"'{folder_id_to_upload_to}' in parents \
and mimeType='application/vnd.google-apps.folder' \
and name = '{a}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
results = self.service.files().list(
q=f"'{folder_id_to_upload_to}' in parents and mimeType='application/vnd.google-apps.folder' and name = '{a}' ",
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
items = results.get('files', [])
sub_folder_id_to_upload_to = None
if len(items) > 1:
@@ -184,17 +173,13 @@ class GDStorage(Storage):
folder_id_to_upload_to = sub_folder_id_to_upload_to
# back to normal control flow
# else:
# upload file to gd
file_metadata = {
# 'name': 'twitter__media_FMQg7yeXwAAwNEi.jpg',
'name': [filename],
'parents': [folder_id_to_upload_to]
}
media = MediaFileUpload(file, resumable=True)
gd_file = self.service.files().create(body=file_metadata,
media_body=media,
fields='id').execute()
gd_file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute()
def upload(self, filename: str, key: str, **kwargs):
# GD only requires the filename not a file reader

View File

@@ -2,6 +2,7 @@ import boto3
from botocore.errorfactory import ClientError
from .base_storage import Storage
from dataclasses import dataclass
from loguru import logger
@dataclass
@@ -19,12 +20,9 @@ class S3Storage(Storage):
def __init__(self, config: S3Config):
self.bucket = config.bucket
self.region = config.region
self.folder = config.folder
self.folder = self.clean_path(config.folder)
self.private = config.private
if len(self.folder) and self.folder[-1] != '/':
self.folder += '/'
self.s3 = boto3.client(
's3',
region_name=self.region,
@@ -34,7 +32,7 @@ class S3Storage(Storage):
)
def _get_path(self, key):
    """
    build the full object key: configured folder + optional subfolder + key

    :param key: file name (may contain "/") relative to the subfolder
    :return: the key to use inside the bucket
    """
    # subfolder is only set dynamically (update_properties), so it may be absent;
    # clean_path falls back to "" for None, which leaves the key in the base folder
    subfolder = getattr(self, "subfolder", None)
    return self.folder + self.clean_path(subfolder) + key
def get_cdn_url(self, key):
    """
    public CDN url of the object stored under key, composed from the bucket,
    the region and the path returned by _get_path
    """
    host = f'{self.bucket}.{self.region}.cdn.digitaloceanspaces.com'
    return f'https://{host}/{self._get_path(key)}'
@@ -47,9 +45,9 @@ class S3Storage(Storage):
return False
def uploadf(self, file, key, **kwargs):
    """
    upload an open file object to the bucket under the path derived from key

    a caller-supplied "extra_args" kwarg is passed through as ExtraArgs; when it
    is missing, private storages send no extra args and public ones request a
    'public-read' ACL
    """
    logger.debug(f'[S3 storage] uploading {file=}, {key=}')
    fallback_args = {} if self.private else {'ACL': 'public-read'}
    extra_args = kwargs.get("extra_args", fallback_args)
    self.s3.upload_fileobj(file, Bucket=self.bucket, Key=self._get_path(key), ExtraArgs=extra_args)