Merge branch 'main' into opentimestamps

This commit is contained in:
Patrick Robertson
2025-03-14 12:36:03 +00:00
167 changed files with 3227 additions and 2264 deletions

View File

@@ -1 +1 @@
from .gdrive_storage import GDriveStorage
from .gdrive_storage import GDriveStorage

View File

@@ -22,11 +22,18 @@
"help": "how to name stored files: 'random' creates a random string; 'static' uses a hash, with the settings of the 'hash_enricher' module (defaults to SHA256 if not enabled).",
"choices": ["random", "static"],
},
"root_folder_id": {"required": True,
"help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'"},
"oauth_token": {"default": None,
"help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account."},
"service_account": {"default": "secrets/service_account.json", "help": "service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account."},
"root_folder_id": {
"required": True,
"help": "root google drive folder ID to use as storage, found in URL: 'https://drive.google.com/drive/folders/FOLDER_ID'",
},
"oauth_token": {
"default": None,
"help": "JSON filename with Google Drive OAuth token: check auto-archiver repository scripts folder for create_update_gdrive_oauth_token.py. NOTE: storage used will count towards owner of GDrive folder, therefore it is best to use oauth_token_filename over service_account.",
},
"service_account": {
"default": "secrets/service_account.json",
"help": "service account JSON file path, same as used for Google Sheets. NOTE: storage used will count towards the developer account.",
},
},
"description": """
@@ -94,5 +101,5 @@ This module integrates Google Drive as a storage backend, enabling automatic fol
https://davemateer.com/2022/04/28/google-drive-with-python#tokens
"""
""",
}

View File

@@ -1,4 +1,3 @@
import json
import os
import time
@@ -15,12 +14,9 @@ from auto_archiver.core import Media
from auto_archiver.core import Storage
class GDriveStorage(Storage):
def setup(self) -> None:
self.scopes = ['https://www.googleapis.com/auth/drive']
self.scopes = ["https://www.googleapis.com/auth/drive"]
# Initialize Google Drive service
self._setup_google_drive_service()
@@ -37,25 +33,25 @@ class GDriveStorage(Storage):
def _initialize_with_oauth_token(self):
"""Initialize Google Drive service with OAuth token."""
with open(self.oauth_token, 'r') as stream:
with open(self.oauth_token, "r") as stream:
creds_json = json.load(stream)
creds_json['refresh_token'] = creds_json.get("refresh_token", "")
creds_json["refresh_token"] = creds_json.get("refresh_token", "")
creds = Credentials.from_authorized_user_info(creds_json, self.scopes)
if not creds.valid and creds.expired and creds.refresh_token:
creds.refresh(Request())
with open(self.oauth_token, 'w') as token_file:
with open(self.oauth_token, "w") as token_file:
logger.debug("Saving refreshed OAuth token.")
token_file.write(creds.to_json())
elif not creds.valid:
raise ValueError("Invalid OAuth token. Please regenerate the token.")
return build('drive', 'v3', credentials=creds)
return build("drive", "v3", credentials=creds)
def _initialize_with_service_account(self):
"""Initialize Google Drive service with service account."""
creds = service_account.Credentials.from_service_account_file(self.service_account, scopes=self.scopes)
return build('drive', 'v3', credentials=creds)
return build("drive", "v3", credentials=creds)
def get_cdn_url(self, media: Media) -> str:
"""
@@ -79,7 +75,7 @@ class GDriveStorage(Storage):
return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
def upload(self, media: Media, **kwargs) -> bool:
logger.debug(f'[{self.__class__.__name__}] storing file {media.filename} with key {media.key}')
logger.debug(f"[{self.__class__.__name__}] storing file {media.filename} with key {media.key}")
"""
1. for each sub-folder in the path check if exists or create
2. upload file to root_id/other_paths.../filename
@@ -95,25 +91,30 @@ class GDriveStorage(Storage):
parent_id = upload_to
# upload file to gd
logger.debug(f'uploading {filename=} to folder id {upload_to}')
file_metadata = {
'name': [filename],
'parents': [upload_to]
}
logger.debug(f"uploading {filename=} to folder id {upload_to}")
file_metadata = {"name": [filename], "parents": [upload_to]}
media = MediaFileUpload(media.filename, resumable=True)
gd_file = self.service.files().create(supportsAllDrives=True, body=file_metadata, media_body=media, fields='id').execute()
logger.debug(f'uploadf: uploaded file {gd_file["id"]} successfully in folder={upload_to}')
gd_file = (
self.service.files()
.create(supportsAllDrives=True, body=file_metadata, media_body=media, fields="id")
.execute()
)
logger.debug(f"uploadf: uploaded file {gd_file['id']} successfully in folder={upload_to}")
# must be implemented even if unused
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass
def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
pass
def _get_id_from_parent_and_name(self, parent_id: str,
name: str,
retries: int = 1,
sleep_seconds: int = 10,
use_mime_type: bool = False,
raise_on_missing: bool = True,
use_cache=False):
def _get_id_from_parent_and_name(
self,
parent_id: str,
name: str,
retries: int = 1,
sleep_seconds: int = 10,
use_mime_type: bool = False,
raise_on_missing: bool = True,
use_cache=False,
):
"""
Retrieves the id of a folder or file from its @name and the @parent_id folder
Optionally does multiple @retries and sleeps @sleep_seconds between them
@@ -134,32 +135,39 @@ class GDriveStorage(Storage):
debug_header: str = f"[searching {name=} in {parent_id=}]"
query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false "
if use_mime_type:
query_string += f" and mimeType='application/vnd.google-apps.folder' "
query_string += " and mimeType='application/vnd.google-apps.folder' "
for attempt in range(retries):
results = self.service.files().list(
# both below for Google Shared Drives
supportsAllDrives=True,
includeItemsFromAllDrives=True,
q=query_string,
spaces='drive', # ie not appDataFolder or photos
fields='files(id, name)'
).execute()
items = results.get('files', [])
results = (
self.service.files()
.list(
# both below for Google Shared Drives
supportsAllDrives=True,
includeItemsFromAllDrives=True,
q=query_string,
spaces="drive", # ie not appDataFolder or photos
fields="files(id, name)",
)
.execute()
)
items = results.get("files", [])
if len(items) > 0:
logger.debug(f"{debug_header} found {len(items)} matches, returning last of {','.join([i['id'] for i in items])}")
_id = items[-1]['id']
if use_cache: self.api_cache[cache_key] = _id
logger.debug(
f"{debug_header} found {len(items)} matches, returning last of {','.join([i['id'] for i in items])}"
)
_id = items[-1]["id"]
if use_cache:
self.api_cache[cache_key] = _id
return _id
else:
logger.debug(f'{debug_header} not found, attempt {attempt+1}/{retries}.')
logger.debug(f"{debug_header} not found, attempt {attempt + 1}/{retries}.")
if attempt < retries - 1:
logger.debug(f'sleeping for {sleep_seconds} second(s)')
logger.debug(f"sleeping for {sleep_seconds} second(s)")
time.sleep(sleep_seconds)
if raise_on_missing:
raise ValueError(f'{debug_header} not found after {retries} attempt(s)')
raise ValueError(f"{debug_header} not found after {retries} attempt(s)")
return None
def _mkdir(self, name: str, parent_id: str):
@@ -167,12 +175,7 @@ class GDriveStorage(Storage):
Creates a new GDrive folder @name inside folder @parent_id
Returns id of the created folder
"""
logger.debug(f'Creating new folder with {name=} inside {parent_id=}')
file_metadata = {
'name': [name],
'mimeType': 'application/vnd.google-apps.folder',
'parents': [parent_id]
}
gd_folder = self.service.files().create(supportsAllDrives=True, body=file_metadata, fields='id').execute()
return gd_folder.get('id')
logger.debug(f"Creating new folder with {name=} inside {parent_id=}")
file_metadata = {"name": [name], "mimeType": "application/vnd.google-apps.folder", "parents": [parent_id]}
gd_folder = self.service.files().create(supportsAllDrives=True, body=file_metadata, fields="id").execute()
return gd_folder.get("id")