# Creates, refreshes and smoke-tests the Google Drive OAuth token used by the auto-archiver.
#
# First-time setup:
#   1. Download the OAuth client secrets JSON as `credentials.json` from
#      https://console.cloud.google.com/apis/credentials (OAuth 2.0 Client IDs).
#   2. Add "http://localhost:55192/" to the client's "Authorised redirect URIs".
#   See https://davemateer.com/2022/04/28/google-drive-with-python for more information.
#
# Run this script to get a new token and verify that it belongs to the correct user.
# The token will then be refreshed automatically by the auto-archiver.
#
# Based on https://developers.google.com/drive/api/quickstart/python
import os.path

from google.auth.exceptions import RefreshError
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

SCOPES = ['https://www.googleapis.com/auth/drive']


def _save_token(creds, token_file):
    """Write *creds* as JSON to *token_file*, creating it owner-read/write only."""
    print('Saving new token')
    # The file holds a refresh token, so do not create it with default
    # (typically world-readable) permissions. The mode only applies when the
    # file is created; an existing file keeps its current permissions.
    fd = os.open(token_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w', encoding='utf-8') as token:
        token.write(creds.to_json())


def main(token_file='gd-token.json', credentials_file='credentials.json', port=55192):
    """Make sure *token_file* holds a valid Drive OAuth token, then smoke-test it.

    An existing token is reused when still valid and refreshed when expired.
    If there is no usable token (first run, or the refresh is rejected), the
    browser login flow is started and the resulting token is saved.

    Afterwards the e-mail address of the authenticated user and up to ten
    files are printed so the operator can confirm the token belongs to the
    intended account.

    Args:
        token_file: path where the user's access/refresh token is stored.
        credentials_file: OAuth client secrets JSON downloaded from
            https://console.cloud.google.com/apis/credentials
        port: local port for the OAuth redirect; it must match the
            "Authorised redirect URIs" entry of the OAuth client.
    """
    creds = None
    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, SCOPES)

    if creds and creds.valid:
        print('Token valid')
    else:
        needs_login = True
        if creds and creds.expired and creds.refresh_token:
            print('Requesting new token')
            try:
                creds.refresh(Request())
                needs_login = False
            except RefreshError as error:
                # A revoked or expired refresh token cannot be recovered by
                # retrying; fall back to the interactive login instead of crashing.
                print(f'Token refresh failed ({error}), a new login is required')

        if needs_login:
            print('First run through so putting up login dialog')
            flow = InstalledAppFlow.from_client_secrets_file(credentials_file, SCOPES)
            creds = flow.run_local_server(port=port)

        # Save the credentials for the next run
        _save_token(creds, token_file)

    try:
        service = build('drive', 'v3', credentials=creds)

        # About the user: only the `user` object is needed, so do not fetch "*"
        about = service.about().get(fields='user').execute()
        email_address = about['user']['emailAddress']
        print(email_address)

        # Call the Drive v3 API and return some files
        listing = service.files().list(
            pageSize=10, fields="nextPageToken, files(id, name)").execute()
        items = listing.get('files', [])

        if not items:
            print('No files found.')
            return
        print('Files:')
        for item in items:
            print(f"{item['name']} ({item['id']})")

    except HttpError as error:
        print(f'An error occurred: {error}')


if __name__ == '__main__':
    main()
from typing import Optional

from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request


@dataclass
class GDConfig:
    # XXXX part of https://drive.google.com/drive/folders/XXXX
    root_folder_id: str
    # OAuth token file; when None the service account file is used instead
    oauth_token_filename: Optional[str] = None
    service_account: str = "service_account.json"
    folder: str = "default"


class GDStorage(Storage):
    def __init__(self, config: GDConfig):
        """
        Builds the Drive v3 service for @config
        Uses the OAuth token in config.oauth_token_filename when it is set (takes precedence),
        otherwise the service account file in config.service_account
        Raises Exception if the OAuth token is neither valid nor refreshable
        """
        self.folder = config.folder
        self.root_folder_id = config.root_folder_id

        scopes = ['https://www.googleapis.com/auth/drive']

        token_file = config.oauth_token_filename
        if token_file is not None:
            # Tokens are refreshed after 1 hour
            # however keep working for 7 days (tbc)
            # so as long as the job doesn't last for 7 days
            # then this method of refreshing only once per run will work
            # see this link for details on the token
            # https://davemateer.com/2022/04/28/google-drive-with-python#tokens
            logger.debug(f'Using GD OAuth token {token_file}')
            creds = Credentials.from_authorized_user_file(token_file, scopes)

            if creds.valid:
                logger.debug('GD OAuth Token valid')
            elif creds.expired and creds.refresh_token:
                logger.debug('Requesting new GD OAuth token')
                creds.refresh(Request())

                # Save the credentials for the next run
                with open(token_file, 'w', encoding='utf-8') as token:
                    logger.debug('Saving new GD OAuth token')
                    token.write(creds.to_json())
            else:
                raise Exception("Problem with creds - create the token again")
        else:
            gd_service_account = config.service_account
            logger.debug(f'Using GD Service Account {gd_service_account}')
            creds = service_account.Credentials.from_service_account_file(gd_service_account, scopes=scopes)

        self.service = build('drive', 'v3', credentials=creds)
me (yet) + to be fair I just need to test this and make sure it always runs well! Returns the id of the file or folder from its name as a string """ # cache logic @@ -107,7 +147,7 @@ class GDStorage(Storage): # API logic debug_header: str = f"[searching {name=} in {parent_id=}]" - query_string = f"'{parent_id}' in parents and name = '{name}' " + query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false " if use_mime_type: query_string += f" and mimeType='application/vnd.google-apps.folder' "