From 9f9b9d8f634193bc7c202146da09cb8c2e6ac865 Mon Sep 17 00:00:00 2001 From: Dave Mateer Date: Mon, 18 Jul 2022 13:25:05 +0100 Subject: [PATCH 1/4] adding in GD token --- storages/gd_storage.py | 52 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/storages/gd_storage.py b/storages/gd_storage.py index d9a11de..e60e37f 100644 --- a/storages/gd_storage.py +++ b/storages/gd_storage.py @@ -8,19 +8,54 @@ from googleapiclient.http import MediaFileUpload from google.oauth2 import service_account +from google.oauth2.credentials import Credentials +from google.auth.transport.requests import Request + @dataclass class GDConfig: root_folder_id: str + oauth_token_file_path_and_name: str + service_account: str folder: str = "default" - service_account: str = "service_account.json" - class GDStorage(Storage): def __init__(self, config: GDConfig): self.folder = config.folder self.root_folder_id = config.root_folder_id - creds = service_account.Credentials.from_service_account_file( - config.service_account, scopes=['https://www.googleapis.com/auth/drive']) + + SCOPES=['https://www.googleapis.com/auth/drive'] + + token_file = config.oauth_token_file_path_and_name + if token_file is not None: + """ + Tokens are refreshed after 1 hour + however keep working for 7 days (tbc) + so as long as the job doesn't last for 7 days + then this method of refreshing only once per run will work + see this link for details on the token + https://davemateer.com/2022/04/28/google-drive-with-python#tokens + """ + logger.debug(f'Using GD OAuth token {token_file}') + creds = Credentials.from_authorized_user_file(token_file, SCOPES) + + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + logger.debug('Requesting new GD OAuth token') + creds.refresh(Request()) + else: + raise Exception("Problem with creds - create the token again") + + # Save the credentials for the next run + with open(token_file, 'w') as token: + logger.debug('Saving new GD OAuth token') + token.write(creds.to_json()) + else: + logger.debug('GD OAuth Token valid') + else: + gd_service_account = config.service_account + logger.debug(f'Using GD Service Account {gd_service_account}') + creds = service_account.Credentials.from_service_account_file(gd_service_account, scopes=SCOPES) + self.service = build('drive', 'v3', credentials=creds) def get_cdn_url(self, key): @@ -88,13 +123,18 @@ class GDStorage(Storage): return key[1:] return key - def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=True): + # gets the Drive folderID if it is there + def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False): """ Retrieves the id of a folder or file from its @name and the @parent_id folder Optionally does multiple @retries and sleeps @sleep_seconds between them If @use_mime_type will restrict search to "mimeType='application/vnd.google-apps.folder'" If @raise_on_missing will throw error when not found, or returns None Will remember previous calls to avoid duplication if @use_cache + DM - caching giving a perf improvement in order of 41s to 46s + So I prefer not to use yet, purely as caching notoriously hard in terms of edge cases + and pro's don't outweigh cons for me (yet) + to be fair I just need to test this and make sure it always runs well! Returns the id of the file or folder from its name as a string """ # cache logic @@ -107,7 +147,7 @@ class GDStorage(Storage): # API logic debug_header: str = f"[searching {name=} in {parent_id=}]" - query_string = f"'{parent_id}' in parents and name = '{name}' " + query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false " if use_mime_type: query_string += f" and mimeType='application/vnd.google-apps.folder' " From 524b40b8692c00d26a35ca256fbf91dab6369c40 Mon Sep 17 00:00:00 2001 From: Dave Mateer Date: Mon, 18 Jul 2022 13:39:00 +0100 Subject: [PATCH 2/4] Added Google OAuth flow for Google Drive so can use a real user and not a service account to save files --- configs/config.py | 3 +- create_update_test_oauth_token.py | 77 +++++++++++++++++++++++++++++++ example.config.yaml | 12 ++++- 3 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 create_update_test_oauth_token.py diff --git a/configs/config.py b/configs/config.py index 2d134da..2298c51 100644 --- a/configs/config.py +++ b/configs/config.py @@ -117,7 +117,8 @@ class Config: gd = secrets["google_drive"] self.gd_config = GDConfig( root_folder_id=gd.get("root_folder_id"), - service_account=gd.get("service_account", GDConfig.service_account) + oauth_token_file_path_and_name=gd.get("oauth_token_file_path_and_name"), + service_account=gd.get("service_account") ) if "local" in secrets: diff --git a/create_update_test_oauth_token.py b/create_update_test_oauth_token.py new file mode 100644 index 0000000..cfe2709 --- /dev/null +++ b/create_update_test_oauth_token.py @@ -0,0 +1,77 @@ +from __future__ import print_function + +import os.path + +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +from googleapiclient.http import MediaFileUpload + +# If creating for first time download the json `credentials.json` from https://console.cloud.google.com/apis/credentials OAuth 2.0 Client IDs +# https://davemateer.com/2022/04/28/google-drive-with-python for more information + +# Can run this code to get a new token and verify the token is the correct user +# and it will refresh the token accordingly + +# Code below from https://developers.google.com/drive/api/quickstart/python + +SCOPES = ['https://www.googleapis.com/auth/drive'] + +def main(): + # token_file = 'gd-token.json' + + token_file = 'secrets/token-davemateer-gmail.json' + + creds = None + + # The file token.json stores the user's access and refresh tokens, and is + # created automatically when the authorization flow completes for the first + # time. + if os.path.exists(token_file): + creds = Credentials.from_authorized_user_file(token_file, SCOPES) + + # If there are no (valid) credentials available, let the user log in. + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + print('Requesting new token') + creds.refresh(Request()) + else: + print('First run through so putting up login dialog') + # credentials.json downloaded from https://console.cloud.google.com/apis/credentials + flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES) + creds = flow.run_local_server(port=0) + # Save the credentials for the next run + with open(token_file, 'w') as token: + print('Saving new token') + token.write(creds.to_json()) + else: + print('Token valid') + + try: + service = build('drive', 'v3', credentials=creds) + + # About the user + results = service.about().get(fields="*").execute() + emailAddress = results['user']['emailAddress'] + print(emailAddress) + + # Call the Drive v3 API and return some files + results = service.files().list( + pageSize=10, fields="nextPageToken, files(id, name)").execute() + items = results.get('files', []) + + if not items: + print('No files found.') + return + print('Files:') + for item in items: + print(u'{0} ({1})'.format(item['name'], item['id'])) + + except HttpError as error: + print(f'An error occurred: {error}') + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/example.config.yaml b/example.config.yaml index f823c47..60753fa 100644 --- a/example.config.yaml +++ b/example.config.yaml @@ -18,8 +18,16 @@ secrets: # needed if you use storage=gd google_drive: - # local filename can be the same or different file from google_sheets.service_account, defaults to service_account.json - service_account: "service_account.json" + # 1.service account to write to google storage - be aware of 15GB limit. Recommend using OAuth user. + # filename can be the same or different file from google_sheets.service_account + # service_account: "service_account.json" + + # 2.token (only 1. or 2. - if both specified then this 2. token takes precedence) + # will need to have write access on the server so refresh flow works + # run the file `create_update_test_oauth_token.py` to create the token and save in a secrets directory so + # it is not checked into source control + oauth_token_file_path_and_name: "secrets/token-davemateer-gmail.json" + root_folder_id: copy XXXX from https://drive.google.com/drive/folders/XXXX # needed if you use storage=local From 6124bc5f72b9d2c6c3af62169bae5140be8f3f15 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 14:52:50 +0100 Subject: [PATCH 3/4] refactored and simplified obtaining credentials --- .gitignore | 3 ++- configs/config.py | 4 ++-- create_update_test_oauth_token.py | 22 +++++++++------------- example.config.yaml | 17 ++++++++++------- storages/gd_storage.py | 6 +++--- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 62a5815..8da75c3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,6 @@ config-*.yaml logs/* local_archive/ vk_config*.json - +gd-token.json +credentials.json secrets/* \ No newline at end of file diff --git a/configs/config.py b/configs/config.py index 2298c51..1169048 100644 --- a/configs/config.py +++ b/configs/config.py @@ -117,8 +117,8 @@ class Config: gd = secrets["google_drive"] self.gd_config = GDConfig( root_folder_id=gd.get("root_folder_id"), - oauth_token_file_path_and_name=gd.get("oauth_token_file_path_and_name"), - service_account=gd.get("service_account") + oauth_token_filename=gd.get("oauth_token_filename"), + service_account=gd.get("service_account", GDConfig.service_account) ) if "local" in secrets: diff --git a/create_update_test_oauth_token.py b/create_update_test_oauth_token.py index cfe2709..65b3086 100644 --- a/create_update_test_oauth_token.py +++ b/create_update_test_oauth_token.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import os.path from google.auth.transport.requests import Request @@ -8,23 +6,20 @@ from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build from googleapiclient.errors import HttpError -from googleapiclient.http import MediaFileUpload - -# If creating for first time download the json `credentials.json` from https://console.cloud.google.com/apis/credentials OAuth 2.0 Client IDs +# If creating for first time download the OAuth Client Ids json `credentials.json` from https://console.cloud.google.com/apis/credentials OAuth 2.0 Client IDs +# add "http://localhost:55192/" to the list of "Authorised redirect URIs" # https://davemateer.com/2022/04/28/google-drive-with-python for more information -# Can run this code to get a new token and verify the token is the correct user -# and it will refresh the token accordingly +# You can run this code to get a new token and verify it belongs to the correct user +# This token will be refresh automatically by the auto-archiver # Code below from https://developers.google.com/drive/api/quickstart/python SCOPES = ['https://www.googleapis.com/auth/drive'] + def main(): - # token_file = 'gd-token.json' - - token_file = 'secrets/token-davemateer-gmail.json' - + token_file = 'gd-token.json' creds = None # The file token.json stores the user's access and refresh tokens, and is @@ -42,7 +37,7 @@ def main(): print('First run through so putting up login dialog') # credentials.json downloaded from https://console.cloud.google.com/apis/credentials flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES) - creds = flow.run_local_server(port=0) + creds = flow.run_local_server(port=55192) # Save the credentials for the next run with open(token_file, 'w') as token: print('Saving new token') @@ -73,5 +68,6 @@ def main(): except HttpError as error: print(f'An error occurred: {error}') + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/example.config.yaml b/example.config.yaml index 60753fa..dc78803 100644 --- a/example.config.yaml +++ b/example.config.yaml @@ -18,15 +18,18 @@ secrets: # needed if you use storage=gd google_drive: - # 1.service account to write to google storage - be aware of 15GB limit. Recommend using OAuth user. - # filename can be the same or different file from google_sheets.service_account + # To authenticate with google you have two options (1. service account OR 2. OAuth token) + + # 1. service account - storage space will count towards the developer account + # filename can be the same or different file from google_sheets.service_account, defaults to "service_account.json" # service_account: "service_account.json" - # 2.token (only 1. or 2. - if both specified then this 2. token takes precedence) - # will need to have write access on the server so refresh flow works - # run the file `create_update_test_oauth_token.py` to create the token and save in a secrets directory so - # it is not checked into source control - oauth_token_file_path_and_name: "secrets/token-davemateer-gmail.json" + # 2. OAuth token - storage space will count towards the owner of the GDrive folder + # (only 1. or 2. - if both specified then this 2. takes precedence) + # needs write access on the server so refresh flow works + # To get the token, run the file `create_update_test_oauth_token.py` + # you can edit that file if you want a different token filename, default is "gd-token.json" + oauth_token_filename: "gd-token.json" root_folder_id: copy XXXX from https://drive.google.com/drive/folders/XXXX diff --git a/storages/gd_storage.py b/storages/gd_storage.py index e60e37f..be12625 100644 --- a/storages/gd_storage.py +++ b/storages/gd_storage.py @@ -14,8 +14,8 @@ from google.auth.transport.requests import Request @dataclass class GDConfig: root_folder_id: str - oauth_token_file_path_and_name: str - service_account: str + oauth_token_filename: str + service_account: str = "service_account.json" folder: str = "default" class GDStorage(Storage): @@ -25,7 +25,7 @@ class GDStorage(Storage): SCOPES=['https://www.googleapis.com/auth/drive'] - token_file = config.oauth_token_file_path_and_name + token_file = config.oauth_token_filename if token_file is not None: """ Tokens are refreshed after 1 hour From 992dee022a366718ce22e58d9d66783daf12247e Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 25 Jul 2022 14:59:04 +0100 Subject: [PATCH 4/4] format --- storages/gd_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storages/gd_storage.py b/storages/gd_storage.py index be12625..933c168 100644 --- a/storages/gd_storage.py +++ b/storages/gd_storage.py @@ -28,7 +28,7 @@ class GDStorage(Storage): token_file = config.oauth_token_filename if token_file is not None: """ - Tokens are refreshed after 1 hour + Tokens are refreshed after 1 hour however keep working for 7 days (tbc) so as long as the job doesn't last for 7 days then this method of refreshing only once per run will work