Merge pull request #56 from djhmateer/oauth

This commit is contained in:
Miguel Sozinho Ramalho
2022-07-25 15:03:49 +01:00
committed by GitHub
5 changed files with 135 additions and 8 deletions

2
.gitignore vendored
View File

@@ -17,4 +17,6 @@ config-*.yaml
logs/*
local_archive/
vk_config*.json
gd-token.json
credentials.json
secrets/*

View File

@@ -118,6 +118,7 @@ class Config:
gd = secrets["google_drive"]
self.gd_config = GDConfig(
root_folder_id=gd.get("root_folder_id"),
oauth_token_filename=gd.get("oauth_token_filename"),
service_account=gd.get("service_account", GDConfig.service_account)
)

View File

@@ -0,0 +1,73 @@
import os.path
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# If creating for first time download the OAuth Client Ids json `credentials.json` from https://console.cloud.google.com/apis/credentials OAuth 2.0 Client IDs
# add "http://localhost:55192/" to the list of "Authorised redirect URIs"
# https://davemateer.com/2022/04/28/google-drive-with-python for more information
# You can run this code to get a new token and verify it belongs to the correct user
# This token will be refreshed automatically by the auto-archiver
# Code below from https://developers.google.com/drive/api/quickstart/python
SCOPES = ['https://www.googleapis.com/auth/drive']
def main(token_file='gd-token.json', credentials_file='credentials.json', port=55192):
    """Create, refresh or validate a Google Drive OAuth token, then smoke-test it.

    If ``token_file`` exists, credentials are loaded from it. Valid
    credentials are used as-is; expired credentials that carry a refresh
    token are refreshed; otherwise an interactive login is started through a
    local web server. Whenever new or refreshed credentials are obtained they
    are written back to ``token_file``.

    Afterwards the Drive v3 API is called to print the e-mail address of the
    authenticated user and up to 10 file names with their ids, so the
    operator can verify the token belongs to the intended account.

    Args:
        token_file: Path of the JSON file holding the access/refresh tokens.
        credentials_file: Path of the OAuth client secrets JSON downloaded
            from the Google Cloud console.
        port: Local port for the login redirect; "http://localhost:<port>/"
            must be listed among the client's authorised redirect URIs.
    """
    creds = None
    # The token file stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the
    # first time.
    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, SCOPES)

    if creds and creds.valid:
        print('Token valid')
    else:
        # No (valid) credentials available: refresh silently when possible,
        # otherwise let the user log in.
        if creds and creds.expired and creds.refresh_token:
            print('Requesting new token')
            creds.refresh(Request())
        else:
            print('First run through so putting up login dialog')
            # client secrets downloaded from https://console.cloud.google.com/apis/credentials
            flow = InstalledAppFlow.from_client_secrets_file(credentials_file, SCOPES)
            creds = flow.run_local_server(port=port)

        # Save the credentials for the next run
        with open(token_file, 'w', encoding='utf-8') as token:
            print('Saving new token')
            token.write(creds.to_json())

    try:
        service = build('drive', 'v3', credentials=creds)

        # About the user
        about = service.about().get(fields="*").execute()
        print(about['user']['emailAddress'])

        # Call the Drive v3 API and list some files
        listing = service.files().list(
            pageSize=10, fields="nextPageToken, files(id, name)").execute()
        items = listing.get('files', [])
        if not items:
            print('No files found.')
            return

        print('Files:')
        for item in items:
            print(f"{item['name']} ({item['id']})")
    except HttpError as error:
        print(f'An error occurred: {error}')


if __name__ == '__main__':
    main()

View File

@@ -18,8 +18,19 @@ secrets:
# needed if you use storage=gd
google_drive:
# local filename can be the same or different file from google_sheets.service_account, defaults to service_account.json
service_account: "service_account.json"
# To authenticate with google you have two options (1. service account OR 2. OAuth token)
# 1. service account - storage space will count towards the developer account
# filename can be the same or different file from google_sheets.service_account, defaults to "service_account.json"
# service_account: "service_account.json"
# 2. OAuth token - storage space will count towards the owner of the GDrive folder
# (use only one of 1. or 2. - if both are specified, option 2. takes precedence)
# the token file must be writable on the server so that the refreshed token can be saved
# To get the token, run the file `create_update_test_oauth_token.py`
# you can edit that file if you want a different token filename, default is "gd-token.json"
oauth_token_filename: "gd-token.json"
root_folder_id: copy XXXX from https://drive.google.com/drive/folders/XXXX
# needed if you use storage=local

View File

@@ -8,19 +8,54 @@ from googleapiclient.http import MediaFileUpload
from typing import Optional
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
@dataclass
class GDConfig:
    """Settings needed to connect to Google Drive storage.

    Authentication uses the OAuth token file when ``oauth_token_filename`` is
    set, and the service account file otherwise.
    """
    # id of the Drive folder under which content is stored
    root_folder_id: str
    # path of the OAuth token JSON file; None means "use the service account"
    oauth_token_filename: Optional[str] = None
    # path of the service account JSON file
    service_account: str = "service_account.json"
    folder: str = "default"
class GDStorage(Storage):
def __init__(self, config: GDConfig):
    """Build the Drive v3 service client from an OAuth token or a service account.

    When ``config.oauth_token_filename`` is set, credentials are loaded from
    that file; expired credentials with a refresh token are refreshed and the
    file is rewritten. Otherwise credentials are loaded from
    ``config.service_account``.

    Raises:
        Exception: if the OAuth credentials are invalid and cannot be
            refreshed (the token has to be created again).
    """
    self.folder = config.folder
    self.root_folder_id = config.root_folder_id

    scopes = ['https://www.googleapis.com/auth/drive']
    token_file = config.oauth_token_filename

    if token_file is not None:
        # Author's note (marked "tbc", i.e. unconfirmed): tokens are refreshed
        # after 1 hour but keep working for 7 days, so refreshing only once
        # per run works as long as the job doesn't last for 7 days.
        # See https://davemateer.com/2022/04/28/google-drive-with-python#tokens
        logger.debug(f'Using GD OAuth token {token_file}')
        creds = Credentials.from_authorized_user_file(token_file, scopes)

        if creds and creds.valid:
            logger.debug('GD OAuth Token valid')
        else:
            if not (creds and creds.expired and creds.refresh_token):
                raise Exception("Problem with creds - create the token again")

            logger.debug('Requesting new GD OAuth token')
            creds.refresh(Request())

            # Save the credentials for the next run
            with open(token_file, 'w', encoding='utf-8') as token:
                logger.debug('Saving new GD OAuth token')
                token.write(creds.to_json())
    else:
        gd_service_account = config.service_account
        logger.debug(f'Using GD Service Account {gd_service_account}')
        creds = service_account.Credentials.from_service_account_file(gd_service_account, scopes=scopes)

    self.service = build('drive', 'v3', credentials=creds)
def get_cdn_url(self, key):
@@ -88,13 +123,18 @@ class GDStorage(Storage):
return key[1:]
return key
def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=True):
# gets the Drive folderID if it is there
def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False):
"""
Retrieves the id of a folder or file from its @name and the @parent_id folder
Optionally does multiple @retries and sleeps @sleep_seconds between them
If @use_mime_type will restrict search to "mimeType='application/vnd.google-apps.folder'"
If @raise_on_missing will throw error when not found, or returns None
Will remember previous calls to avoid duplication if @use_cache
DM - caching giving a perf improvement in order of 41s to 46s
So I prefer not to use yet, purely as caching notoriously hard in terms of edge cases
and pro's don't outweigh cons for me (yet)
to be fair I just need to test this and make sure it always runs well!
Returns the id of the file or folder from its name as a string
"""
# cache logic
@@ -107,7 +147,7 @@ class GDStorage(Storage):
# API logic
debug_header: str = f"[searching {name=} in {parent_id=}]"
query_string = f"'{parent_id}' in parents and name = '{name}' "
query_string = f"'{parent_id}' in parents and name = '{name}' and trashed = false "
if use_mime_type:
query_string += f" and mimeType='application/vnd.google-apps.folder' "