From 853e018ace1cd8f2fde1a1196d57687652b75ef1 Mon Sep 17 00:00:00 2001 From: Logan Williams Date: Mon, 8 Feb 2021 11:21:42 +0000 Subject: [PATCH] Synchronize with remote version --- streaming.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++ update.py | 106 ++++++++++++++++++++++++++------------------------- 2 files changed, 156 insertions(+), 51 deletions(-) create mode 100644 streaming.py diff --git a/streaming.py b/streaming.py new file mode 100644 index 0000000..eab8356 --- /dev/null +++ b/streaming.py @@ -0,0 +1,101 @@ +import gspread +import youtube_dl +from pathlib import Path +import sys +import datetime +import boto3 +import os +from dotenv import load_dotenv +from botocore.errorfactory import ClientError + +load_dotenv() + +gc = gspread.service_account() +sh = gc.open("Media Sheet (January 16-20 + People)") + +found_live = False + +# loop through worksheets to check +for ii in range(5): + # only capture one video if its a livestream + if found_live: + break + + wks = sh.get_worksheet(ii) + values = wks.get_all_values() + + ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False} + ydl = youtube_dl.YoutubeDL(ydl_opts) + + s3_client = boto3.client('s3', + region_name=os.getenv('DO_SPACES_REGION'), + endpoint_url='https://{}.digitaloceanspaces.com'.format(os.getenv('DO_SPACES_REGION')), + aws_access_key_id=os.getenv('DO_SPACES_KEY'), + aws_secret_access_key=os.getenv('DO_SPACES_SECRET')) + + # loop through rows in worksheet + for i in range(2, len(values)+1): + # only capture one video if its a livestream + if found_live: + break + + v = values[i-1] + + if v[1] != "" and v[10] == "": + print(v[1]) + + try: + info = ydl.extract_info(v[1], download=False) + + # skip if live + if 'is_live' in info and info['is_live']: + found_live = True + + wks.update('K' + str(i), 'Recording stream') + + # sometimes this results in a different filename, so do this again + info = ydl.extract_info(v[1], download=True) + + if 'entries' in info: + filename = ydl.prepare_filename(info['entries'][0]) + else: + filename = ydl.prepare_filename(info) + + if not os.path.exists(filename): + filename = filename.split('.')[0] + '.mkv' + + print(filename) + key = filename.split('/')[1] + cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key) + + with open(filename, 'rb') as f: + s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'}) + + os.remove(filename) + + update = [{ + 'range': 'K' + str(i), + 'values': [['successful']] + }, { + 'range': 'L' + str(i), + 'values': [[datetime.datetime.now().isoformat()]] + }, { + 'range': 'M' + str(i), + 'values': [[cdn_url]] + }] + + wks.batch_update(update) + + break + except: + t, value, traceback = sys.exc_info() + + update = [{ + 'range': 'K' + str(i), + 'values': [[str(value)]] + }, { + 'range': 'L' + str(i), + 'values': [[datetime.datetime.now().isoformat()]] + }] + + wks.batch_update(update) diff --git a/update.py b/update.py index 826030c..51c7a0b 100644 --- a/update.py +++ b/update.py @@ -13,6 +13,7 @@ load_dotenv() gc = gspread.service_account() sh = gc.open("Media Sheet (January 16-20 + People)") +# loop through worksheets to check for ii in range(5): wks = sh.get_worksheet(ii) values = wks.get_all_values() @@ -26,6 +27,7 @@ for ii in range(5): aws_access_key_id=os.getenv('DO_SPACES_KEY'), aws_secret_access_key=os.getenv('DO_SPACES_SECRET')) + # loop through rows in worksheet for i in range(2, len(values)+1): v = values[i-1] @@ -35,68 +37,70 @@ for ii in range(5): try: info = ydl.extract_info(v[1], download=False) - if 'entries' in info: - if len(info['entries']) > 1: - raise Exception('ERROR: Cannot archive channels or pages with multiple videos') - - filename = ydl.prepare_filename(info['entries'][0]) - else: - filename = ydl.prepare_filename(info) - - print(filename) - key = filename.split('/')[1] - cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key) - - try: - s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key) - - # file exists - - update = [{ - 'range': 'K' + str(i), - 'values': [['already archived']] - }, { - 'range': 'M' + str(i), - 'values': [[cdn_url]] - }] - - wks.batch_update(update) - - except ClientError: - # Not found - - # sometimes this results in a different filename, so do this again - info = ydl.extract_info(v[1], download=True) + # skip if live + if 'is_live' not in info or not info['is_live']: if 'entries' in info: + if len(info['entries']) > 1: + raise Exception('ERROR: Cannot archive channels or pages with multiple videos') + filename = ydl.prepare_filename(info['entries'][0]) else: filename = ydl.prepare_filename(info) - - - if not os.path.exists(filename): - filename = filename.split('.')[0] + '.mkv' - + print(filename) key = filename.split('/')[1] cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key) - with open(filename, 'rb') as f: - s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'}) + try: + s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key) - os.remove(filename) + # file exists - update = [{ - 'range': 'K' + str(i), - 'values': [['successful']] - }, { - 'range': 'L' + str(i), - 'values': [[datetime.datetime.now().isoformat()]] - }, { - 'range': 'M' + str(i), - 'values': [[cdn_url]] - }] + update = [{ + 'range': 'K' + str(i), + 'values': [['already archived']] + }, { + 'range': 'M' + str(i), + 'values': [[cdn_url]] + }] - wks.batch_update(update) + wks.batch_update(update) + + except ClientError: + # Not found + + # sometimes this results in a different filename, so do this again + info = ydl.extract_info(v[1], download=True) + if 'entries' in info: + filename = ydl.prepare_filename(info['entries'][0]) + else: + filename = ydl.prepare_filename(info) + + + if not os.path.exists(filename): + filename = filename.split('.')[0] + '.mkv' + + print(filename) + key = filename.split('/')[1] + cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key) + + with open(filename, 'rb') as f: + s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'}) + + os.remove(filename) + + update = [{ + 'range': 'K' + str(i), + 'values': [['successful']] + }, { + 'range': 'L' + str(i), + 'values': [[datetime.datetime.now().isoformat()]] + }, { + 'range': 'M' + str(i), + 'values': [[cdn_url]] + }] + + wks.batch_update(update) except: t, value, traceback = sys.exc_info()