From 853e018ace1cd8f2fde1a1196d57687652b75ef1 Mon Sep 17 00:00:00 2001
From: Logan Williams <logan@bellingcat.com>
Date: Mon, 8 Feb 2021 11:21:42 +0000
Subject: [PATCH] Synchronize with remote version

---
 streaming.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
 update.py    | 106 ++++++++++++++++++++++++++-------------------------
 2 files changed, 156 insertions(+), 51 deletions(-)
 create mode 100644 streaming.py

diff --git a/streaming.py b/streaming.py
new file mode 100644
index 0000000..eab8356
--- /dev/null
+++ b/streaming.py
@@ -0,0 +1,101 @@
+import gspread
+import youtube_dl
+from pathlib import Path  # NOTE(review): not used anywhere in this script
+import sys
+import datetime
+import boto3
+import os
+from dotenv import load_dotenv
+from botocore.errorfactory import ClientError  # NOTE(review): not used anywhere in this script
+# Records at most one live stream listed in the media spreadsheet, uploads it to a DigitalOcean Space and writes the result back to the sheet.
+load_dotenv()
+
+gc = gspread.service_account()
+sh = gc.open("Media Sheet (January 16-20 + People)")
+
+found_live = False  # becomes True once a live stream is found; ends both loops below
+
+# loop through the first five worksheets of the spreadsheet
+for ii in range(5):
+    # capture at most one livestream per run: stop once one has been handled
+    if found_live:
+        break
+
+    wks = sh.get_worksheet(ii)
+    values = wks.get_all_values()
+
+    ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}  # downloads are written to tmp/<id>.<ext>
+    ydl = youtube_dl.YoutubeDL(ydl_opts)
+    # S3-compatible client pointed at the digitaloceanspaces.com endpoint for DO_SPACES_REGION
+    s3_client = boto3.client('s3',
+            region_name=os.getenv('DO_SPACES_REGION'),
+            endpoint_url='https://{}.digitaloceanspaces.com'.format(os.getenv('DO_SPACES_REGION')),
+            aws_access_key_id=os.getenv('DO_SPACES_KEY'),
+            aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
+
+    # loop through rows in worksheet; i is the 1-based sheet row, starting at row 2 (row 1 / values[0] is skipped)
+    for i in range(2, len(values)+1):
+        # capture at most one livestream per run
+        if found_live:
+            break
+
+        v = values[i-1]
+        # process rows that have a value in column B (v[1], used as the URL) and an empty column K (v[10], status)
+        if v[1] != "" and v[10] == "":
+            print(v[1])
+
+            try:
+                info = ydl.extract_info(v[1], download=False)
+
+                # only act on URLs youtube-dl reports as currently live; other rows are left untouched
+                if 'is_live' in info and info['is_live']:
+                    found_live = True
+
+                    wks.update('K' + str(i), 'Recording stream')  # mark the row before the download starts
+
+                    # extract again with download=True to fetch the stream; this sometimes results in a different filename
+                    info = ydl.extract_info(v[1], download=True)
+                    # when the result has 'entries', the first entry is used to derive the filename
+                    if 'entries' in info:
+                        filename = ydl.prepare_filename(info['entries'][0])
+                    else:
+                        filename = ydl.prepare_filename(info)
+                    # fall back to an .mkv name when the expected file is missing (NOTE(review): split('.')[0] cuts at the first dot)
+                    if not os.path.exists(filename):
+                        filename = filename.split('.')[0] + '.mkv'
+
+                    print(filename)
+                    key = filename.split('/')[1]  # path component after the 'tmp/' directory from outtmpl
+                    cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
+                    # upload with a public-read ACL, then delete the local copy
+                    with open(filename, 'rb') as f:
+                        s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
+
+                    os.remove(filename)
+                    # write status (K), timestamp (L) and CDN URL (M) back to this row
+                    update = [{
+                        'range': 'K' + str(i),
+                        'values': [['successful']]
+                    }, {
+                        'range': 'L' + str(i),
+                        'values': [[datetime.datetime.now().isoformat()]]
+                    }, {
+                        'range': 'M' + str(i),
+                        'values': [[cdn_url]]
+                    }]
+
+                    wks.batch_update(update)
+
+                    break  # stop scanning rows; found_live also ends the worksheet loop
+            except:  # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit
+                t, value, traceback = sys.exc_info()  # only value (the exception instance) is used
+                # record the error message (K) and timestamp (L) on the row instead of raising
+                update = [{
+                    'range': 'K' + str(i),
+                    'values': [[str(value)]]
+                }, {
+                    'range': 'L' + str(i),
+                    'values': [[datetime.datetime.now().isoformat()]]
+                }]
+
+                wks.batch_update(update)
diff --git a/update.py b/update.py
index 826030c..51c7a0b 100644
--- a/update.py
+++ b/update.py
@@ -13,6 +13,7 @@ load_dotenv()
 gc = gspread.service_account()
 sh = gc.open("Media Sheet (January 16-20 + People)")
 
+# loop through worksheets to check
 for ii in range(5):
     wks = sh.get_worksheet(ii)
     values = wks.get_all_values()
@@ -26,6 +27,7 @@ for ii in range(5):
             aws_access_key_id=os.getenv('DO_SPACES_KEY'),
             aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
 
+    # loop through rows in worksheet
     for i in range(2, len(values)+1):
         v = values[i-1]
 
@@ -35,68 +37,70 @@ for ii in range(5):
             try:
                 info = ydl.extract_info(v[1], download=False)
 
-                if 'entries' in info:
-                    if len(info['entries']) > 1:
-                        raise Exception('ERROR: Cannot archive channels or pages with multiple videos')
-
-                    filename = ydl.prepare_filename(info['entries'][0])
-                else:
-                    filename = ydl.prepare_filename(info)
-                
-                print(filename)
-                key = filename.split('/')[1]
-                cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
-
-                try:
-                    s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
-
-                    # file exists
-
-                    update = [{
-                        'range': 'K' + str(i),
-                        'values': [['already archived']]
-                    }, {
-                        'range': 'M' + str(i),
-                        'values': [[cdn_url]]
-                    }]
-
-                    wks.batch_update(update)
-
-                except ClientError:
-                    # Not found
-
-                    # sometimes this results in a different filename, so do this again
-                    info = ydl.extract_info(v[1], download=True)
+                # skip if live
+                if 'is_live' not in info or not info['is_live']:
                     if 'entries' in info:
+                        if len(info['entries']) > 1:
+                            raise Exception('ERROR: Cannot archive channels or pages with multiple videos')
+
                         filename = ydl.prepare_filename(info['entries'][0])
                     else:
                         filename = ydl.prepare_filename(info)
-
-
-                    if not os.path.exists(filename):
-                        filename = filename.split('.')[0] + '.mkv'
-
+                    
                     print(filename)
                     key = filename.split('/')[1]
                     cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
 
-                    with open(filename, 'rb') as f:
-                        s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
+                    try:
+                        s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
 
-                    os.remove(filename)
+                        # file exists
 
-                    update = [{
-                        'range': 'K' + str(i),
-                        'values': [['successful']]
-                    }, {
-                        'range': 'L' + str(i),
-                        'values': [[datetime.datetime.now().isoformat()]]
-                    }, {
-                        'range': 'M' + str(i),
-                        'values': [[cdn_url]]
-                    }]
+                        update = [{
+                            'range': 'K' + str(i),
+                            'values': [['already archived']]
+                        }, {
+                            'range': 'M' + str(i),
+                            'values': [[cdn_url]]
+                        }]
 
-                    wks.batch_update(update)
+                        wks.batch_update(update)
+
+                    except ClientError:
+                        # Not found
+
+                        # sometimes this results in a different filename, so do this again
+                        info = ydl.extract_info(v[1], download=True)
+                        if 'entries' in info:
+                            filename = ydl.prepare_filename(info['entries'][0])
+                        else:
+                            filename = ydl.prepare_filename(info)
+
+
+                        if not os.path.exists(filename):
+                            filename = filename.split('.')[0] + '.mkv'
+
+                        print(filename)
+                        key = filename.split('/')[1]
+                        cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
+
+                        with open(filename, 'rb') as f:
+                            s3_client.upload_fileobj(f, Bucket=os.getenv('DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
+
+                        os.remove(filename)
+
+                        update = [{
+                            'range': 'K' + str(i),
+                            'values': [['successful']]
+                        }, {
+                            'range': 'L' + str(i),
+                            'values': [[datetime.datetime.now().isoformat()]]
+                        }, {
+                            'range': 'M' + str(i),
+                            'values': [[cdn_url]]
+                        }]
+
+                        wks.batch_update(update)
             except:
                 t, value, traceback = sys.exc_info()