Merge branch 'jamesarnall-refactor-dry' into main

This commit is contained in:
Logan Williams
2021-10-21 10:01:11 +00:00

View File

@@ -46,6 +46,14 @@ def index_to_col(index):
else:
return alphabet[index]
def get_cdn_url(key):
return 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
def do_s3_upload(s3_client, f, key):
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
def get_thumbnails(filename, s3_client, duration = None):
if not os.path.exists(filename.split('.')[0]):
@@ -75,12 +83,10 @@ def get_thumbnails(filename, s3_client, duration = None):
thumbnail_filename = filename.split('.')[0] + '/' + fname
key = filename.split('/')[1].split('.')[0] + '/' + fname
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = get_cdn_url(key)
with open(thumbnail_filename, 'rb') as f:
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
do_s3_upload(s3_client, f, key)
cdn_urls.append(cdn_url)
os.remove(thumbnail_filename)
@@ -107,8 +113,7 @@ def get_thumbnails(filename, s3_client, duration = None):
s3_client.upload_fileobj(open(index_fname, 'rb'), Bucket=os.getenv(
'DO_BUCKET'), Key=thumb_index, ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/html'})
thumb_index_cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), thumb_index)
thumb_index_cdn_url = get_cdn_url(thumb_index)
return (key_thumb, thumb_index_cdn_url)
@@ -139,8 +144,7 @@ def download_telegram_video(url, s3_client, check_if_exists=False):
s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
# file exists
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = get_cdn_url(key)
status = 'already archived'
@@ -153,12 +157,10 @@ def download_telegram_video(url, s3_client, check_if_exists=False):
f.write(v.content)
if status != 'already archived':
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = get_cdn_url(key)
with open(filename, 'rb') as f:
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
do_s3_upload(s3_client, f, key)
duration = s.find_all('time')[0].contents[0]
if ':' in duration:
@@ -182,11 +184,15 @@ def download_telegram_video(url, s3_client, check_if_exists=False):
def internet_archive(url, s3_client):
r = requests.post(
'https://web.archive.org/save/', headers={
ia_headers = {
"Accept": "application/json",
"Authorization": "LOW " + os.getenv('INTERNET_ARCHIVE_S3_KEY') + ":" + os.getenv('INTERNET_ARCHIVE_S3_SECRET')
}, data={'url': url})
}
r = requests.post(
'https://web.archive.org/save/', headers=ia_headers, data={'url': url})
if r.status_code != 200:
return ({}, 'Internet archive failed')
@@ -194,10 +200,7 @@ def internet_archive(url, s3_client):
job_id = r.json()['job_id']
status_r = requests.get(
'https://web.archive.org/save/status/' + job_id, headers={
"Accept": "application/json",
"Authorization": "LOW " + os.getenv('INTERNET_ARCHIVE_S3_KEY') + ":" + os.getenv('INTERNET_ARCHIVE_S3_SECRET')
})
'https://web.archive.org/save/status/' + job_id, headers=ia_headers)
retries = 0
@@ -206,10 +209,7 @@ def internet_archive(url, s3_client):
try:
status_r = requests.get(
'https://web.archive.org/save/status/' + job_id, headers={
"Accept": "application/json",
"Authorization": "LOW " + os.getenv('INTERNET_ARCHIVE_S3_KEY') + ":" + os.getenv('INTERNET_ARCHIVE_S3_SECRET')
})
'https://web.archive.org/save/status/' + job_id, headers=ia_headers)
except:
time.sleep(1)
@@ -266,8 +266,7 @@ def download_vid(url, s3_client, check_if_exists=False):
s3_client.head_object(Bucket=os.getenv('DO_BUCKET'), Key=key)
# file exists
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = get_cdn_url(os, key)
status = 'already archived'
@@ -291,12 +290,10 @@ def download_vid(url, s3_client, check_if_exists=False):
if status != 'already archived':
key = get_key(filename)
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = get_cdn_url(os, key)
with open(filename, 'rb') as f:
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
do_s3_upload(s3_client, f, key)
duration = info['duration'] if 'duration' in info else None
key_thumb, thumb_index = get_thumbnails(filename, s3_client, duration=duration)