Dynamically adjust number of keyframes for contact sheet view.

This commit is contained in:
Logan Williams
2021-08-25 11:04:14 +00:00
parent e3b400ca4e
commit 2097e42df0
2 changed files with 108 additions and 75 deletions

89
Pipfile.lock generated
View File

@@ -35,18 +35,18 @@
},
"boto3": {
"hashes": [
"sha256:1d24c6d1f5db4b52bb29f1dfe13fd3e9d95d9fa4634b0638a096f5a884173cde",
"sha256:8ee8766813864796be6c87ad762c6da4bfef603977931854a38f49fe4db06495"
"sha256:7209b79833bdf13753aa24f76bf533890ffed2cc4fe1fe08619d223c209bbd11",
"sha256:f46c93d09acd4d4bfc6b9522ed852fecbdc508e0365f29ddfb3c146aae784b4e"
],
"index": "pypi",
"version": "==1.17.84"
"version": "==1.18.27"
},
"botocore": {
"hashes": [
"sha256:75e1397b80aa8757a26636b949eebd20b3cf67e8f1ed80dc01170907e06ea45d",
"sha256:bc59eb748fcb07835613ebea6dcc2600ae1a8be0fae30e40b9c1e81b73262296"
"sha256:8c99abd7093ab11ce8d09c68732aeeb6065a53d2fe371568452e99291817fff5",
"sha256:b9e2c90bad164d111c229102f58f995c28576e719dd116b446965e1b786f8fa5"
],
"version": "==1.20.84"
"version": "==1.21.27"
},
"cachetools": {
"hashes": [
@@ -62,12 +62,13 @@
],
"version": "==2021.5.30"
},
"chardet": {
"charset-normalizer": {
"hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
],
"version": "==4.0.0"
"markers": "python_version >= '3'",
"version": "==2.0.4"
},
"ffmpeg-python": {
"hashes": [
@@ -85,32 +86,33 @@
},
"google-auth": {
"hashes": [
"sha256:044d81b1e58012f8ebc71cc134e191c1fa312f543f1fbc99973afe28c25e3228",
"sha256:b3ca7a8ff9ab3bdefee3ad5aefb11fc6485423767eee016f5942d8e606ca23fb"
"sha256:c012c8be7c442c8309ca8fa0876fef33f5fd977c467be1e1c1c2f721e8ebd73c",
"sha256:ea1af050b3e06eb73e4470f704d23007307bc0e87c13e015f6b90460f1407bd3"
],
"version": "==1.30.1"
"version": "==2.0.1"
},
"google-auth-oauthlib": {
"hashes": [
"sha256:09832c6e75032f93818edf1affe4746121d640c625a5bef9b5c96af676e98eee",
"sha256:0e92aacacfb94978de3b7972cf4b0f204c3cd206f74ddd0dc0b31e91164e6317"
"sha256:4ab58e6c3dc6ccf112f921fcced40e5426fba266768986ea502228488276eaba",
"sha256:b5a1ce7c617d247ccb2dfbba9d4bfc734b41096803d854a2c52592ae80150a67"
],
"version": "==0.4.4"
"version": "==0.4.5"
},
"gspread": {
"hashes": [
"sha256:056ceb9fb4f439c15ec39d84c91653c6435f775a1c8afc8fe7f909f8393821fb",
"sha256:4bda4ab8c5edb9e41cf4ae40d4d5fb30447522b4e43608e05c01351ab1b96912"
"sha256:236a0f24e3724b49bae4cbd5144ed036b0ae6feaf5828ad033eb2824bf05e5be",
"sha256:4933c3e2359e82698c0990f3b0e312627fcbf8fecc8bc81d26713f5860e20b48"
],
"index": "pypi",
"version": "==3.7.0"
"version": "==4.0.1"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
],
"version": "==2.10"
"markers": "python_version >= '3'",
"version": "==3.2"
},
"jmespath": {
"hashes": [
@@ -121,10 +123,10 @@
},
"oauthlib": {
"hashes": [
"sha256:bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889",
"sha256:df884cd6cbe20e32633f1db1072e9356f53638e4361bef4e8b03c9127c9328ea"
"sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc",
"sha256:8f0215fcc533dd8dd1bee6f4c412d4f0cd7297307d43ac61666389e3bc3198a3"
],
"version": "==3.1.0"
"version": "==3.1.1"
},
"pyasn1": {
"hashes": [
@@ -164,25 +166,25 @@
},
"python-dateutil": {
"hashes": [
"sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
"sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
"sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"
],
"version": "==2.8.1"
"version": "==2.8.2"
},
"python-dotenv": {
"hashes": [
"sha256:00aa34e92d992e9f8383730816359647f358f4a3be1ba45e5a5cefd27ee91544",
"sha256:b1ae5e9643d5ed987fc57cc2583021e38db531946518130777734f9589b3141f"
"sha256:aae25dc1ebe97c420f50b81fb0e5c949659af713f31fdb63c749ca68748f34b1",
"sha256:f521bc2ac9a8e03c736f62911605c5d83970021e3fa95b37d769e2bbbe9b6172"
],
"index": "pypi",
"version": "==0.17.1"
"version": "==0.19.0"
},
"requests": {
"hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
],
"version": "==2.25.1"
"version": "==2.26.0"
},
"requests-oauthlib": {
"hashes": [
@@ -197,15 +199,14 @@
"sha256:78f9a9bf4e7be0c5ded4583326e7461e3a3c5aae24073648b4bdfa797d78c9d2",
"sha256:9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9"
],
"markers": "python_version >= '3.6'",
"version": "==4.7.2"
},
"s3transfer": {
"hashes": [
"sha256:9b3752887a2880690ce628bc263d6d13a3864083aeacff4890c1c9839a5eb0bc",
"sha256:cb022f4b16551edebbb31a377d3f09600dbada7363d8c5db7976e7f47732e1b2"
"sha256:50ed823e1dc5868ad40c8dc92072f757aa0e653a192845c94a3b676f4a62da4c",
"sha256:9c1dc369814391a6bda20ebbf4b70a0f34630592c9aa520856bf384916af2803"
],
"version": "==0.4.2"
"version": "==0.5.0"
},
"six": {
"hashes": [
@@ -224,18 +225,18 @@
},
"urllib3": {
"hashes": [
"sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c",
"sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
],
"version": "==1.26.5"
"version": "==1.26.6"
},
"youtube-dl": {
"hashes": [
"sha256:4e569cb0477428fd96ee6f7e7a6640b7c9416be626ed708ac4b8ada6c5a6ffbe",
"sha256:deb489a17e541ec7ac35581375ae94161eb22a7ec3373b1216181a4360c187ab"
"sha256:263e04d53fb8ba3dfbd246ad09b7d388e896c132a20cc770c26ee7684de050ac",
"sha256:cb2d3ee002158ede783e97a82c95f3817594df54367ea6a77ce5ceea4772f0ab"
],
"index": "pypi",
"version": "==2021.5.16"
"version": "==2021.6.6"
}
},
"develop": {}

View File

@@ -47,32 +47,45 @@ def index_to_col(index):
return alphabet[index]
def get_thumbnails(filename, s3_client):
def get_thumbnails(filename, s3_client, duration = None):
if not os.path.exists(filename.split('.')[0]):
os.mkdir(filename.split('.')[0])
fps = 0.5
if duration is not None:
duration = float(duration)
if duration < 60:
fps = 10.0 / duration
elif duration < 120:
fps = 20.0 / duration
else:
fps = 40.0 / duration
stream = ffmpeg.input(filename)
stream = ffmpeg.filter(stream, 'fps', fps=0.5).filter('scale', 512, -1)
stream = ffmpeg.filter(stream, 'fps', fps=fps).filter('scale', 512, -1)
stream.output(filename.split('.')[0] + '/out%d.jpg').run()
thumbnails = os.listdir(filename.split('.')[0] + '/')
cdn_urls = []
for fname in thumbnails:
thumbnail_filename = filename.split('.')[0] + '/' + fname
key = filename.split('/')[1].split('.')[0] + '/' + fname
if fname[-3:] == 'jpg':
thumbnail_filename = filename.split('.')[0] + '/' + fname
key = filename.split('/')[1].split('.')[0] + '/' + fname
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
cdn_url = 'https://{}.{}.cdn.digitaloceanspaces.com/{}'.format(
os.getenv('DO_BUCKET'), os.getenv('DO_SPACES_REGION'), key)
with open(thumbnail_filename, 'rb') as f:
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
with open(thumbnail_filename, 'rb') as f:
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
cdn_urls.append(cdn_url)
os.remove(thumbnail_filename)
cdn_urls.append(cdn_url)
os.remove(thumbnail_filename)
key_thumb = cdn_urls[int(len(cdn_urls)*0.25)]
key_thumb = cdn_urls[int(len(cdn_urls)*0.1)]
index_page = f'''<html><head><title>{filename}</title></head>
<body>'''
@@ -117,7 +130,6 @@ def download_telegram_video(url, s3_client, check_if_exists=False):
video_url = video.get('src')
key = video_url.split('/')[-1].split('?')[0]
filename = 'tmp/' + key
print(video_url, key)
if check_if_exists:
try:
@@ -145,14 +157,20 @@ def download_telegram_video(url, s3_client, check_if_exists=False):
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
key_thumb, thumb_index = get_thumbnails(filename, s3_client)
duration = s.find_all('time')[0].contents[0]
if ':' in duration:
duration = float(duration.split(':')[0])*60 + float(duration.split(':')[1])
else:
duration = float(duration)
key_thumb, thumb_index = get_thumbnails(filename, s3_client, duration=duration)
os.remove(filename)
video_data = {
'cdn_url': cdn_url,
'thumbnail': key_thumb,
'thumbnail_index': thumb_index,
'duration': s.find_all('time')[0].contents[0],
'duration': duration,
'title': original_url,
'timestamp': s.find_all('time')[1].get('datetime')
}
@@ -183,11 +201,14 @@ def internet_archive(url, s3_client):
while status_r.json()['status'] == 'pending' and retries < 40:
time.sleep(5)
status_r = requests.get(
'https://web.archive.org/save/status/' + job_id, headers={
"Accept": "application/json",
"Authorization": "LOW " + os.getenv('INTERNET_ARCHIVE_S3_KEY') + ":" + os.getenv('INTERNET_ARCHIVE_S3_SECRET')
})
try:
status_r = requests.get(
'https://web.archive.org/save/status/' + job_id, headers={
"Accept": "application/json",
"Authorization": "LOW " + os.getenv('INTERNET_ARCHIVE_S3_KEY') + ":" + os.getenv('INTERNET_ARCHIVE_S3_SECRET')
})
except:
time.sleep(1)
retries += 1
@@ -211,7 +232,8 @@ def internet_archive(url, s3_client):
def download_vid(url, s3_client, check_if_exists=False):
ydl_opts = {'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}
if url[0:20] == 'https://facebook.com' and os.getenv('FB_COOKIE'):
if (url[0:21] == 'https://facebook.com/' or url[0:25] == 'https://www.facebook.com/') and os.getenv('FB_COOKIE'):
print('Using cookie')
youtube_dl.utils.std_headers['cookie'] = os.getenv('FB_COOKIE')
ydl = youtube_dl.YoutubeDL(ydl_opts)
cdn_url = None
@@ -250,10 +272,10 @@ def download_vid(url, s3_client, check_if_exists=False):
if len(info['entries']) > 1:
raise Exception(
'ERROR: Cannot archive channels or pages with multiple videos')
else:
info = info['entries'][0]
filename = ydl.prepare_filename(info['entries'][0])
else:
filename = ydl.prepare_filename(info)
filename = ydl.prepare_filename(info)
if not os.path.exists(filename):
filename = filename.split('.')[0] + '.mkv'
@@ -267,14 +289,15 @@ def download_vid(url, s3_client, check_if_exists=False):
s3_client.upload_fileobj(f, Bucket=os.getenv(
'DO_BUCKET'), Key=key, ExtraArgs={'ACL': 'public-read'})
key_thumb, thumb_index = get_thumbnails(filename, s3_client)
duration = info['duration'] if 'duration' in info else None
key_thumb, thumb_index = get_thumbnails(filename, s3_client, duration=duration)
os.remove(filename)
video_data = {
'cdn_url': cdn_url,
'thumbnail': key_thumb,
'thumbnail_index': thumb_index,
'duration': info['duration'] if 'duration' in info else None,
'duration': duration,
'title': info['title'] if 'title' in info else None,
'timestamp': info['timestamp'] if 'timestamp' in info else datetime.datetime.strptime(info['upload_date'], '%Y%m%d').timestamp() if 'upload_date' in info else None,
}
@@ -404,17 +427,29 @@ def process_sheet(sheet):
# check so we don't step on each others' toes
if latest_val == '' or latest_val is None:
if 'http://t.me/' in v[url_index] or 'https://t.me/' in v[url_index]:
wks.update(
wks.update(
columns['status'] + str(i), 'Archive in progress')
if 'http://t.me/' in v[url_index] or 'https://t.me/' in v[url_index]:
video_data, status = download_telegram_video(
v[url_index], s3_client, check_if_exists=True)
if status == 'No telegram video found':
print("Trying Internet Archive fallback")
video_data, status = internet_archive(
v[url_index], s3_client)
update_sheet(wks, i, status, video_data, columns, v)
else:
try:
ydl_opts = {
'outtmpl': 'tmp/%(id)s.%(ext)s', 'quiet': False}
if (v[url_index][0:21] == 'https://facebook.com/' or v[url_index][0:25] == 'https://www.facebook.com/') and os.getenv('FB_COOKIE'):
print('Using cookie')
youtube_dl.utils.std_headers['cookie'] = os.getenv(
'FB_COOKIE')
ydl = youtube_dl.YoutubeDL(ydl_opts)
info = ydl.extract_info(
v[url_index], download=False)
@@ -434,9 +469,6 @@ def process_sheet(sheet):
except:
# i'm sure there's a better way to handle this than nested try/catch blocks
try:
wks.update(
columns['status'] + str(i), 'Archive in progress')
print("Trying Internet Archive fallback")
video_data, status = internet_archive(