mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 04:38:29 +03:00
improving media page with images and videos
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import os, datetime, shutil, hashlib, time, requests, re
|
||||
import os, datetime, shutil, hashlib, time, requests, re, mimetypes
|
||||
from dataclasses import dataclass
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlparse
|
||||
@@ -58,7 +58,13 @@ class Archiver(ABC):
|
||||
<h3><a href="{url}">{url}</a></h3><ul>'''
|
||||
|
||||
for url_info in urls_info:
|
||||
page += f'''<li><a href="{url_info['cdn_url']}">{url_info['key']}</a>: {url_info['hash']}</li>'''
|
||||
mime_global, mime_type = self._guess_file_type(url_info["key"])
|
||||
preview = ""
|
||||
if mime_global == "image":
|
||||
preview = f'<img src="{url_info["cdn_url"]}" style="max-height:200px;max-width:200px;"></img>'
|
||||
elif mime_global == "video":
|
||||
preview = f'<video controls style="max-height:200px;max-width:200px;"><source src="{url_info["cdn_url"]}" type="{mime_type}"></source></video>'
|
||||
page += f'''<li><a href="{url_info['cdn_url']}">{preview}{url_info['key']}</a>: {url_info['hash']}</li>'''
|
||||
|
||||
page += f"</ul><h2>{self.name} object data:</h2><code>{object}</code>"
|
||||
page += f"</body></html>"
|
||||
@@ -77,8 +83,20 @@ class Archiver(ABC):
|
||||
page_cdn = self.storage.get_cdn_url(page_key)
|
||||
return (page_cdn, page_hash, thumbnail)
|
||||
|
||||
def _guess_file_type(self, path:str):
|
||||
"""
|
||||
Receives a URL or filename and returns global mimetype like 'image' or 'video' and the specific mimetype as a tuple
|
||||
see https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
|
||||
ex: ('audio', 'audio/mp3')
|
||||
"""
|
||||
mime = mimetypes.guess_type(path)[0]
|
||||
if mime is not None:
|
||||
return mime.split("/")[0], mime
|
||||
return "", ""
|
||||
|
||||
|
||||
# eg images in a tweet save to cloud storage
|
||||
def generate_media_page(self, urls, url, object):
|
||||
def generate_media_page(self, urls, url, object, requester=requests):
|
||||
"""
|
||||
For a list of media urls, fetch them, upload them
|
||||
and call self.generate_media_page_html with them
|
||||
@@ -94,7 +112,7 @@ class Archiver(ABC):
|
||||
|
||||
filename = os.path.join(Storage.TMP_FOLDER, key)
|
||||
|
||||
d = requests.get(media_url, headers=headers)
|
||||
d = requester.get(media_url, headers=headers)
|
||||
with open(filename, 'wb') as f:
|
||||
f.write(d.content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user