mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
telethon_archiver working for multiple media
This commit is contained in:
@@ -3,6 +3,7 @@ from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from metadata import Metadata
|
||||
from steps.step import Step
|
||||
import mimetypes, requests
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -12,9 +13,9 @@ class Archiverv2(Step):
|
||||
def __init__(self, config: dict) -> None:
    """
    Initialise the archiver step by forwarding `config` to the parent initialiser.

    :param config: configuration dictionary handed unchanged to the parent class
    """
    # without this STEP.__init__ is not called
    super().__init__(config)
    # self.setup()
|
||||
|
||||
# only for typing...
|
||||
|
||||
def init(name: str, config: dict) -> "Archiverv2":
    """
    Typed convenience wrapper: delegates to `Step.init` with `Archiverv2` as the
    third argument and returns whatever `Step.init` returns.

    The return annotation is a string so that it does not require `Archiverv2`
    to be bound at the moment this function is defined.

    :param name: passed through to `Step.init`
    :param config: passed through to `Step.init`
    """
    return Step.init(name, config, Archiverv2)
|
||||
|
||||
@@ -22,5 +23,23 @@ class Archiverv2(Step):
|
||||
# used when archivers need to login or do other one-time setup
|
||||
pass
|
||||
|
||||
def _guess_file_type(self, path: str) -> str:
    """
    Receives a URL or filename and returns global mimetype like 'image' or 'video'
    see https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types

    :param path: URL or filename whose type should be guessed
    :return: the part of the guessed MIME type before the "/" (e.g. 'image'),
        or an empty string when `mimetypes.guess_type` cannot guess a type
    """
    mime = mimetypes.guess_type(path)[0]
    if mime is not None:
        # "image/jpeg" -> "image"
        return mime.split("/")[0]
    return ""
|
||||
|
||||
def download_from_url(self, url: str, to_filename: str, timeout: float = 30) -> None:
    """
    Fetch `url` with an HTTP GET and write the response body to `to_filename`.

    A browser-like User-Agent header is sent with the request.

    :param url: address to download
    :param to_filename: path of the file to (over)write with the response body
    :param timeout: seconds passed to `requests.get` as its timeout
    :raises requests.RequestException: if the request fails, times out, or the
        server answers with a 4xx/5xx status; the file is not opened in that case
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # requests applies no timeout unless one is given, so an unresponsive
    # host would otherwise block this call indefinitely
    d = requests.get(url, headers=headers, timeout=timeout)
    # fail loudly instead of saving an HTTP error page as if it were the media
    d.raise_for_status()
    with open(to_filename, 'wb') as f:
        f.write(d.content)
|
||||
|
||||
@abstractmethod
def download(self, item: Metadata) -> Metadata:
    """
    Abstract hook with no implementation here: takes a `Metadata` item and is
    annotated to return a `Metadata`.
    """
|
||||
|
||||
Reference in New Issue
Block a user