mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-13 13:48:28 +03:00
telegram archiver
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
from metadata import Metadata
|
||||
from steps.step import Step
|
||||
import mimetypes, requests
|
||||
@@ -23,7 +24,7 @@ class Archiverv2(Step):
|
||||
# used when archivers need to login or do other one-time setup
|
||||
pass
|
||||
|
||||
def clean_url(self, url:str) -> str:
|
||||
def clean_url(self, url: str) -> str:
    """
    Hook used to clean unnecessary URL parameters.

    This implementation performs no cleaning: the URL is handed back
    exactly as it was received.
    """
    cleaned = url
    return cleaned
|
||||
|
||||
@@ -37,13 +38,23 @@ class Archiverv2(Step):
|
||||
return mime.split("/")[0]
|
||||
return ""
|
||||
|
||||
def download_from_url(self, url:str, to_filename:str) -> None:
|
||||
def download_from_url(self, url: str, to_filename: str | None = None, item: Metadata | None = None, timeout: float = 30) -> str:
    """
    Download ``url`` to a local file and return the local filename.

    Args:
        url: address of the resource to fetch.
        to_filename: target filename; when empty/None it is inferred from
            the last path segment of ``url`` (query string dropped, kept
            to its last 64 characters).
        item: when provided, the file is placed inside ``item.get_tmp_dir()``.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot block the caller forever.

    Returns:
        The path of the file that was written.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response (nothing is written for an error response).
    """
    if not to_filename:
        to_filename = url.split('/')[-1].split('?')[0]
        if len(to_filename) > 64:
            # keep the tail so the file extension survives the truncation
            to_filename = to_filename[-64:]
        if not to_filename:
            # URLs ending in "/" have no last segment; derive a stable name
            # from the URL instead of trying to open an empty path
            import hashlib
            to_filename = hashlib.sha256(url.encode('utf-8')).hexdigest()[:32]
    if item:
        to_filename = os.path.join(item.get_tmp_dir(), to_filename)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # stream the body so large media files are not held fully in memory
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
        # fail loudly rather than saving an HTML error page as the media file
        response.raise_for_status()
        with open(to_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    return to_filename
|
||||
|
||||
@abstractmethod
def download(self, item: Metadata) -> Metadata:
    """
    Abstract download step: receives a Metadata ``item`` and, per its
    annotation, is expected to return a Metadata.

    The body here is intentionally empty.
    """
    pass
|
||||
|
||||
Reference in New Issue
Block a user