telegram archiver

This commit is contained in:
msramalho
2023-01-18 21:14:20 +00:00
parent 63d1abbe4b
commit 085376f63f
7 changed files with 105 additions and 16 deletions

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
from abc import abstractmethod
from dataclasses import dataclass
import os
from metadata import Metadata
from steps.step import Step
import mimetypes, requests
@@ -23,7 +24,7 @@ class Archiverv2(Step):
# used when archivers need to login or do other one-time setup
pass
def clean_url(self, url: str) -> str:
    """
    Hook for removing unnecessary URL parameters.

    This base implementation returns `url` unchanged.
    """
    return url
@@ -37,13 +38,23 @@ class Archiverv2(Step):
return mime.split("/")[0]
return ""
def download_from_url(self, url: str, to_filename: str | None = None, item: Metadata | None = None) -> str:
    """
    Download `url` to a local file and return the local filename.

    url: address fetched with an HTTP GET request.
    to_filename: target filename; when empty/None it is inferred from the
        last path segment of `url` (query string removed) and limited to
        its last 64 characters.
    item: when truthy, the file is placed inside `item.get_tmp_dir()`.

    Returns the path the content was written to.
    Raises `requests.RequestException` (including `requests.HTTPError` for
    4xx/5xx responses) when the download fails, and `OSError` when the
    file cannot be written.
    """
    if not to_filename:
        # strip the query string first so a '/' inside it is not mistaken
        # for a path separator; strip trailing '/' so "…/name/" yields "name"
        to_filename = url.split('?')[0].rstrip('/').split('/')[-1]
        if len(to_filename) > 64:
            to_filename = to_filename[-64:]
        if not to_filename:
            # nothing usable in the URL: an empty name would make open() fail
            to_filename = 'download'
    if item:
        to_filename = os.path.join(item.get_tmp_dir(), to_filename)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
    }
    # without a timeout requests can wait forever on an unresponsive server
    response = requests.get(url, headers=headers, timeout=30)
    # do not store an HTTP error page as if it were the requested content
    response.raise_for_status()
    with open(to_filename, 'wb') as f:
        f.write(response.content)
    return to_filename
@abstractmethod
def download(self, item: Metadata) -> Metadata:
    """
    Abstract download step: takes a Metadata item and is annotated to
    return a Metadata; this declaration has no behaviour of its own.
    """
    pass