mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Further cleanup
* Removes (partly) the ArchivingOrchestrator * Removes the cli_feeder module, and makes it the 'default', allowing you to pass URLs directly on the command line, without having to use the cumbersome --cli_feeder.urls. Just do auto-archiver https://my.url.com * More unit tests * Improved error handling
This commit is contained in:
@@ -12,7 +12,6 @@ from dataclasses import dataclass
|
||||
import mimetypes
|
||||
import os
|
||||
import mimetypes
|
||||
|
||||
import requests
|
||||
from loguru import logger
|
||||
from retrying import retry
|
||||
@@ -71,7 +70,7 @@ class Extractor(BaseModule):
|
||||
to_filename = url.split('/')[-1].split('?')[0]
|
||||
if len(to_filename) > 64:
|
||||
to_filename = to_filename[-64:]
|
||||
to_filename = os.path.join(ArchivingContext.get_tmp_dir(), to_filename)
|
||||
to_filename = os.path.join(self.tmp_dir, to_filename)
|
||||
if verbose: logger.debug(f"downloading {url[0:50]=} {to_filename=}")
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
|
||||
|
||||
Reference in New Issue
Block a user