mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 12:48:28 +03:00
* clean orchestrator code, add archiver cleanup logic * improves documentation for database.py * telethon archivers isolate sessions into copied files * closes #127 * closes #125 * closes #84 * meta enricher applies to all media * closes #61 adds subtitles and comments * minor update * minor fixes to yt-dlp subtitles and comments * closes #17 but logic is imperfect. * closes #85 ssl enhancer * minimifies html, JS refactor for preview of certificates * closes #91 adds freetsa timestamp authority * version bump * simplify download_url method * skip ssl if nothing archived * html preview improvements * adds retrying lib * manual download archiver improvements * meta only runs when relevant data available * new metadata convenience method * html template improvements * removes debug message * does not close #91 yet, will need a few more certificate chaing logging * adds verbosity config * new instagram api archiver * adds proxy support we * adds proxy/end support and bug fix for yt-dlp * proxy support for webdriver * adds socks proxy to wacz_enricher * refactor recursivity in inner media and display * infinite recursive display * foolproofing timestamping authortities * version to 0.9.0 * minor fixes from code-review
49 lines
2.0 KiB
Python
49 lines
2.0 KiB
Python
from __future__ import annotations
|
|
from selenium import webdriver
|
|
from selenium.common.exceptions import TimeoutException
|
|
from selenium.webdriver.common.proxy import Proxy, ProxyType
|
|
from loguru import logger
|
|
from selenium.webdriver.common.by import By
|
|
import time
|
|
|
|
|
|
class Webdriver:
    """Context manager that starts a headless Firefox WebDriver and tears it down on exit.

    Usage::

        with Webdriver(1280, 720, 60) as driver:
            driver.get(url)

    On entry a ``selenium.webdriver.Firefox`` instance is created, sized to
    ``width`` x ``height`` and given a page-load timeout of ``timeout_seconds``.
    On exit the browser window is closed and the driver process is quit.

    NOTE: ``__exit__`` returns ``True``, so any exception raised inside the
    ``with`` body is suppressed (it is logged first). Callers that need to
    react to errors must handle them inside the ``with`` body.
    """

    def __init__(self, width: int, height: int, timeout_seconds: int, facebook_accept_cookies: bool = False, http_proxy: str = "") -> None:
        """Store the settings; no browser is started until the context is entered.

        Args:
            width: Browser window width passed to ``set_window_size``.
            height: Browser window height passed to ``set_window_size``.
            timeout_seconds: Value passed to ``set_page_load_timeout``.
            facebook_accept_cookies: When true, entering the context first opens
                facebook.com and tries to click the "accept only essential
                cookies" button.
            http_proxy: Proxy address forwarded to the browser as a
                ``--proxy-server`` argument; an empty string means no proxy.
        """
        self.width = width
        self.height = height
        self.timeout_seconds = timeout_seconds
        self.facebook_accept_cookies = facebook_accept_cookies
        self.http_proxy = http_proxy

    def __enter__(self) -> "webdriver.Firefox":
        """Start the headless Firefox driver and return it.

        Raises:
            TimeoutException: If the driver could not be created at all. The
                error is logged before being re-raised. If the driver was
                created but a later configuration call timed out, the error is
                only logged and the driver is still returned.
        """
        options = webdriver.FirefoxOptions()
        options.add_argument("--headless")
        if self.http_proxy:
            # NOTE(review): `--proxy-server` is a Chromium-style flag; confirm that
            # Firefox/geckodriver actually honors it, otherwise configure the proxy
            # through Firefox preferences or selenium's Proxy capability instead.
            options.add_argument(f'--proxy-server={self.http_proxy}')
        # Presumably stops Firefox from handing tg: links to an external handler.
        options.set_preference('network.protocol-handler.external.tg', False)

        driver = None
        try:
            driver = webdriver.Firefox(options=options)
            driver.set_window_size(self.width, self.height)
            driver.set_page_load_timeout(self.timeout_seconds)
        except TimeoutException as e:
            logger.error(f"failed to get new webdriver, possibly due to insufficient system resources or timeout settings: {e}")
            if driver is None:
                # Nothing usable was created: surface the real cause instead of
                # failing later with an unrelated AttributeError on self.driver.
                raise
        self.driver = driver

        if self.facebook_accept_cookies:
            self._accept_facebook_cookies()

        return self.driver

    def _accept_facebook_cookies(self) -> None:
        """Best-effort click on Facebook's "accept only essential cookies" button."""
        try:
            logger.debug('Trying fb click accept cookie popup.')
            self.driver.get("http://www.facebook.com")
            accept_button = self.driver.find_element(By.XPATH, "//button[@data-cookiebanner='accept_only_essential_button']")
            accept_button.click()
            logger.debug('fb click worked')
            # linux server needs a sleep otherwise facebook cookie won't have worked and we'll get a popup on next page
            time.sleep(2)
        except Exception:
            # Deliberately best-effort: a missing banner or a page error must not
            # prevent the driver from being used. KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            logger.warning('Failed on fb accept cookies.')

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the window, quit the driver and suppress exceptions from the ``with`` body.

        Returns:
            Always ``True``, which tells Python to suppress any exception raised
            inside the ``with`` body. The suppressed exception is logged.
        """
        if exc_type is not None:
            logger.warning(f"suppressing exception raised inside Webdriver context: {exc_type.__name__}: {exc_val}")

        driver = getattr(self, "driver", None)
        if driver is not None:
            try:
                driver.close()
            finally:
                # Always quit, even if close() failed, so the browser process
                # is not leaked.
                try:
                    driver.quit()
                finally:
                    del self.driver
        return True
|