mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Merge pull request #233 from bellingcat/docker-webdriver-aarch64
Docker webdriver aarch64
This commit is contained in:
15
Dockerfile
15
Dockerfile
@@ -7,13 +7,24 @@ ENV RUNNING_IN_DOCKER=1 \
|
|||||||
PYTHONFAULTHANDLER=1 \
|
PYTHONFAULTHANDLER=1 \
|
||||||
PATH="/root/.local/bin:$PATH"
|
PATH="/root/.local/bin:$PATH"
|
||||||
|
|
||||||
|
|
||||||
|
ARG TARGETARCH
|
||||||
|
|
||||||
# Installing system dependencies
|
# Installing system dependencies
|
||||||
RUN add-apt-repository ppa:mozillateam/ppa && \
|
RUN add-apt-repository ppa:mozillateam/ppa && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends gcc ffmpeg fonts-noto exiftool && \
|
apt-get install -y --no-install-recommends gcc ffmpeg fonts-noto exiftool && \
|
||||||
apt-get install -y --no-install-recommends firefox-esr && \
|
apt-get install -y --no-install-recommends firefox-esr && \
|
||||||
ln -s /usr/bin/firefox-esr /usr/bin/firefox && \
|
ln -s /usr/bin/firefox-esr /usr/bin/firefox
|
||||||
wget https://github.com/mozilla/geckodriver/releases/download/v0.35.0/geckodriver-v0.35.0-linux64.tar.gz && \
|
|
||||||
|
ARG GECKODRIVER_VERSION=0.36.0
|
||||||
|
|
||||||
|
RUN if [ $(uname -m) = "aarch64" ]; then \
|
||||||
|
GECKODRIVER_ARCH=linux-aarch64; \
|
||||||
|
else \
|
||||||
|
GECKODRIVER_ARCH=linux64; \
|
||||||
|
fi && \
|
||||||
|
wget https://github.com/mozilla/geckodriver/releases/download/v${GECKODRIVER_VERSION}/geckodriver-v${GECKODRIVER_VERSION}-${GECKODRIVER_ARCH}.tar.gz && \
|
||||||
tar -xvzf geckodriver* -C /usr/local/bin && \
|
tar -xvzf geckodriver* -C /usr/local/bin && \
|
||||||
chmod +x /usr/local/bin/geckodriver && \
|
chmod +x /usr/local/bin/geckodriver && \
|
||||||
rm geckodriver-v* && \
|
rm geckodriver-v* && \
|
||||||
|
|||||||
@@ -105,8 +105,8 @@ class BaseModule(ABC):
|
|||||||
for key in self.authentication.keys():
|
for key in self.authentication.keys():
|
||||||
if key in site or site in key:
|
if key in site or site in key:
|
||||||
logger.debug(f"Could not find exact authentication information for site '{site}'. \
|
logger.debug(f"Could not find exact authentication information for site '{site}'. \
|
||||||
did find information for '{key}' which is close, is this what you meant? \
|
did find information for '{key}' which is close, is this what you meant? \
|
||||||
If so, edit your authentication settings to make sure it exactly matches.")
|
If so, edit your authentication settings to make sure it exactly matches.")
|
||||||
|
|
||||||
def get_ytdlp_cookiejar(args):
|
def get_ytdlp_cookiejar(args):
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
},
|
},
|
||||||
"configs": {
|
"configs": {
|
||||||
"width": {"default": 1280, "help": "width of the screenshots"},
|
"width": {"default": 1280, "help": "width of the screenshots"},
|
||||||
"height": {"default": 720, "help": "height of the screenshots"},
|
"height": {"default": 1024, "help": "height of the screenshots"},
|
||||||
"timeout": {"default": 60, "help": "timeout for taking the screenshot"},
|
"timeout": {"default": 60, "help": "timeout for taking the screenshot"},
|
||||||
"sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"},
|
"sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"},
|
||||||
"http_proxy": {"default": "", "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port"},
|
"http_proxy": {"default": "", "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port"},
|
||||||
|
|||||||
@@ -1,18 +1,23 @@
|
|||||||
""" This Webdriver class acts as a context manager for the selenium webdriver. """
|
""" This Webdriver class acts as a context manager for the selenium webdriver. """
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.common.exceptions import TimeoutException
|
|
||||||
from selenium.webdriver.common.proxy import Proxy, ProxyType
|
|
||||||
from selenium.webdriver.common.print_page_options import PrintOptions
|
|
||||||
|
|
||||||
from loguru import logger
|
import os
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
#import domain_for_url
|
#import domain_for_url
|
||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
from http.cookiejar import MozillaCookieJar
|
from http.cookiejar import MozillaCookieJar
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.common import exceptions as selenium_exceptions
|
||||||
|
from selenium.webdriver.common.print_page_options import PrintOptions
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
class CookieSettingDriver(webdriver.Firefox):
|
class CookieSettingDriver(webdriver.Firefox):
|
||||||
|
|
||||||
facebook_accept_cookies: bool
|
facebook_accept_cookies: bool
|
||||||
@@ -20,6 +25,10 @@ class CookieSettingDriver(webdriver.Firefox):
|
|||||||
cookiejar: MozillaCookieJar
|
cookiejar: MozillaCookieJar
|
||||||
|
|
||||||
def __init__(self, cookies, cookiejar, facebook_accept_cookies, *args, **kwargs):
|
def __init__(self, cookies, cookiejar, facebook_accept_cookies, *args, **kwargs):
|
||||||
|
if os.environ.get('RUNNING_IN_DOCKER'):
|
||||||
|
# Selenium doesn't support linux-aarch64 driver, we need to set this manually
|
||||||
|
kwargs['service'] = webdriver.FirefoxService(executable_path='/usr/local/bin/geckodriver')
|
||||||
|
|
||||||
super(CookieSettingDriver, self).__init__(*args, **kwargs)
|
super(CookieSettingDriver, self).__init__(*args, **kwargs)
|
||||||
self.cookies = cookies
|
self.cookies = cookies
|
||||||
self.cookiejar = cookiejar
|
self.cookiejar = cookiejar
|
||||||
@@ -64,14 +73,29 @@ class CookieSettingDriver(webdriver.Firefox):
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f'Failed on fb accept cookies.', e)
|
logger.warning(f'Failed on fb accept cookies.', e)
|
||||||
|
|
||||||
|
|
||||||
# now get the actual URL
|
# now get the actual URL
|
||||||
super(CookieSettingDriver, self).get(url)
|
super(CookieSettingDriver, self).get(url)
|
||||||
if self.facebook_accept_cookies:
|
if self.facebook_accept_cookies:
|
||||||
# try and click the 'close' button on the 'login' window to close it
|
# try and click the 'close' button on the 'login' window to close it
|
||||||
close_button = self.find_element(By.XPATH, "//div[@role='dialog']//div[@aria-label='Close']")
|
try:
|
||||||
if close_button:
|
xpath = "//div[@role='dialog']//div[@aria-label='Close']"
|
||||||
close_button.click()
|
WebDriverWait(self, 5).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
|
||||||
|
except selenium_exceptions.NoSuchElementException:
|
||||||
|
logger.warning("Unable to find the 'close' button on the facebook login window")
|
||||||
|
pass
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
# for all other sites, try and use some common button text to reject/accept cookies
|
||||||
|
for text in ["Refuse non-essential cookies", "Decline optional cookies", "Reject additional cookies", "Accept all cookies"]:
|
||||||
|
try:
|
||||||
|
xpath = f"//*[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '{text.lower()}')]"
|
||||||
|
WebDriverWait(self, 5).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
|
||||||
|
break
|
||||||
|
except selenium_exceptions.WebDriverException:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Webdriver:
|
class Webdriver:
|
||||||
@@ -90,7 +114,6 @@ class Webdriver:
|
|||||||
setattr(self.print_options, k, v)
|
setattr(self.print_options, k, v)
|
||||||
|
|
||||||
def __enter__(self) -> webdriver:
|
def __enter__(self) -> webdriver:
|
||||||
|
|
||||||
options = webdriver.FirefoxOptions()
|
options = webdriver.FirefoxOptions()
|
||||||
options.add_argument("--headless")
|
options.add_argument("--headless")
|
||||||
options.add_argument(f'--proxy-server={self.http_proxy}')
|
options.add_argument(f'--proxy-server={self.http_proxy}')
|
||||||
@@ -105,7 +128,7 @@ class Webdriver:
|
|||||||
self.driver.set_window_size(self.width, self.height)
|
self.driver.set_window_size(self.width, self.height)
|
||||||
self.driver.set_page_load_timeout(self.timeout_seconds)
|
self.driver.set_page_load_timeout(self.timeout_seconds)
|
||||||
self.driver.print_options = self.print_options
|
self.driver.print_options = self.print_options
|
||||||
except TimeoutException as e:
|
except selenium_exceptions.TimeoutException as e:
|
||||||
logger.error(f"failed to get new webdriver, possibly due to insufficient system resources or timeout settings: {e}")
|
logger.error(f"failed to get new webdriver, possibly due to insufficient system resources or timeout settings: {e}")
|
||||||
|
|
||||||
return self.driver
|
return self.driver
|
||||||
|
|||||||
Reference in New Issue
Block a user