mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Set up screenshot enricher to use authentication/cookies
This commit is contained in:
@@ -274,7 +274,7 @@ class GenericExtractor(Extractor):
|
||||
"max_downloads": self.max_downloads, "playlistend": self.max_downloads}
|
||||
|
||||
# set up auth
|
||||
auth = self.auth_for_site(url)
|
||||
auth = self.auth_for_site(url, extract_cookies=False)
|
||||
# order of importance: username/password -> api_key -> cookie -> cookie_from_browser -> cookies_file
|
||||
if auth:
|
||||
if 'username' in auth and 'password' in auth:
|
||||
|
||||
@@ -5,7 +5,7 @@ from loguru import logger
|
||||
from slugify import slugify
|
||||
|
||||
from auto_archiver.core.metadata import Metadata, Media
|
||||
from auto_archiver.utils import UrlUtil
|
||||
from auto_archiver.utils import url as UrlUtil
|
||||
from auto_archiver.core.extractor import Extractor
|
||||
|
||||
from .dropin import GenericDropin, InfoExtractor
|
||||
|
||||
@@ -6,7 +6,7 @@ from selenium.common.exceptions import TimeoutException
|
||||
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.utils import Webdriver, UrlUtil, random_str
|
||||
from auto_archiver.utils import Webdriver, url as UrlUtil, random_str
|
||||
from auto_archiver.core import Media, Metadata
|
||||
|
||||
class ScreenshotEnricher(Enricher):
|
||||
@@ -19,7 +19,9 @@ class ScreenshotEnricher(Enricher):
|
||||
return
|
||||
|
||||
logger.debug(f"Enriching screenshot for {url=}")
|
||||
with Webdriver(self.width, self.height, self.timeout, 'facebook.com' in url, http_proxy=self.http_proxy, print_options=self.print_options) as driver:
|
||||
auth = self.auth_for_site(url)
|
||||
with Webdriver(self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
|
||||
http_proxy=self.http_proxy, print_options=self.print_options, auth=auth) as driver:
|
||||
try:
|
||||
driver.get(url)
|
||||
time.sleep(int(self.sleep_before_screenshot))
|
||||
|
||||
@@ -7,7 +7,7 @@ from warcio.archiveiterator import ArchiveIterator
|
||||
|
||||
from auto_archiver.core import Media, Metadata
|
||||
from auto_archiver.core import Extractor, Enricher
|
||||
from auto_archiver.utils import UrlUtil, random_str
|
||||
from auto_archiver.utils import url as UrlUtil, random_str
|
||||
|
||||
|
||||
class WaczExtractorEnricher(Enricher, Extractor):
|
||||
|
||||
@@ -3,7 +3,7 @@ from loguru import logger
|
||||
import time, requests
|
||||
|
||||
from auto_archiver.core import Extractor, Enricher
|
||||
from auto_archiver.utils import UrlUtil
|
||||
from auto_archiver.utils import url as UrlUtil
|
||||
from auto_archiver.core import Metadata
|
||||
|
||||
class WaybackExtractorEnricher(Enricher, Extractor):
|
||||
|
||||
Reference in New Issue
Block a user