Set up screenshot enricher to use authentication/cookies

This commit is contained in:
Patrick Robertson
2025-02-03 17:25:59 +01:00
parent 7ec328ab40
commit c574b694ed
11 changed files with 153 additions and 96 deletions

View File

@@ -274,7 +274,7 @@ class GenericExtractor(Extractor):
"max_downloads": self.max_downloads, "playlistend": self.max_downloads}
# set up auth
auth = self.auth_for_site(url)
auth = self.auth_for_site(url, extract_cookies=False)
# order of importance: username/password -> api_key -> cookie -> cookie_from_browser -> cookies_file
if auth:
if 'username' in auth and 'password' in auth:

View File

@@ -5,7 +5,7 @@ from loguru import logger
from slugify import slugify
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.utils import UrlUtil
from auto_archiver.utils import url as UrlUtil
from auto_archiver.core.extractor import Extractor
from .dropin import GenericDropin, InfoExtractor

View File

@@ -6,7 +6,7 @@ from selenium.common.exceptions import TimeoutException
from auto_archiver.core import Enricher
from auto_archiver.utils import Webdriver, UrlUtil, random_str
from auto_archiver.utils import Webdriver, url as UrlUtil, random_str
from auto_archiver.core import Media, Metadata
class ScreenshotEnricher(Enricher):
@@ -19,7 +19,9 @@ class ScreenshotEnricher(Enricher):
return
logger.debug(f"Enriching screenshot for {url=}")
with Webdriver(self.width, self.height, self.timeout, 'facebook.com' in url, http_proxy=self.http_proxy, print_options=self.print_options) as driver:
auth = self.auth_for_site(url)
with Webdriver(self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
http_proxy=self.http_proxy, print_options=self.print_options, auth=auth) as driver:
try:
driver.get(url)
time.sleep(int(self.sleep_before_screenshot))

View File

@@ -7,7 +7,7 @@ from warcio.archiveiterator import ArchiveIterator
from auto_archiver.core import Media, Metadata
from auto_archiver.core import Extractor, Enricher
from auto_archiver.utils import UrlUtil, random_str
from auto_archiver.utils import url as UrlUtil, random_str
class WaczExtractorEnricher(Enricher, Extractor):

View File

@@ -3,7 +3,7 @@ from loguru import logger
import time, requests
from auto_archiver.core import Extractor, Enricher
from auto_archiver.utils import UrlUtil
from auto_archiver.utils import url as UrlUtil
from auto_archiver.core import Metadata
class WaybackExtractorEnricher(Enricher, Extractor):