From 7a81ab617a665a7768e6d9984a16b9ee8b77baa2 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Tue, 11 Mar 2025 11:57:25 +0000
Subject: [PATCH 1/7] Better checking of cookies to add to webdriver

---
 src/auto_archiver/utils/webdriver.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/auto_archiver/utils/webdriver.py b/src/auto_archiver/utils/webdriver.py
index cb4e2a9..af3b7dd 100644
--- a/src/auto_archiver/utils/webdriver.py
+++ b/src/auto_archiver/utils/webdriver.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 
 import os
 import time
+import re
 
 #import domain_for_url
 from urllib.parse import urlparse, urlunparse
@@ -48,8 +49,9 @@ class CookieSettingDriver(webdriver.Firefox):
                         self.driver.add_cookie({'name': name, 'value': value})
             elif self.cookiejar:
                 domain = urlparse(url).netloc.lstrip("www.")
+                regex = re.compile(f"(www)?\.?{domain}$")
                 for cookie in self.cookiejar:
-                    if domain in cookie.domain:
+                    if regex.match(cookie.domain):
                         try:
                             self.add_cookie({
                                 'name': cookie.name,
@@ -60,7 +62,7 @@ class CookieSettingDriver(webdriver.Firefox):
                                 'expiry': cookie.expires
                             })
                         except Exception as e:
-                            logger.warning(f"Failed to add cookie to webdriver: {e}")
+                            logger.warning(f"Failed to add cookie ({cookie.domain}) to webdriver for url {domain}: {e}")
         
         if self.facebook_accept_cookies:
             try:
@@ -81,7 +83,7 @@ class CookieSettingDriver(webdriver.Firefox):
             # try and click the 'close' button on the 'login' window to close it
             try:
                 xpath = "//div[@role='dialog']//div[@aria-label='Close']"
-                WebDriverWait(self, 5).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
+                WebDriverWait(self, 2).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
             except selenium_exceptions.NoSuchElementException:
                 logger.warning("Unable to find the 'close' button on the facebook login window")
                 pass

From 0efeaaabb181f34e0a692443c4651b2f171a2eb5 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Tue, 11 Mar 2025 12:24:52 +0000
Subject: [PATCH 2/7] Revert to using time.sleep and .click() - since we only
 want to be waiting the first time (for the page to load)

---
 src/auto_archiver/utils/webdriver.py | 38 ++++++++++------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/src/auto_archiver/utils/webdriver.py b/src/auto_archiver/utils/webdriver.py
index af3b7dd..ccfead5 100644
--- a/src/auto_archiver/utils/webdriver.py
+++ b/src/auto_archiver/utils/webdriver.py
@@ -64,41 +64,31 @@ class CookieSettingDriver(webdriver.Firefox):
                         except Exception as e:
                             logger.warning(f"Failed to add cookie ({cookie.domain}) to webdriver for url {domain}: {e}")
         
-        if self.facebook_accept_cookies:
-            try:
-                logger.debug(f'Trying fb click accept cookie popup.')
-                super(CookieSettingDriver, self).get("http://www.facebook.com")
-                essential_only = self.find_element(By.XPATH, "//span[contains(text(), 'Decline optional cookies')]")
-                essential_only.click()
-                logger.debug(f'fb click worked')
-                # linux server needs a sleep otherwise facebook cookie won't have worked and we'll get a popup on next page
-                time.sleep(2)
-            except Exception as e:
-                logger.warning(f'Failed on fb accept cookies.', e)
+
         
+        super(CookieSettingDriver, self).get(url)
+        time.sleep(2)
+
+        # Try and use some common button text to reject/accept cookies
+        for text in ["Refuse non-essential cookies", "Decline optional cookies", "Reject additional cookies", "Reject all", "Accept all cookies"]:
+            try:
+                xpath = f"//*[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '{text.lower()}')]"
+                self.find_element(By.XPATH, xpath).click()
+                time.sleep(2)
+            except selenium_exceptions.NoSuchElementException:
+                pass
 
         # now get the actual URL
-        super(CookieSettingDriver, self).get(url)
         if self.facebook_accept_cookies:
             # try and click the 'close' button on the 'login' window to close it
             try:
                 xpath = "//div[@role='dialog']//div[@aria-label='Close']"
-                WebDriverWait(self, 2).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
+                self.find_element(By.XPATH, xpath).click()
+                time.sleep(2)
             except selenium_exceptions.NoSuchElementException:
                 logger.warning("Unable to find the 'close' button on the facebook login window")
                 pass
 
-        else:
-
-            # for all other sites, try and use some common button text to reject/accept cookies
-            for text in ["Refuse non-essential cookies", "Decline optional cookies", "Reject additional cookies", "Reject all", "Accept all cookies"]:
-                try:
-                    xpath = f"//*[contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '{text.lower()}')]"
-                    WebDriverWait(self, 5).until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
-                    break
-                except selenium_exceptions.WebDriverException:
-                    pass
-
     
 class Webdriver:
     def __init__(self, width: int, height: int, timeout_seconds: int,

From 589c834047b21fe804b81997208e01f666142ee7 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Tue, 11 Mar 2025 12:25:19 +0000
Subject: [PATCH 3/7] Fix parsing ytdlp args - we should first run them through
 the parse_options method

---
 .../modules/generic_extractor/__manifest__.py |  7 ++++
 .../generic_extractor/generic_extractor.py    | 38 ++++++++++++-------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py
index 1d3b365..274a4ba 100644
--- a/src/auto_archiver/modules/generic_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py
@@ -76,5 +76,12 @@ If you are having issues with the extractor, you can review the version of `yt-d
             "help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.",
             "type": "int",
         },
+        "ytdlp_args": {
+            "default": "",
+            "help": "Additional arguments to pass to yt-dlp, e.g. --no-check-certificate or --plugin-dirs. \
+See yt-dlp documentation here for more information: https://github.com/yt-dlp/yt-dlp?tab=readme-ov-file#general-options \
+Note: this is not to be confused with 'extractor_args' which are specific to the extractor itself.",
+            "type": "str",
+        },
     },
 }
diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
index 5acce46..56164ff 100644
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -1,6 +1,7 @@
 import datetime, os
 import importlib
 import subprocess
+
 from typing import Generator, Type
 
 import yt_dlp
@@ -166,7 +167,7 @@ class GenericExtractor(Extractor):
 
         if not dropin:
             # TODO: add a proper link to 'how to create your own dropin'
-            logger.debug(f"""Could not find valid dropin for {info_extractor.IE_NAME}.
+            logger.debug(f"""Could not find valid dropin for {info_extractor.ie_key()}.
                      Why not try creating your own, and make sure it has a valid function called 'create_metadata'. Learn more: https://auto-archiver.readthedocs.io/en/latest/user_guidelines.html#""")
             return False
         
@@ -279,18 +280,18 @@ class GenericExtractor(Extractor):
             result = self.get_metadata_for_video(data, info_extractor, url, ydl)
 
         except Exception as e:
-            if info_extractor.ie_key() == "generic":
+            if info_extractor.IE_NAME == "generic":
                 # don't clutter the logs with issues about the 'generic' extractor not having a dropin
                 return False
 
-            logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead')
+            logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use dropin to get post data instead')
             try:
                 result = self.get_metadata_for_post(info_extractor, url, ydl)
             except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
                 logger.error(f'Error downloading metadata for post: {post_e}')
                 return False
             except Exception as generic_e:
-                logger.debug(f'Attempt to extract using ytdlp extractor "{info_extractor.IE_NAME}" failed:  \n  {repr(generic_e)}', exc_info=True)
+                logger.debug(f'Attempt to extract using ytdlp dropin "{info_extractor.IE_NAME}" failed:  \n  {repr(generic_e)}', exc_info=True)
                 return False
         
         if result:
@@ -314,11 +315,16 @@ class GenericExtractor(Extractor):
             item.set("replaced_url", url)
 
 
-        ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'), 
-                       'quiet': False, 'noplaylist': not self.allow_playlist ,
-                       'writesubtitles': self.subtitles,'writeautomaticsub': self.subtitles,
-                       "live_from_start": self.live_from_start, "proxy": self.proxy,
-                       "max_downloads": self.max_downloads, "playlistend": self.max_downloads}
+        ydl_options = ["-o", os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'),
+                       "--quiet",
+                       "--no-playlist" if not self.allow_playlist else "--yes-playlist",
+                       "--write-subs" if self.subtitles else "--no-write-subs",
+                       "--write-auto-subs" if self.subtitles else "--no-write-auto-subs",
+                       "--live-from-start" if self.live_from_start else "--no-live-from-start",
+                       "--proxy", self.proxy if self.proxy else '',
+                       f"--max-downloads {self.max_downloads}" if self.max_downloads != "inf" else '',
+                       f"--playlist-end {self.max_downloads}" if self.max_downloads != "inf" else ''
+                       ]
         
         # set up auth
         auth = self.auth_for_site(url, extract_cookies=False)
@@ -327,19 +333,23 @@ class GenericExtractor(Extractor):
         if auth:
             if 'username' in auth and 'password' in auth:
                 logger.debug(f'Using provided auth username and password for {url}')
-                ydl_options['username'] = auth['username']
-                ydl_options['password'] = auth['password']
+                ydl_options.extend(('--username', auth['username']))
+                ydl_options.extend(('--password', auth['password']))
             elif 'cookie' in auth:
                 logger.debug(f'Using provided auth cookie for {url}')
                 yt_dlp.utils.std_headers['cookie'] = auth['cookie']
             elif 'cookies_from_browser' in auth:
                 logger.debug(f'Using extracted cookies from browser {auth["cookies_from_browser"]} for {url}')
-                ydl_options['cookiesfrombrowser'] = auth['cookies_from_browser']
+                ydl_options.extend(('--cookies-from-browser', auth['cookies_from_browser']))
             elif 'cookies_file' in auth:
                 logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}')
-                ydl_options['cookiefile'] = auth['cookies_file']
+                ydl_options.extend(('--cookies', auth['cookies_file']))
 
-        ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
+        if self.ytdlp_args:
+            ydl_options += self.ytdlp_args.split(" ")
+
+        _, _, _, validated_options = yt_dlp.parse_options(ydl_options)
+        ydl = yt_dlp.YoutubeDL(validated_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
 
         for info_extractor in self.suitable_extractors(url):
             result = self.download_for_extractor(info_extractor, url, ydl)

From f6b13327f0329b771709a0550895810b91f6cf39 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Thu, 13 Mar 2025 16:03:52 +0000
Subject: [PATCH 4/7] Tweaks and additional debug logging

---
 .../modules/generic_extractor/generic_extractor.py             | 3 ++-
 src/auto_archiver/utils/webdriver.py                           | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
index 56164ff..a75e874 100644
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -346,9 +346,10 @@ class GenericExtractor(Extractor):
                 ydl_options.extend(('--cookies', auth['cookies_file']))
 
         if self.ytdlp_args:
+            logger.debug(f"Adding additional ytdlp arguments: {self.ytdlp_args}")
             ydl_options += self.ytdlp_args.split(" ")
 
-        _, _, _, validated_options = yt_dlp.parse_options(ydl_options)
+        *_, validated_options = yt_dlp.parse_options(ydl_options)
         ydl = yt_dlp.YoutubeDL(validated_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
 
         for info_extractor in self.suitable_extractors(url):
diff --git a/src/auto_archiver/utils/webdriver.py b/src/auto_archiver/utils/webdriver.py
index ccfead5..57f2cf1 100644
--- a/src/auto_archiver/utils/webdriver.py
+++ b/src/auto_archiver/utils/webdriver.py
@@ -48,7 +48,7 @@ class CookieSettingDriver(webdriver.Firefox):
                     for name, value in cookie.split("="):
                         self.driver.add_cookie({'name': name, 'value': value})
             elif self.cookiejar:
-                domain = urlparse(url).netloc.lstrip("www.")
+                domain = urlparse(url).netloc
                 regex = re.compile(f"(www)?\.?{domain}$")
                 for cookie in self.cookiejar:
                     if regex.match(cookie.domain):

From abaeec0cc6342ee7e843b05b6cc2d029d2103465 Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Fri, 14 Mar 2025 12:48:06 +0000
Subject: [PATCH 5/7] Add ruff check

---
 .pre-commit-config.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 78421d7..833a540 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,10 @@ repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.9.10
     hooks:
-      - id: ruff-format
+        - id: ruff
+          args: [ --fix ]
+        - id: ruff-format
+
 
       # Runs Ruff linting - just checks without fixing, but blocks commit if errors are found.
 #      - id: ruff

From a8e5585e6c40c5dad8fad32e591fb90d7c52217e Mon Sep 17 00:00:00 2001
From: Patrick Robertson <robertson.patrick@gmail.com>
Date: Fri, 14 Mar 2025 12:51:47 +0000
Subject: [PATCH 6/7] github format

---
 .pre-commit-config.yaml                                         | 2 +-
 .../modules/generic_extractor/generic_extractor.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 833a540..0ec35a5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     rev: v0.9.10
     hooks:
         - id: ruff
-          args: [ --fix ]
+          args: [ --fix, --output-format=github]
         - id: ruff-format
 
 
diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
index 72b526d..534fb71 100644
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -398,7 +398,7 @@ class GenericExtractor(Extractor):
 
         ydl_options = [
             "-o",
-            os.path.join(self.tmp_dir, f"%(id)s.%(ext)s"),
+            os.path.join(self.tmp_dir, "%(id)s.%(ext)s"),
             "--quiet",
             "--no-playlist" if not self.allow_playlist else "--yes-playlist",
             "--write-subs" if self.subtitles else "--no-write-subs",

From 562d06916ecd5bf8d1d17c17497e902f4e475a39 Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Fri, 14 Mar 2025 13:08:57 +0000
Subject: [PATCH 7/7] Revert pre commit

---
 .pre-commit-config.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0ec35a5..78421d7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,10 +3,7 @@ repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.9.10
     hooks:
-        - id: ruff
-          args: [ --fix, --output-format=github]
-        - id: ruff-format
-
+      - id: ruff-format
 
       # Runs Ruff linting - just checks without fixing, but blocks commit if errors are found.
 #      - id: ruff