mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
WIP: Docs tidyups+add howto on logging and authentication
(Authentication is WIP)
This commit is contained in:
@@ -63,12 +63,18 @@ class BaseModule(ABC):
|
||||
def config_setup(self, config: dict):
|
||||
|
||||
authentication = config.get('authentication', {})
|
||||
|
||||
# this is important. Each instance is given its own deepcopied config, so modules cannot
|
||||
# change values to affect other modules
|
||||
config = deepcopy(config)
|
||||
authentication = deepcopy(config.pop('authentication', {}))
|
||||
|
||||
# extract out concatenated sites
|
||||
for key, val in copy(authentication).items():
|
||||
if "," in key:
|
||||
for site in key.split(","):
|
||||
authentication[site] = val
|
||||
del authentication[key]
|
||||
|
||||
self.authentication = authentication
|
||||
self.config = config
|
||||
for key, val in config.get(self.name, {}).items():
|
||||
@@ -102,7 +108,7 @@ class BaseModule(ABC):
|
||||
# TODO: think about if/how we can deal with sites that have multiple domains (main one is x.com/twitter.com)
|
||||
# for now the user must enter them both, like "x.com,twitter.com" in their config. Maybe we just hard-code?
|
||||
|
||||
site = UrlUtil.domain_for_url(site)
|
||||
site = UrlUtil.domain_for_url(site).lstrip("www.")
|
||||
# add the 'www' version of the site to the list of sites to check
|
||||
authdict = {}
|
||||
|
||||
@@ -128,17 +134,30 @@ class BaseModule(ABC):
|
||||
# collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts'))
|
||||
ytdlp_opts = getattr(parse_options(args), 'ydl_opts')
|
||||
return yt_dlp.YoutubeDL(ytdlp_opts).cookiejar
|
||||
|
||||
get_cookiejar_options = None
|
||||
|
||||
# get the cookie jar, preferring browser cookies over the cookies file
|
||||
if 'cookies_from_browser' in self.authentication:
|
||||
# order of priority:
|
||||
# 1. cookies_from_browser setting in site config
|
||||
# 2. cookies_file setting in site config
|
||||
# 3. cookies_from_browser setting in global config
|
||||
# 4. cookies_file setting in global config
|
||||
|
||||
if 'cookies_from_browser' in authdict:
|
||||
get_cookiejar_options = ['--cookies-from-browser', authdict['cookies_from_browser']]
|
||||
elif 'cookies_file' in authdict:
|
||||
get_cookiejar_options = ['--cookies', authdict['cookies_file']]
|
||||
elif 'cookies_from_browser' in self.authentication:
|
||||
authdict['cookies_from_browser'] = self.authentication['cookies_from_browser']
|
||||
if extract_cookies:
|
||||
authdict['cookies_jar'] = get_ytdlp_cookiejar(['--cookies-from-browser', self.authentication['cookies_from_browser']])
|
||||
get_cookiejar_options = ['--cookies-from-browser', self.authentication['cookies_from_browser']]
|
||||
elif 'cookies_file' in self.authentication:
|
||||
authdict['cookies_file'] = self.authentication['cookies_file']
|
||||
if extract_cookies:
|
||||
authdict['cookies_jar'] = get_ytdlp_cookiejar(['--cookies', self.authentication['cookies_file']])
|
||||
get_cookiejar_options = ['--cookies', self.authentication['cookies_file']]
|
||||
|
||||
|
||||
if get_cookiejar_options:
|
||||
authdict['cookies_jar'] = get_ytdlp_cookiejar(get_cookiejar_options)
|
||||
|
||||
return authdict
|
||||
|
||||
def repr(self):
|
||||
|
||||
@@ -10,7 +10,7 @@ from ruamel.yaml import YAML, CommentedMap, add_representer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from copy import deepcopy
|
||||
from copy import deepcopy, copy
|
||||
from .module import BaseModule
|
||||
|
||||
from typing import Any, List, Type, Tuple
|
||||
@@ -154,7 +154,7 @@ def read_yaml(yaml_filename: str) -> CommentedMap:
|
||||
|
||||
if not config:
|
||||
config = EMPTY_CONFIG
|
||||
|
||||
|
||||
return config
|
||||
|
||||
# TODO: make this tidier/find a way to notify of which keys should not be stored
|
||||
|
||||
@@ -8,7 +8,6 @@ from __future__ import annotations
|
||||
from typing import Generator, Union, List, Type
|
||||
from urllib.parse import urlparse
|
||||
from ipaddress import ip_address
|
||||
from copy import copy
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
@@ -75,13 +74,6 @@ class AuthenticationJsonParseAction(JsonParseAction):
|
||||
continue
|
||||
if not isinstance(key, str) or not isinstance(auth, dict):
|
||||
raise argparse.ArgumentTypeError(f"Authentication must be a dictionary of site names and their authentication methods. Valid global configs are {global_options}")
|
||||
|
||||
# extract out concatenated sites
|
||||
for key, val in copy(auth_dict).items():
|
||||
if "," in key:
|
||||
for site in key.split(","):
|
||||
auth_dict[site] = val
|
||||
del auth_dict[key]
|
||||
|
||||
setattr(namespace, self.dest, auth_dict)
|
||||
|
||||
|
||||
@@ -280,6 +280,7 @@ class GenericExtractor(Extractor):
|
||||
|
||||
# set up auth
|
||||
auth = self.auth_for_site(url, extract_cookies=False)
|
||||
|
||||
# order of importance: username/password -> api_key -> cookie -> cookie_from_browser -> cookies_file
|
||||
if auth:
|
||||
if 'username' in auth and 'password' in auth:
|
||||
@@ -290,11 +291,11 @@ class GenericExtractor(Extractor):
|
||||
logger.debug(f'Using provided auth cookie for {url}')
|
||||
yt_dlp.utils.std_headers['cookie'] = auth['cookie']
|
||||
elif 'cookie_from_browser' in auth:
|
||||
logger.debug(f'Using extracted cookies from browser {self.cookies_from_browser} for {url}')
|
||||
logger.debug(f'Using extracted cookies from browser {auth["cookies_from_browser"]} for {url}')
|
||||
ydl_options['cookiesfrombrowser'] = auth['cookies_from_browser']
|
||||
elif 'cookies_file' in auth:
|
||||
logger.debug(f'Using cookies from file {self.cookie_file} for {url}')
|
||||
ydl_options['cookiesfile'] = auth['cookies_file']
|
||||
logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}')
|
||||
ydl_options['cookiefile'] = auth['cookies_file']
|
||||
|
||||
ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user