mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 05:08:28 +03:00
Merge branch 'main' into feat/yt-dlp-pots
# Conflicts: # src/auto_archiver/modules/generic_extractor/__manifest__.py
This commit is contained in:
@@ -28,6 +28,13 @@ the broader archiving framework.
|
||||
metadata objects. Some dropins are included in this generic_archiver by default, but
|
||||
custom dropins can be created to handle additional websites and passed to the archiver
|
||||
via the command line using the `--dropins` option (TODO!).
|
||||
|
||||
### Auto-Updates
|
||||
|
||||
The Generic Extractor will also automatically check for updates to `yt-dlp` (every 5 days by default).
|
||||
This can be configured using the `ytdlp_update_interval` setting (or disabled by setting it to -1).
|
||||
If you are having issues with the extractor, you can review the version of `yt-dlp` being used with `yt-dlp --version`.
|
||||
|
||||
""",
|
||||
"configs": {
|
||||
"subtitles": {"default": True, "help": "download subtitles if available", "type": "bool"},
|
||||
@@ -69,5 +76,10 @@ via the command line using the `--dropins` option (TODO!).
|
||||
"help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.",
|
||||
"type": "json_loader",
|
||||
},
|
||||
"ytdlp_update_interval": {
|
||||
"default": 5,
|
||||
"help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.",
|
||||
"type": "int",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import datetime, os, yt_dlp, pysubs2
|
||||
import datetime, os
|
||||
import importlib
|
||||
import subprocess
|
||||
from typing import Generator, Type
|
||||
|
||||
import yt_dlp
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
import pysubs2
|
||||
|
||||
from loguru import logger
|
||||
|
||||
@@ -11,6 +15,44 @@ from auto_archiver.core import Metadata, Media
|
||||
class GenericExtractor(Extractor):
|
||||
_dropins = {}
|
||||
|
||||
def setup(self):
|
||||
# check for file .ytdlp-update in the secrets folder
|
||||
if self.ytdlp_update_interval < 0:
|
||||
return
|
||||
|
||||
use_secrets = os.path.exists('secrets')
|
||||
path = os.path.join('secrets' if use_secrets else '', '.ytdlp-update')
|
||||
next_update_check = None
|
||||
if os.path.exists(path):
|
||||
with open(path, "r") as f:
|
||||
next_update_check = datetime.datetime.fromisoformat(f.read())
|
||||
|
||||
if not next_update_check or next_update_check < datetime.datetime.now():
|
||||
self.update_ytdlp()
|
||||
|
||||
next_update_check = datetime.datetime.now() + datetime.timedelta(days=self.ytdlp_update_interval)
|
||||
with open(path, "w") as f:
|
||||
f.write(next_update_check.isoformat())
|
||||
|
||||
def update_ytdlp(self):
    """Try to upgrade the installed yt-dlp package with pip (best effort).

    Logs the outcome. When pip reports a new install, the already-imported
    top-level ``yt_dlp`` module is reloaded. Any failure is logged as an
    error and never raised to the caller.
    """
    # Local imports: `sys` is not among the module-level imports of this file.
    import sys
    from importlib.metadata import version as get_version

    logger.info("Checking and updating yt-dlp...")
    logger.info(f"Tip: change the 'ytdlp_update_interval' setting to control how often yt-dlp is updated. Set to -1 to disable or 0 to enable on every run. Current setting: {self.ytdlp_update_interval}")

    try:
        # Looked up inside the try so a metadata lookup failure is logged
        # like any other update error instead of propagating.
        old_version = get_version("yt-dlp")

        # Invoke pip through the running interpreter so the upgrade lands in
        # the same environment this process imports yt_dlp from, rather than
        # whichever `pip` executable happens to be first on PATH.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--upgrade", "yt-dlp"],
            check=True,
            capture_output=True,
        )

        if "Successfully installed yt-dlp" in result.stdout.decode(errors="replace"):
            new_version = get_version("yt-dlp")
            logger.info(f"yt-dlp successfully updated (from {old_version} to {new_version})")
            importlib.reload(yt_dlp)
        else:
            logger.info("yt-dlp already up to date")

    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; append pip's stderr so the
        # log shows the actual reason for the failure.
        details = (e.stderr or b"").decode(errors="replace").strip()
        logger.error(f"Error updating yt-dlp: {e}" + (f"\n{details}" if details else ""))
    except Exception as e:
        logger.error(f"Error updating yt-dlp: {e}")
|
||||
|
||||
def suitable_extractors(self, url: str) -> Generator[str, None, None]:
|
||||
"""
|
||||
Returns a list of valid extractors for the given URL"""
|
||||
|
||||
Reference in New Issue
Block a user