From 158d448cbcf53b017643d61ee858aa1a7a8b7106 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 24 Feb 2025 11:40:39 +0000 Subject: [PATCH 01/37] Add yt-dlp-get-pot and yt-dlp-getpot-wpc requirements. --- poetry.lock | 243 ++++++++++++++++++++++++++++++++++++++++++------- pyproject.toml | 2 + 2 files changed, 210 insertions(+), 35 deletions(-) diff --git a/poetry.lock b/poetry.lock index 83b2860..75cbacf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -172,18 +172,18 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.36.22" +version = "1.36.26" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.36.22-py3-none-any.whl", hash = "sha256:39957eabdce009353d72d131046489fbbfa15891865d5f069f1e8bfa414e6b81"}, - {file = "boto3-1.36.22.tar.gz", hash = "sha256:768c8a4d4a6227fe2258105efa086f1424cba5ca915a5eb2305b2cd979306ad1"}, + {file = "boto3-1.36.26-py3-none-any.whl", hash = "sha256:f67d014a7c5a3cd540606d64d7cb9eec3600cf42acab1ac0518df9751ae115e2"}, + {file = "boto3-1.36.26.tar.gz", hash = "sha256:523b69457eee55ac15aa707c0e768b2a45ca1521f95b2442931090633ec72458"}, ] [package.dependencies] -botocore = ">=1.36.22,<1.37.0" +botocore = ">=1.36.26,<1.37.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -192,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.36.22" +version = "1.36.26" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.36.22-py3-none-any.whl", hash = "sha256:75d6b34acb0686ee4d54ff6eb285e78ccfe318407428769d1e3e13351714d890"}, - {file = "botocore-1.36.22.tar.gz", hash = "sha256:59520247d5a479731724f97c995d5a1c2aae3b303b324f39d99efcfad1d3019e"}, + {file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"}, + {file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"}, ] [package.dependencies] @@ -363,14 +363,14 @@ beautifulsoup4 = "*" [[package]] name = "cachetools" -version = "5.5.1" +version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "cachetools-5.5.1-py3-none-any.whl", hash = "sha256:b76651fdc3b24ead3c648bbdeeb940c1b04d365b38b4af66788f9ec4a81d42bb"}, - {file = "cachetools-5.5.1.tar.gz", hash = "sha256:70f238fbba50383ef62e55c6aff6d9673175fe59f7c6782c7a0b9e38f4a9df95"}, + {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, + {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, ] [[package]] @@ -696,6 +696,24 @@ calendars = ["convertdate (>=2.2.1)", "hijridate"] fasttext = ["fasttext (>=0.9.1)", "numpy (>=1.19.3,<2)"] langdetect = ["langdetect (>=1.0.0)"] +[[package]] +name = "deprecated" +version = "1.2.18" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +groups = ["main"] +files = [ + {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, + {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] + [[package]] name = "docutils" version = "0.21.2" @@ -860,14 +878,14 @@ tool = ["click (>=6.0.0)"] [[package]] name = "googleapis-common-protos" -version = "1.67.0" +version = "1.68.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"}, - {file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"}, + {file = "googleapis_common_protos-1.68.0-py2.py3-none-any.whl", hash = "sha256:aaf179b2f81df26dfadac95def3b16a95064c76a5f45f07e4c68a21bb371c4ac"}, + {file = "googleapis_common_protos-1.68.0.tar.gz", hash = "sha256:95d38161f4f9af0d9423eed8fb7b64ffd2568c3464eb542ff02c5bfa1953ab3c"}, ] [package.dependencies] @@ -1209,6 +1227,23 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mss" +version = "10.0.0" +description = "An ultra fast cross-platform multiple screenshots module in pure python using ctypes." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "mss-10.0.0-py3-none-any.whl", hash = "sha256:82cf6460a53d09e79b7b6d871163c982e6c7e9649c426e7b7591b74956d5cb64"}, + {file = "mss-10.0.0.tar.gz", hash = "sha256:d903e0d51262bf0f8782841cf16eaa6d7e3e1f12eae35ab41c2e318837c6637f"}, +] + +[package.extras] +dev = ["build (==1.2.2.post1)", "mypy (==1.13.0)", "ruff (==0.7.3)", "twine (==5.1.1)"] +docs = ["sphinx (==8.1.3)"] +tests = ["numpy (==2.1.3)", "pillow (==11.0.0)", "pytest (==8.3.3)", "pytest-cov (==6.0.0)", "pytest-rerunfailures (==14.0.0)", "pyvirtualdisplay (==3.0)"] + [[package]] name = "mutagen" version = "1.47.0" @@ -1260,6 +1295,26 @@ rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-bo testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"] +[[package]] +name = "nodriver" +version = "0.39" +description = "[Docs here](https://ultrafunkamsterdam.github.io/nodriver)" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "nodriver-0.39-py3-none-any.whl", hash = "sha256:f245be52e6328393ece340a6dcbc8d5754fd7cf0838f0e1e40076944617178fc"}, + {file = "nodriver-0.39.tar.gz", hash = "sha256:af84f76215877c74166f95c8e7615268e31f6118f4c7291d201f29003f2248ef"}, +] + +[package.dependencies] +deprecated = "*" +mss = "*" +websockets = ">=14" + +[package.extras] +dev = ["black", "build", "furo", "pygments", "sphinx", "sphinx_autodoc_typehints", "sphinx_markdown_builder"] + [[package]] name = "numpy" version = "2.1.3" @@ -1674,14 +1729,14 @@ files = [ [[package]] name = "pydata-sphinx-theme" -version = "0.16.1" +version = "0.15.4" description = "Bootstrap-based Sphinx theme from the PyData community" optional = false python-versions = ">=3.9" groups = ["docs"] 
files = [ - {file = "pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde"}, - {file = "pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7"}, + {file = "pydata_sphinx_theme-0.15.4-py3-none-any.whl", hash = "sha256:2136ad0e9500d0949f96167e63f3e298620040aea8f9c74621959eda5d4cf8e6"}, + {file = "pydata_sphinx_theme-0.15.4.tar.gz", hash = "sha256:7762ec0ac59df3acecf49fd2f889e1b4565dbce8b88b2e29ee06fdd90645a06d"}, ] [package.dependencies] @@ -1689,8 +1744,9 @@ accessible-pygments = "*" Babel = "*" beautifulsoup4 = "*" docutils = "!=0.17.0" +packaging = "*" pygments = ">=2.7" -sphinx = ">=6.1" +sphinx = ">=5" typing-extensions = "*" [package.extras] @@ -2265,14 +2321,14 @@ crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"] [[package]] name = "selenium" -version = "4.28.1" +version = "4.29.0" description = "Official Python bindings for Selenium WebDriver" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "selenium-4.28.1-py3-none-any.whl", hash = "sha256:4238847e45e24e4472cfcf3554427512c7aab9443396435b1623ef406fff1cc1"}, - {file = "selenium-4.28.1.tar.gz", hash = "sha256:0072d08670d7ec32db901bd0107695a330cecac9f196e3afb3fa8163026e022a"}, + {file = "selenium-4.29.0-py3-none-any.whl", hash = "sha256:ce5d26f1ddc1111641113653af33694c13947dd36c2df09cdd33f554351d372e"}, + {file = "selenium-4.29.0.tar.gz", hash = "sha256:3a62f7ec33e669364a6c0562a701deb69745b569c50d55f1a912bf8eb33358ba"}, ] [package.dependencies] @@ -2425,19 +2481,19 @@ test = ["httpx", "pytest (>=6)"] [[package]] name = "sphinx-book-theme" -version = "1.1.3" +version = "1.1.4" description = "A clean book theme for scientific explanations and documentation with Sphinx" optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "sphinx_book_theme-1.1.3-py3-none-any.whl", hash = 
"sha256:a554a9a7ac3881979a87a2b10f633aa2a5706e72218a10f71be38b3c9e831ae9"}, - {file = "sphinx_book_theme-1.1.3.tar.gz", hash = "sha256:1f25483b1846cb3d353a6bc61b3b45b031f4acf845665d7da90e01ae0aef5b4d"}, + {file = "sphinx_book_theme-1.1.4-py3-none-any.whl", hash = "sha256:843b3f5c8684640f4a2d01abd298beb66452d1b2394cd9ef5be5ebd5640ea0e1"}, + {file = "sphinx_book_theme-1.1.4.tar.gz", hash = "sha256:73efe28af871d0a89bd05856d300e61edce0d5b2fbb7984e84454be0fedfe9ed"}, ] [package.dependencies] -pydata-sphinx-theme = ">=0.15.2" -sphinx = ">=5" +pydata-sphinx-theme = "0.15.4" +sphinx = ">=6.1" [package.extras] code-style = ["pre-commit"] @@ -2584,14 +2640,14 @@ test = ["pytest"] [[package]] name = "starlette" -version = "0.45.3" +version = "0.46.0" description = "The little ASGI library that shines." optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"}, - {file = "starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f"}, + {file = "starlette-0.46.0-py3-none-any.whl", hash = "sha256:913f0798bd90ba90a9156383bcf1350a17d6259451d0d8ee27fc0cf2db609038"}, + {file = "starlette-0.46.0.tar.gz", hash = "sha256:b359e4567456b28d473d0193f34c0de0ed49710d75ef183a74a5ce0499324f50"}, ] [package.dependencies] @@ -2602,14 +2658,14 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart [[package]] name = "telethon" -version = "1.38.1" +version = "1.39.0" description = "Full-featured Telegram client library for Python 3" optional = false python-versions = ">=3.5" groups = ["main"] files = [ - {file = "Telethon-1.38.1-py3-none-any.whl", hash = "sha256:30c187017501bfb982b8af5659f864dda4108f77ea49cfce61e8f6fdb8a18d6e"}, - {file = "Telethon-1.38.1.tar.gz", hash = "sha256:f9866c1e37197a0894e0c02aa56a6359bffb14a585e88e18e3e819df4fda399a"}, + {file = 
"Telethon-1.39.0-py3-none-any.whl", hash = "sha256:aa9f394b94be144799a6f6a93ab463867bc7c63503ede9631751940a98f6c703"}, + {file = "telethon-1.39.0.tar.gz", hash = "sha256:35d4795d8c91deac515fb0bcb3723866b924de1c724e1d5c230460e96f284a63"}, ] [package.dependencies] @@ -3144,6 +3200,95 @@ files = [ [package.extras] dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +[[package]] +name = "wrapt" +version = "1.17.2" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, + {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, + {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, + {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, + {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, + {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, + {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, + {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, + {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, + {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, + {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, + {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, + {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, + {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, + {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, + {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, + {file = 
"wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, + {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, + {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, + {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, + {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, + {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, + {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, + 
{file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, + {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, + {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, + {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, + {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, + {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", 
hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, + {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, + {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, + {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, + {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, + {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, + {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, + {file = 
"wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, + {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, + {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, + {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, + {file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, + {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, +] + [[package]] name = "wsproto" version = "1.2.0" @@ -3161,14 +3306,14 @@ h11 = ">=0.9.0,<1" [[package]] name = "yt-dlp" -version = "2025.1.26" +version = "2025.2.19" description = "A feature-rich command-line audio/video downloader" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "yt_dlp-2025.1.26-py3-none-any.whl", hash = "sha256:3e76bd896b9f96601021ca192ca0fbdd195e3c3dcc28302a3a34c9bc4979da7b"}, - {file = "yt_dlp-2025.1.26.tar.gz", hash = "sha256:1c9738266921ad43c568ad01ac3362fb7c7af549276fbec92bd72f140da16240"}, + {file = "yt_dlp-2025.2.19-py3-none-any.whl", hash = "sha256:3ed218eaeece55e9d715afd41abc450dc406ee63bf79355169dfde312d38fdb8"}, + {file = "yt_dlp-2025.2.19.tar.gz", hash = "sha256:f33ca76df2e4db31880f2fe408d44f5058d9f135015b13e50610dfbe78245bea"}, ] [package.extras] @@ -3181,7 +3326,35 @@ secretstorage = ["cffi", "secretstorage"] static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.9.0,<0.10.0)"] test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"] +[[package]] +name = "yt-dlp-get-pot" 
+version = "0.3.0" +description = "" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "yt_dlp_get_pot-0.3.0-py3-none-any.whl", hash = "sha256:a49a596a3e3c02cd9ce051192ea3fe8168cf24ece8954bed6aa331a87d86954f"}, + {file = "yt_dlp_get_pot-0.3.0.tar.gz", hash = "sha256:ac9530b9e7b3d667235b9119da475f595d2dc7e6f6bbf98b965011be454e8833"}, +] + +[[package]] +name = "yt-dlp-getpot-wpc" +version = "0.1.2" +description = "" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "yt_dlp_getpot_wpc-0.1.2-py3-none-any.whl", hash = "sha256:61611948d47792ca0fee5632187f53c67be6f196384fb25b4c9034b9bb221207"}, + {file = "yt_dlp_getpot_wpc-0.1.2.tar.gz", hash = "sha256:196e152d6b8d6440867a527d9e9207703a99e41cada225848295e08d82c3fe2c"}, +] + +[package.dependencies] +nodriver = "*" +yt-dlp-get-pot = "*" + [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188" +content-hash = "4dc6b9a471ad641c1da6bece5a3501fe51ac92b768a52742f8db5b998e5ee8c1" diff --git a/pyproject.toml b/pyproject.toml index 3c64eae..afd9e21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,8 @@ dependencies = [ "certvalidator (>=0.0.0)", "rich-argparse (>=1.6.0,<2.0.0)", "ruamel-yaml (>=0.18.10,<0.19.0)", + "yt-dlp-get-pot (>=0.3.0,<0.4.0)", + "yt-dlp-getpot-wpc (>=0.1.2,<0.2.0)", ] [tool.poetry.group.dev.dependencies] From c5127f5fd1fc39869bf24dd08eed6b5c8c3f3065 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 24 Feb 2025 11:40:44 +0000 Subject: [PATCH 02/37] Allow flexible extractor_args in generic_extractor.py. 
--- .../modules/generic_extractor/__manifest__.py | 5 ++ .../generic_extractor/generic_extractor.py | 74 +++++++++++++++---- 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index caa3ae1..2936983 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -64,5 +64,10 @@ via the command line using the `--dropins` option (TODO!). "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, + "extractor_args": { + "default": {}, + "help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", + "type": "json_loader", + }, }, } diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 72fe3e0..1bc6b29 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -170,8 +170,8 @@ class GenericExtractor(Extractor): logger.error(f"Error processing entry {entry}: {e}") return self.add_metadata(data, info_extractor, url, result) - - def dropin_for_name(self, dropin_name: str, additional_paths = [], package=__package__) -> Type[InfoExtractor]: + + def dropin_for_name(self, dropin_name: str, additional_paths=[], package=__package__) -> Type[InfoExtractor]: dropin_name = dropin_name.lower() if dropin_name == "generic": @@ -179,6 +179,7 @@ class GenericExtractor(Extractor): return None dropin_class_name = dropin_name.title() + def _load_dropin(dropin): dropin_class = getattr(dropin, dropin_class_name)() return self._dropins.setdefault(dropin_name, dropin_class) @@ -202,7 +203,7 @@ class GenericExtractor(Extractor): 
return _load_dropin(dropin) except (FileNotFoundError, ModuleNotFoundError): pass - + # fallback to loading the dropins within auto-archiver try: return _load_dropin(importlib.import_module(f".{dropin_name}", package=package)) @@ -241,7 +242,8 @@ class GenericExtractor(Extractor): # don't clutter the logs with issues about the 'generic' extractor not having a dropin return False - logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead') + logger.debug( + f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead') try: result = self.get_metadata_for_post(info_extractor, url, ydl) except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e: @@ -273,14 +275,22 @@ class GenericExtractor(Extractor): ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'), - 'quiet': False, 'noplaylist': not self.allow_playlist , - 'writesubtitles': self.subtitles,'writeautomaticsub': self.subtitles, - "live_from_start": self.live_from_start, "proxy": self.proxy, - "max_downloads": self.max_downloads, "playlistend": self.max_downloads} - - # set up auth + 'quiet': False, + 'noplaylist': not self.allow_playlist , + 'writesubtitles': self.subtitles, + 'writeautomaticsub': self.subtitles, + "live_from_start": self.live_from_start, + "proxy": self.proxy, + "max_downloads": self.max_downloads, + "playlistend": self.max_downloads, + # TODO + # "verbose": True, + # "print_traffic": True, + } + + # Set up auth auth = self.auth_for_site(url, extract_cookies=False) - # order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file + # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file if auth: if 'username' in auth and 'password' in auth: logger.debug(f'Using provided auth username and password for 
{url}') @@ -296,12 +306,46 @@ class GenericExtractor(Extractor): logger.debug(f'Using cookies from file {self.cookie_file} for {url}') ydl_options['cookiesfile'] = auth['cookies_file'] + + # Applying user-defined extractor_args + if self.extractor_args: + logger.info(f"Applying user-defined extractor_args") + ydl_options.setdefault('extractor_args', {}) + + for key, args in self.extractor_args.items(): + logger.debug(f"Setting extractor_args: {key}") + if isinstance(args, dict): + # Site specific arguments (e.g., youtube: somekey=value) + ydl_options['extractor_args'].setdefault(key, {}).update(args) + else: + # General extractor_args (e.g., somekey=value) + ydl_options['extractor_args'][key] = args + + ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en" for info_extractor in self.suitable_extractors(url): - result = self.download_for_extractor(info_extractor, url, ydl) - if result: - return result - + try: + result = self.download_for_extractor(info_extractor, url, ydl) + if result: + return result + except yt_dlp.utils.ExtractorError as e: + # TODO Does this catch empty/ incomplete failures? + if self.extractor_args: + logger.warning( + f"Extraction with custom extractor_args failed for {url}. Retrying without extractor_args...") + # Remove extractor_args and try without + del ydl_options['extractor_args'] + ydl = yt_dlp.YoutubeDL(ydl_options) + try: + result = self.download_for_extractor(info_extractor, url, ydl) + if result: + return result + except Exception as retry_error: + logger.error(f"Extraction failed for {url} after retrying: {retry_error}") + return False + else: + logger.error(f"Extraction failed for {url}: {e}") + return False return False From 2d4f1b5b790ed642d4950d09fc03d910966bc209 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 25 Feb 2025 10:49:57 +0000 Subject: [PATCH 03/37] Added Bgutils PO token provider. 
--- poetry.lock | 43 +++++++++++++++++++++++++++++-------------- pyproject.toml | 1 + 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 75cbacf..486db42 100644 --- a/poetry.lock +++ b/poetry.lock @@ -103,14 +103,14 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "authlib" -version = "1.4.1" +version = "1.5.0" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "Authlib-1.4.1-py2.py3-none-any.whl", hash = "sha256:edc29c3f6a3e72cd9e9f45fff67fc663a2c364022eb0371c003f22d5405915c1"}, - {file = "authlib-1.4.1.tar.gz", hash = "sha256:30ead9ea4993cdbab821dc6e01e818362f92da290c04c7f6a1940f86507a790d"}, + {file = "Authlib-1.5.0-py2.py3-none-any.whl", hash = "sha256:b3cc5ccfc19cf87678046b6e7cb19d402d8a631a33c40e36385232203227953a"}, + {file = "authlib-1.5.0.tar.gz", hash = "sha256:8fd8bd8f806485a532ac39a17b579982cf54688f956174f995cc938a91725423"}, ] [package.dependencies] @@ -170,20 +170,35 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bgutil-ytdlp-pot-provider" +version = "0.7.3" +description = "" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "bgutil_ytdlp_pot_provider-0.7.3-py3-none-any.whl", hash = "sha256:b15806ad884e46c9ccd915af7ebf3f3cd419a564632155bf2304a3cf74f14667"}, + {file = "bgutil_ytdlp_pot_provider-0.7.3.tar.gz", hash = "sha256:e01213e13fdbae3ef87c74820b6217fc4942fa6f1092541c3eb1509ead2b92fb"}, +] + +[package.dependencies] +yt-dlp-get-pot = ">=0.1.1" + [[package]] name = "boto3" -version = "1.36.26" +version = "1.37.0" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.36.26-py3-none-any.whl", hash = "sha256:f67d014a7c5a3cd540606d64d7cb9eec3600cf42acab1ac0518df9751ae115e2"}, - {file 
= "boto3-1.36.26.tar.gz", hash = "sha256:523b69457eee55ac15aa707c0e768b2a45ca1521f95b2442931090633ec72458"}, + {file = "boto3-1.37.0-py3-none-any.whl", hash = "sha256:03bd8c93b226f07d944fd6b022e11a307bff94ab6a21d51675d7e3ea81ee8424"}, + {file = "boto3-1.37.0.tar.gz", hash = "sha256:01015b38017876d79efd7273f35d9a4adfba505237159621365bed21b9b65eca"}, ] [package.dependencies] -botocore = ">=1.36.26,<1.37.0" +botocore = ">=1.37.0,<1.38.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -192,14 +207,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.36.26" +version = "1.37.0" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"}, - {file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"}, + {file = "botocore-1.37.0-py3-none-any.whl", hash = "sha256:d01661f38c0edac87424344cdf4169f3ab9bc1bf1b677c8b230d025eb66c54a3"}, + {file = "botocore-1.37.0.tar.gz", hash = "sha256:b129d091a8360b4152ab65327186bf4e250de827c4a9b7ddf40a72b1acf1f3c1"}, ] [package.dependencies] @@ -2775,14 +2790,14 @@ sortedcontainers = "*" [[package]] name = "trio-websocket" -version = "0.12.1" +version = "0.12.2" description = "WebSocket library for Trio" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "trio_websocket-0.12.1-py3-none-any.whl", hash = "sha256:608ec746bb287e5d5a66baf483e41194193c5cf05ffaad6240e7d1fcd80d1e6f"}, - {file = "trio_websocket-0.12.1.tar.gz", hash = "sha256:d55ccd4d3eae27c494f3fdae14823317839bdcb8214d1173eacc4d42c69fc91b"}, + {file = "trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6"}, + {file = "trio_websocket-0.12.2.tar.gz", hash = 
"sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae"}, ] [package.dependencies] @@ -3357,4 +3372,4 @@ yt-dlp-get-pot = "*" [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "4dc6b9a471ad641c1da6bece5a3501fe51ac92b768a52742f8db5b998e5ee8c1" +content-hash = "4ac237775ed376b562a0cb3743c38a6900dab829d0ec18fe32cf34feb2d244bb" diff --git a/pyproject.toml b/pyproject.toml index afd9e21..cd55a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ dependencies = [ "ruamel-yaml (>=0.18.10,<0.19.0)", "yt-dlp-get-pot (>=0.3.0,<0.4.0)", "yt-dlp-getpot-wpc (>=0.1.2,<0.2.0)", + "bgutil-ytdlp-pot-provider (>=0.7.3,<0.8.0)", ] [tool.poetry.group.dev.dependencies] From 0eae2bee6a547edfce740c8a771a571ca0c1da21 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 24 Feb 2025 11:40:39 +0000 Subject: [PATCH 04/37] Add yt-dlp-get-pot and yt-dlp-getpot-wpc requirements. --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 3c64eae..afd9e21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,8 @@ dependencies = [ "certvalidator (>=0.0.0)", "rich-argparse (>=1.6.0,<2.0.0)", "ruamel-yaml (>=0.18.10,<0.19.0)", + "yt-dlp-get-pot (>=0.3.0,<0.4.0)", + "yt-dlp-getpot-wpc (>=0.1.2,<0.2.0)", ] [tool.poetry.group.dev.dependencies] From dd07b0b830c1ef8d8a053259bb47f26aa70af8ac Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 24 Feb 2025 11:40:44 +0000 Subject: [PATCH 05/37] Allow flexible extractor_args in generic_extractor.py. 
--- .../modules/generic_extractor/__manifest__.py | 5 ++ .../generic_extractor/generic_extractor.py | 75 +++++++++++++++---- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index caa3ae1..2936983 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -64,5 +64,10 @@ via the command line using the `--dropins` option (TODO!). "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, + "extractor_args": { + "default": {}, + "help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", + "type": "json_loader", + }, }, } diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 47c03f6..8ceda27 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -170,8 +170,8 @@ class GenericExtractor(Extractor): logger.error(f"Error processing entry {entry}: {e}") return self.add_metadata(data, info_extractor, url, result) - - def dropin_for_name(self, dropin_name: str, additional_paths = [], package=__package__) -> Type[InfoExtractor]: + + def dropin_for_name(self, dropin_name: str, additional_paths=[], package=__package__) -> Type[InfoExtractor]: dropin_name = dropin_name.lower() if dropin_name == "generic": @@ -179,6 +179,7 @@ class GenericExtractor(Extractor): return None dropin_class_name = dropin_name.title() + def _load_dropin(dropin): dropin_class = getattr(dropin, dropin_class_name)() return self._dropins.setdefault(dropin_name, dropin_class) @@ -202,7 +203,7 @@ class GenericExtractor(Extractor): 
return _load_dropin(dropin) except (FileNotFoundError, ModuleNotFoundError): pass - + # fallback to loading the dropins within auto-archiver try: return _load_dropin(importlib.import_module(f".{dropin_name}", package=package)) @@ -241,7 +242,8 @@ class GenericExtractor(Extractor): # don't clutter the logs with issues about the 'generic' extractor not having a dropin return False - logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead') + logger.debug( + f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead') try: result = self.get_metadata_for_post(info_extractor, url, ydl) except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e: @@ -273,15 +275,22 @@ class GenericExtractor(Extractor): ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'), - 'quiet': False, 'noplaylist': not self.allow_playlist , - 'writesubtitles': self.subtitles,'writeautomaticsub': self.subtitles, - "live_from_start": self.live_from_start, "proxy": self.proxy, - "max_downloads": self.max_downloads, "playlistend": self.max_downloads} - - # set up auth - auth = self.auth_for_site(url, extract_cookies=False) + 'quiet': False, + 'noplaylist': not self.allow_playlist , + 'writesubtitles': self.subtitles, + 'writeautomaticsub': self.subtitles, + "live_from_start": self.live_from_start, + "proxy": self.proxy, + "max_downloads": self.max_downloads, + "playlistend": self.max_downloads, + # TODO + # "verbose": True, + # "print_traffic": True, + } - # order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file + # Set up auth + auth = self.auth_for_site(url, extract_cookies=False) + # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file if auth: if 'username' in auth and 'password' in auth: 
logger.debug(f'Using provided auth username and password for {url}') @@ -297,12 +306,46 @@ class GenericExtractor(Extractor): logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}') ydl_options['cookiefile'] = auth['cookies_file'] + + # Applying user-defined extractor_args + if self.extractor_args: + logger.info(f"Applying user-defined extractor_args") + ydl_options.setdefault('extractor_args', {}) + + for key, args in self.extractor_args.items(): + logger.debug(f"Setting extractor_args: {key}") + if isinstance(args, dict): + # Site specific arguments (e.g., youtube: somekey=value) + ydl_options['extractor_args'].setdefault(key, {}).update(args) + else: + # General extractor_args (e.g., somekey=value) + ydl_options['extractor_args'][key] = args + + ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en" for info_extractor in self.suitable_extractors(url): - result = self.download_for_extractor(info_extractor, url, ydl) - if result: - return result - + try: + result = self.download_for_extractor(info_extractor, url, ydl) + if result: + return result + except yt_dlp.utils.ExtractorError as e: + # TODO Does this catch empty/ incomplete failures? + if self.extractor_args: + logger.warning( + f"Extraction with custom extractor_args failed for {url}. 
Retrying without extractor_args...") + # Remove extractor_args and try without + del ydl_options['extractor_args'] + ydl = yt_dlp.YoutubeDL(ydl_options) + try: + result = self.download_for_extractor(info_extractor, url, ydl) + if result: + return result + except Exception as retry_error: + logger.error(f"Extraction failed for {url} after retrying: {retry_error}") + return False + else: + logger.error(f"Extraction failed for {url}: {e}") + return False return False From 2c1753e14b00d1e683d82ede700106032ae22652 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 25 Feb 2025 10:49:57 +0000 Subject: [PATCH 06/37] Added Bgutils PO token provider. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index afd9e21..cd55a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ dependencies = [ "ruamel-yaml (>=0.18.10,<0.19.0)", "yt-dlp-get-pot (>=0.3.0,<0.4.0)", "yt-dlp-getpot-wpc (>=0.1.2,<0.2.0)", + "bgutil-ytdlp-pot-provider (>=0.7.3,<0.8.0)", ] [tool.poetry.group.dev.dependencies] From 7e4b44883b1b9b989d29d8a23ebf6f8c97363a5f Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 4 Mar 2025 14:03:39 +0000 Subject: [PATCH 07/37] Add temp options for testing --- poetry.lock | 212 +++--------------- pyproject.toml | 2 - .../generic_extractor/generic_extractor.py | 44 +--- 3 files changed, 31 insertions(+), 227 deletions(-) diff --git a/poetry.lock b/poetry.lock index 486db42..a8028ff 100644 --- a/poetry.lock +++ b/poetry.lock @@ -103,14 +103,14 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "authlib" -version = "1.5.0" +version = "1.5.1" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." 
optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "Authlib-1.5.0-py2.py3-none-any.whl", hash = "sha256:b3cc5ccfc19cf87678046b6e7cb19d402d8a631a33c40e36385232203227953a"}, - {file = "authlib-1.5.0.tar.gz", hash = "sha256:8fd8bd8f806485a532ac39a17b579982cf54688f956174f995cc938a91725423"}, + {file = "authlib-1.5.1-py2.py3-none-any.whl", hash = "sha256:8408861cbd9b4ea2ff759b00b6f02fd7d81ac5a56d0b2b22c08606c6049aae11"}, + {file = "authlib-1.5.1.tar.gz", hash = "sha256:5cbc85ecb0667312c1cdc2f9095680bb735883b123fb509fde1e65b1c5df972e"}, ] [package.dependencies] @@ -187,18 +187,18 @@ yt-dlp-get-pot = ">=0.1.1" [[package]] name = "boto3" -version = "1.37.0" +version = "1.37.5" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.37.0-py3-none-any.whl", hash = "sha256:03bd8c93b226f07d944fd6b022e11a307bff94ab6a21d51675d7e3ea81ee8424"}, - {file = "boto3-1.37.0.tar.gz", hash = "sha256:01015b38017876d79efd7273f35d9a4adfba505237159621365bed21b9b65eca"}, + {file = "boto3-1.37.5-py3-none-any.whl", hash = "sha256:12166353519aca0cc8d9dcfbbb0d38f8915955a5912b8cb241b2b2314f0dbc14"}, + {file = "boto3-1.37.5.tar.gz", hash = "sha256:ae6e7048beeaa4478368e554a4b290e3928beb0ae8d8767d108d72381a81af30"}, ] [package.dependencies] -botocore = ">=1.37.0,<1.38.0" +botocore = ">=1.37.5,<1.38.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -207,14 +207,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.37.0" +version = "1.37.5" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.37.0-py3-none-any.whl", hash = "sha256:d01661f38c0edac87424344cdf4169f3ab9bc1bf1b677c8b230d025eb66c54a3"}, - {file = "botocore-1.37.0.tar.gz", hash = "sha256:b129d091a8360b4152ab65327186bf4e250de827c4a9b7ddf40a72b1acf1f3c1"}, + {file = "botocore-1.37.5-py3-none-any.whl", hash = "sha256:e5cfbb8026d5b4fadd9b3a18b61d238a41a8b8f620ab75873dc1467d456150d6"}, + {file = "botocore-1.37.5.tar.gz", hash = "sha256:f8f526d33ae74d242c577e0440b57b9ec7d53edd41db211155ec8087fe7a5a21"}, ] [package.dependencies] @@ -711,24 +711,6 @@ calendars = ["convertdate (>=2.2.1)", "hijridate"] fasttext = ["fasttext (>=0.9.1)", "numpy (>=1.19.3,<2)"] langdetect = ["langdetect (>=1.0.0)"] -[[package]] -name = "deprecated" -version = "1.2.18" -description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" -groups = ["main"] -files = [ - {file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"}, - {file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"}, -] - -[package.dependencies] -wrapt = ">=1.10,<2" - -[package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools", "tox"] - [[package]] name = "docutils" version = "0.21.2" @@ -814,14 +796,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.161.0" +version = "2.162.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "google_api_python_client-2.161.0-py2.py3-none-any.whl", hash = "sha256:9476a5a4f200bae368140453df40f9cda36be53fa7d0e9a9aac4cdb859a26448"}, - {file = "google_api_python_client-2.161.0.tar.gz", hash = 
"sha256:324c0cce73e9ea0a0d2afd5937e01b7c2d6a4d7e2579cdb6c384f9699d6c9f37"}, + {file = "google_api_python_client-2.162.0-py2.py3-none-any.whl", hash = "sha256:49365fa4f7795fe81a747f5544d6528ea94314fa59664e0ea1005f603facf1ec"}, + {file = "google_api_python_client-2.162.0.tar.gz", hash = "sha256:5f8bc934a5b6eea73a7d12d999e6585c1823179f48340234acb385e2502e735a"}, ] [package.dependencies] @@ -893,14 +875,14 @@ tool = ["click (>=6.0.0)"] [[package]] name = "googleapis-common-protos" -version = "1.68.0" +version = "1.69.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "googleapis_common_protos-1.68.0-py2.py3-none-any.whl", hash = "sha256:aaf179b2f81df26dfadac95def3b16a95064c76a5f45f07e4c68a21bb371c4ac"}, - {file = "googleapis_common_protos-1.68.0.tar.gz", hash = "sha256:95d38161f4f9af0d9423eed8fb7b64ffd2568c3464eb542ff02c5bfa1953ab3c"}, + {file = "googleapis_common_protos-1.69.0-py2.py3-none-any.whl", hash = "sha256:17835fdc4fa8da1d61cfe2d4d5d57becf7c61d4112f8d81c67eaa9d7ce43042d"}, + {file = "googleapis_common_protos-1.69.0.tar.gz", hash = "sha256:5a46d58af72846f59009b9c4710425b9af2139555c71837081706b213b298187"}, ] [package.dependencies] @@ -911,14 +893,14 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "gspread" -version = "6.1.4" +version = "6.2.0" description = "Google Spreadsheets Python API" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "gspread-6.1.4-py3-none-any.whl", hash = "sha256:c34781c426031a243ad154952b16f21ac56a5af90687885fbee3d1fba5280dcd"}, - {file = "gspread-6.1.4.tar.gz", hash = "sha256:b8eec27de7cadb338bb1b9f14a9be168372dee8965c0da32121816b5050ac1de"}, + {file = "gspread-6.2.0-py3-none-any.whl", hash = "sha256:7fa1a11e1ecacc6c5946fa016be05941baca8540404314f59aec963dd8ae5db3"}, + {file = "gspread-6.2.0.tar.gz", hash = "sha256:bc3d02d1c39e0b40bfc8035b4fec407aa71a17f343fc81cc7e3f75bfa6555de6"}, ] [package.dependencies] 
@@ -1242,23 +1224,6 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -[[package]] -name = "mss" -version = "10.0.0" -description = "An ultra fast cross-platform multiple screenshots module in pure python using ctypes." -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "mss-10.0.0-py3-none-any.whl", hash = "sha256:82cf6460a53d09e79b7b6d871163c982e6c7e9649c426e7b7591b74956d5cb64"}, - {file = "mss-10.0.0.tar.gz", hash = "sha256:d903e0d51262bf0f8782841cf16eaa6d7e3e1f12eae35ab41c2e318837c6637f"}, -] - -[package.extras] -dev = ["build (==1.2.2.post1)", "mypy (==1.13.0)", "ruff (==0.7.3)", "twine (==5.1.1)"] -docs = ["sphinx (==8.1.3)"] -tests = ["numpy (==2.1.3)", "pillow (==11.0.0)", "pytest (==8.3.3)", "pytest-cov (==6.0.0)", "pytest-rerunfailures (==14.0.0)", "pyvirtualdisplay (==3.0)"] - [[package]] name = "mutagen" version = "1.47.0" @@ -1310,26 +1275,6 @@ rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-bo testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"] testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"] -[[package]] -name = "nodriver" -version = "0.39" -description = "[Docs here](https://ultrafunkamsterdam.github.io/nodriver)" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "nodriver-0.39-py3-none-any.whl", hash = "sha256:f245be52e6328393ece340a6dcbc8d5754fd7cf0838f0e1e40076944617178fc"}, - {file = "nodriver-0.39.tar.gz", hash = "sha256:af84f76215877c74166f95c8e7615268e31f6118f4c7291d201f29003f2248ef"}, -] - -[package.dependencies] -deprecated = "*" -mss = "*" -websockets = ">=14" - -[package.extras] -dev = ["black", "build", "furo", "pygments", "sphinx", "sphinx_autodoc_typehints", "sphinx_markdown_builder"] 
- [[package]] name = "numpy" version = "2.1.3" @@ -1847,14 +1792,14 @@ files = [ [[package]] name = "pytest" -version = "8.3.4" +version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, - {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, + {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, + {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, ] [package.dependencies] @@ -2318,14 +2263,14 @@ files = [ [[package]] name = "s3transfer" -version = "0.11.2" +version = "0.11.3" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "s3transfer-0.11.2-py3-none-any.whl", hash = "sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc"}, - {file = "s3transfer-0.11.2.tar.gz", hash = "sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f"}, + {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"}, + {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"}, ] [package.dependencies] @@ -3215,95 +3160,6 @@ files = [ [package.extras] dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] -[[package]] -name = "wrapt" -version = "1.17.2" -description = "Module for decorators, wrappers and monkey patching." 
-optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, - {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, - {file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"}, - {file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"}, - {file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"}, - {file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"}, - {file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"}, - {file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"}, - {file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"}, - {file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"}, - {file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"}, - {file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"}, - {file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"}, - {file = 
"wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"}, - {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"}, - {file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"}, - {file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"}, - {file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"}, - {file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"}, - {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"}, - {file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"}, - {file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"}, - {file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"}, - {file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"}, - {file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"}, - {file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"}, - {file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"}, - {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"}, - {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"}, - {file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"}, - {file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"}, - {file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"}, - {file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"}, - {file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"}, - {file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"}, - {file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"}, - {file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"}, - {file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"}, - {file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"}, - {file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"}, - {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"}, - {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"}, - {file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"}, - {file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = 
"sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"}, - {file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"}, - {file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"}, - {file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"}, -] - [[package]] name = "wsproto" version = "1.2.0" @@ -3353,23 +3209,7 @@ files = [ {file = "yt_dlp_get_pot-0.3.0.tar.gz", hash = "sha256:ac9530b9e7b3d667235b9119da475f595d2dc7e6f6bbf98b965011be454e8833"}, ] -[[package]] -name = "yt-dlp-getpot-wpc" -version = "0.1.2" -description = "" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "yt_dlp_getpot_wpc-0.1.2-py3-none-any.whl", hash = "sha256:61611948d47792ca0fee5632187f53c67be6f196384fb25b4c9034b9bb221207"}, - {file = "yt_dlp_getpot_wpc-0.1.2.tar.gz", hash = "sha256:196e152d6b8d6440867a527d9e9207703a99e41cada225848295e08d82c3fe2c"}, -] - -[package.dependencies] -nodriver = "*" -yt-dlp-get-pot = "*" - [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "4ac237775ed376b562a0cb3743c38a6900dab829d0ec18fe32cf34feb2d244bb" +content-hash = "70257413acdd5a72b4eb00d5234196a2bf6e89f4758633d2ccc7bba09891dfb2" diff --git a/pyproject.toml b/pyproject.toml index cd55a71..3f2a84e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,8 +57,6 @@ dependencies = [ "certvalidator (>=0.0.0)", "rich-argparse (>=1.6.0,<2.0.0)", "ruamel-yaml (>=0.18.10,<0.19.0)", - "yt-dlp-get-pot (>=0.3.0,<0.4.0)", - "yt-dlp-getpot-wpc (>=0.1.2,<0.2.0)", "bgutil-ytdlp-pot-provider (>=0.7.3,<0.8.0)", ] diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 68e9c92..c3fcc30 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ 
b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -285,6 +285,8 @@ class GenericExtractor(Extractor): "playlistend": self.max_downloads, # TODO # "verbose": True, + # 'debug_extractor': True, + # 'cachedir': False, # "print_traffic": True, } @@ -307,22 +309,6 @@ class GenericExtractor(Extractor): ydl_options['cookiefile'] = auth['cookies_file'] - # Applying user-defined extractor_args - if self.extractor_args: - logger.info(f"Applying user-defined extractor_args") - ydl_options.setdefault('extractor_args', {}) - - for key, args in self.extractor_args.items(): - logger.debug(f"Setting extractor_args: {key}") - if isinstance(args, dict): - # Site specific arguments (e.g., youtube: somekey=value) - ydl_options['extractor_args'].setdefault(key, {}).update(args) - else: - # General extractor_args (e.g., somekey=value) - ydl_options['extractor_args'][key] = args - - - # Applying user-defined extractor_args if self.extractor_args: logger.info(f"Applying user-defined extractor_args") @@ -341,27 +327,7 @@ class GenericExtractor(Extractor): ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en" for info_extractor in self.suitable_extractors(url): - try: - result = self.download_for_extractor(info_extractor, url, ydl) - if result: - return result - except yt_dlp.utils.ExtractorError as e: - # TODO Does this catch empty/ incomplete failures? - if self.extractor_args: - logger.warning( - f"Extraction with custom extractor_args failed for {url}. 
Retrying without extractor_args...") - # Remove extractor_args and try without - del ydl_options['extractor_args'] - ydl = yt_dlp.YoutubeDL(ydl_options) - try: - result = self.download_for_extractor(info_extractor, url, ydl) - if result: - return result - except Exception as retry_error: - logger.error(f"Extraction failed for {url} after retrying: {retry_error}") - return False - else: - logger.error(f"Extraction failed for {url}: {e}") - return False - + result = self.download_for_extractor(info_extractor, url, ydl) + if result: + return result return False From 5daeae994ac9a8b23eea42b4d3bccd78a41bea72 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 17 Mar 2025 14:17:31 +0000 Subject: [PATCH 08/37] Fix the extractor args for new list structure. --- poetry.lock | 53 ++++++++++++------- .../generic_extractor/generic_extractor.py | 37 ++++++------- 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/poetry.lock b/poetry.lock index e5e221b..6ee6175 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. 
[[package]] name = "accessible-pygments" @@ -51,7 +51,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -94,12 +94,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy 
(>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "authlib" @@ -145,7 +145,7 @@ files = [ ] [package.extras] -dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] +dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] [[package]] name = "beautifulsoup4" @@ -170,6 +170,21 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bgutil-ytdlp-pot-provider" +version = "0.7.4" +description = "" +optional = false +python-versions = ">=3.8" +groups = ["main"] 
+files = [ + {file = "bgutil_ytdlp_pot_provider-0.7.4-py3-none-any.whl", hash = "sha256:5f0b1d884fec66dff703c421ea06f5fc9b11022d9c0babdaa0cab13ed99b9d77"}, + {file = "bgutil_ytdlp_pot_provider-0.7.4.tar.gz", hash = "sha256:b6c1462b8f979540078085cd82462ef967b8b70cd0810d469243a31f5081e5c6"}, +] + +[package.dependencies] +yt-dlp-get-pot = ">=0.1.1" + [[package]] name = "boto3" version = "1.37.8" @@ -781,7 +796,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "future" @@ -816,7 +831,7 @@ requests = ">=2.18.0,<3.0.0.dev0" [package.extras] async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] @@ -1115,7 +1130,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", 
"freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] +dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] [[package]] name = "markdown-it-py" @@ -1594,7 +1609,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions ; python_version < \"3.10\""] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] @@ -3018,7 
+3033,7 @@ files = [ pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3041,7 +3056,7 @@ h11 = ">=0.8" typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "virtualenv" @@ -3062,7 +3077,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging 
(>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "vk-api" @@ -3324,7 +3339,7 @@ files = [ ] [package.extras] -dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [[package]] name = "wsproto" @@ -3355,8 +3370,8 @@ files = [ [package.extras] build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"] -curl-cffi = ["curl-cffi (==0.5.10) ; os_name == \"nt\" and implementation_name == \"cpython\"", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2) ; os_name != \"nt\" and implementation_name == \"cpython\""] -default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] +curl-cffi = ["curl-cffi (==0.5.10)", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2)"] +default = ["brotli", "brotlicffi", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.9.0,<0.10.0)"] pyinstaller = ["pyinstaller (>=6.11.1)"] secretstorage = ["cffi", "secretstorage"] @@ -3378,4 +3393,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "beb354960b8d8af491a13e09cb565c7e3099a2b150167c16147aa0438e970018" +content-hash = "830a11953091225cfc5f9cb8fb7e06c9be468695492ee4487093f988e4ac1956" diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 2e53629..4a54759 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ 
b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -404,12 +404,20 @@ class GenericExtractor(Extractor): "--write-subs" if self.subtitles else "--no-write-subs", "--write-auto-subs" if self.subtitles else "--no-write-auto-subs", "--live-from-start" if self.live_from_start else "--no-live-from-start", - "--proxy", - self.proxy if self.proxy else "", - f"--max-downloads {self.max_downloads}" if self.max_downloads != "inf" else "", - f"--playlist-end {self.max_downloads}" if self.max_downloads != "inf" else "", + # TODO: Move this to documentation + # Note: add the --verbose flag for debugging + "--verbose", ] + # proxy handling + if self.proxy: + ydl_options.extend(["--proxy", self.proxy]) + + # max_downloads handling + if self.max_downloads != "inf": + ydl_options.extend(["--max-downloads", str(self.max_downloads)]) + ydl_options.extend(["--playlist-end", str(self.max_downloads)]) + # set up auth auth = self.auth_for_site(url, extract_cookies=False) # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file @@ -428,22 +436,15 @@ class GenericExtractor(Extractor): logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}") ydl_options.extend(("--cookies", auth["cookies_file"])) - # Applying user-defined extractor_args if self.extractor_args: - logger.info(f"Applying user-defined extractor_args") - ydl_options.setdefault('extractor_args', {}) - - for key, args in self.extractor_args.items(): - logger.debug(f"Setting extractor_args: {key}") - if isinstance(args, dict): - # Site specific arguments (e.g., youtube: somekey=value) - ydl_options['extractor_args'].setdefault(key, {}).update(args) - else: - # General extractor_args (e.g., somekey=value) - ydl_options['extractor_args'][key] = args - - + for key, args in self.extractor_args.items(): + logger.debug(f"Setting extractor_args: {key}") + if isinstance(args, dict): + arg_str = ";".join(f"{k}={v}" for k, v in args.items()) + else: + arg_str = 
str(args) + ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"]) if self.ytdlp_args: logger.debug("Adding additional ytdlp arguments: {self.ytdlp_args}") From 8548b7def7ddceb0020cfda1a8a01f52275ae244 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 17 Mar 2025 18:53:59 +0000 Subject: [PATCH 09/37] Refactor setup method to pull and transpile the token generator. --- .../generic_extractor/generic_extractor.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index e96ac41..769c488 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -23,6 +23,11 @@ class GenericExtractor(Extractor): _dropins = {} def setup(self): + self.check_ytdlp_update() + self.setup_token_script() + + def check_ytdlp_update(self): + """Handles checking and updating yt-dlp if necessary.""" # check for file .ytdlp-update in the secrets folder if self.ytdlp_update_interval < 0: return @@ -63,6 +68,24 @@ class GenericExtractor(Extractor): except Exception as e: logger.error(f"Error updating yt-dlp: {e}") + def setup_token_script(self): + """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" + + # Determine the default location for the transpiled PO token script. + default_script = os.path.expanduser("~/bgutil-ytdlp-pot-provider/server/build/generate_once.js") + # Check if the PO token script exists. if not, trigger the script generation. + if not os.path.exists(default_script): + logger.info("PO Token script not found. 
Running setup...") + try: + subprocess.run(["bash", "scripts/potoken_provider/setup_pot_provider.sh"], check=True) + except subprocess.CalledProcessError as e: + logger.error(f"Failed to setup PO Token script: {e}") + return + + # Set the extractor_args to point to the default script, if not already provided. + # self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = default_script + logger.info(f"Using PO Token script at: {default_script}") + def suitable_extractors(self, url: str) -> Generator[str, None, None]: """ Returns a list of valid extractors for the given URL""" From e6b1a8c8935c52ebd0829c6751628bf243a970fc Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 17 Mar 2025 20:34:00 +0000 Subject: [PATCH 10/37] Add POT setup script. --- .../potoken_provider/setup_pot_provider.sh | 25 +++++++++++++++++++ .../generic_extractor/generic_extractor.py | 11 ++++---- 2 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 scripts/potoken_provider/setup_pot_provider.sh diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh new file mode 100644 index 0000000..9990e42 --- /dev/null +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +SCRIPTS_DIR="scripts/potoken_provider" +BGUTIL_DIR="$SCRIPTS_DIR/bgutil-ytdlp-pot-provider" + +# Clone the repository, or update if it exists +if [ ! -d "$BGUTIL_DIR" ]; then + echo "Cloning bgutil-ytdlp-pot-provider repository..." + git clone https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git "$BGUTIL_DIR" +else + echo "Updating existing bgutil-ytdlp-pot-provider repository..." 
+ cd "$BGUTIL_DIR" || exit 1 + git pull origin master +fi + +# Move into the server directory +cd "$BGUTIL_DIR/server" || exit 1 + +# Install dependencies and transpile the script +yarn install --frozen-lockfile +npx tsc + +# The built script is now available +echo "PO Token provider script is ready: $BGUTIL_DIR/server/build/generate_once.js" diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 769c488..de538d9 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -72,7 +72,9 @@ class GenericExtractor(Extractor): """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" # Determine the default location for the transpiled PO token script. - default_script = os.path.expanduser("~/bgutil-ytdlp-pot-provider/server/build/generate_once.js") + default_script = os.path.join( + "scripts", "potoken_provider", "bgutil-ytdlp-pot-provider", "server", "build", "generate_once.js" + ) # Check if the PO token script exists. if not, trigger the script generation. if not os.path.exists(default_script): logger.info("PO Token script not found. Running setup...") @@ -82,8 +84,8 @@ class GenericExtractor(Extractor): logger.error(f"Failed to setup PO Token script: {e}") return - # Set the extractor_args to point to the default script, if not already provided. - # self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = default_script + # Use the PO Token script in yt-dlp to fetch tokens on demand. 
+ self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = default_script logger.info(f"Using PO Token script at: {default_script}") def suitable_extractors(self, url: str) -> Generator[str, None, None]: @@ -443,9 +445,6 @@ class GenericExtractor(Extractor): "--write-subs" if self.subtitles else "--no-write-subs", "--write-auto-subs" if self.subtitles else "--no-write-auto-subs", "--live-from-start" if self.live_from_start else "--no-live-from-start", - # TODO: Move this to documentation - # Note: add the --verbose flag for debugging - "--verbose", ] # proxy handling From 43ef8f2aebe81cb26ea3acaf7e2c0804d9441483 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Mon, 17 Mar 2025 20:59:34 +0000 Subject: [PATCH 11/37] Add update to POT setup script. --- scripts/potoken_provider/setup_pot_provider.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh index 9990e42..470d1a3 100644 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -3,12 +3,18 @@ set -e SCRIPTS_DIR="scripts/potoken_provider" BGUTIL_DIR="$SCRIPTS_DIR/bgutil-ytdlp-pot-provider" +UPDATE=false + +# Parse optional flag +if [[ "$1" == "--update" ]]; then + UPDATE=true +fi # Clone the repository, or update if it exists if [ ! -d "$BGUTIL_DIR" ]; then echo "Cloning bgutil-ytdlp-pot-provider repository..." git clone https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git "$BGUTIL_DIR" -else +elif [ "$UPDATE" == true ]; then echo "Updating existing bgutil-ytdlp-pot-provider repository..." 
cd "$BGUTIL_DIR" || exit 1 git pull origin master @@ -21,5 +27,5 @@ cd "$BGUTIL_DIR/server" || exit 1 yarn install --frozen-lockfile npx tsc -# The built script is now available +# The built script is now available and can be used by the generic extractor echo "PO Token provider script is ready: $BGUTIL_DIR/server/build/generate_once.js" From 0c892f3cf119ed5cabdfccf7b76a00ebacaa43d3 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 11:44:08 +0000 Subject: [PATCH 12/37] Temp fix for tests by setting path in manifest. --- scripts/potoken_provider/setup_pot_provider.sh | 2 +- src/auto_archiver/modules/generic_extractor/__manifest__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh index 470d1a3..e0f6d7c 100644 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -27,5 +27,5 @@ cd "$BGUTIL_DIR/server" || exit 1 yarn install --frozen-lockfile npx tsc -# The built script is now available and can be used by the generic extractor +# The transpiled POT generation script is now available and will be used automatically by the generic extractor echo "PO Token provider script is ready: $BGUTIL_DIR/server/build/generate_once.js" diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 9ef1cb3..bd75c1c 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -75,7 +75,7 @@ If you are having issues with the extractor, you can review the version of `yt-d "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 
'inf' means no limit.", }, "extractor_args": { - "default": {}, + "default": {"youtube": {"getpot_bgutil_script": "scripts/potoken_provider/bgutil-ytdlp-pot-provider/server/build/generate_once.js"}}, "help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", "type": "json_loader", }, From cb632723bde1d4bcf3a0ead0a3b4adec7d3480dc Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 13:47:01 +0000 Subject: [PATCH 13/37] Add scripts to pull only /server/ section of pots generator, adn only install at runtime. --- .../potoken_provider/setup_pot_provider.sh | 55 +++++++++++-------- .../potoken_provider/update_pot_provider.sh | 27 +++++++++ .../modules/generic_extractor/__manifest__.py | 6 +- .../generic_extractor/generic_extractor.py | 5 +- 4 files changed, 69 insertions(+), 24 deletions(-) mode change 100644 => 100755 scripts/potoken_provider/setup_pot_provider.sh create mode 100755 scripts/potoken_provider/update_pot_provider.sh diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh old mode 100644 new mode 100755 index e0f6d7c..85c9e60 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -1,31 +1,42 @@ #!/bin/bash -set -e +set -e # Exit on error SCRIPTS_DIR="scripts/potoken_provider" -BGUTIL_DIR="$SCRIPTS_DIR/bgutil-ytdlp-pot-provider" -UPDATE=false +TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" +SERVER_DIR="$TARGET_DIR/server" +GEN_SCRIPT="$SERVER_DIR/build/generate_once.js" -# Parse optional flag -if [[ "$1" == "--update" ]]; then - UPDATE=true -fi - -# Clone the repository, or update if it exists -if [ ! -d "$BGUTIL_DIR" ]; then - echo "Cloning bgutil-ytdlp-pot-provider repository..." 
- git clone https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git "$BGUTIL_DIR" -elif [ "$UPDATE" == true ]; then - echo "Updating existing bgutil-ytdlp-pot-provider repository..." - cd "$BGUTIL_DIR" || exit 1 - git pull origin master +# Ensure the server directory exists +if [ ! -d "$SERVER_DIR" ]; then + echo "Error: PO Token provider server directory is missing! Please run update_pot_provider.sh first." + exit 1 fi # Move into the server directory -cd "$BGUTIL_DIR/server" || exit 1 +cd "$SERVER_DIR" || exit 1 -# Install dependencies and transpile the script -yarn install --frozen-lockfile -npx tsc +# Check if dependencies need installation +if [ ! -d "node_modules" ]; then + echo "Installing dependencies..." + yarn install --frozen-lockfile +else + echo "Dependencies already installed. Skipping yarn install." +fi -# The transpiled POT generation script is now available and will be used automatically by the generic extractor -echo "PO Token provider script is ready: $BGUTIL_DIR/server/build/generate_once.js" +# Check if build directory exists and if transpiling is needed +if [ ! -d "build" ] || [ "$SERVER_DIR/src" -nt "$GEN_SCRIPT" ]; then + echo "Build directory missing or outdated. Running transpilation..." + npx tsc +else + echo "Build directory is up to date. Skipping transpilation." +fi + +# Ensure the script exists after transpilation +if [ ! -f "$GEN_SCRIPT" ]; then + echo "Error: PO Token script not found after attempting transpilation." + exit 1 +fi + + +# Confirm success +echo "PO Token provider script is ready for use." 
diff --git a/scripts/potoken_provider/update_pot_provider.sh b/scripts/potoken_provider/update_pot_provider.sh new file mode 100755 index 0000000..58d8783 --- /dev/null +++ b/scripts/potoken_provider/update_pot_provider.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e # Exit on error + +SCRIPTS_DIR="scripts/potoken_provider" +TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" +BGUTIL_REPO="https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git" +BGUTIL_TEMP_DIR="$SCRIPTS_DIR/bgutil-temp" + + +# Clone fresh copy into temporary directory +git clone --depth 1 "$REPO_URL" "$TMP_DIR" + +# Ensure the target directory exists +rm -rf "$TARGET_DIR" +mkdir -p "$TARGET_DIR" + +# Copy the entire server directory +echo "Copying /server/ directory..." +cp -r "$BGUTIL_TEMP_DIR/server" "$TARGET_DIR/" + +# Clean up: remove the cloned repository +echo "Cleaning up temporary files..." +rm -rf "$BGUTIL_TEMP_DIR" + +# Confirm success +echo "PO Token provider script is ready in: $TARGET_DIR/server" +echo "Commit and push changes to include it in version control." diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index bd75c1c..956a924 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -74,8 +74,12 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, + "update_pots": { + "default": False, + "help": "If set, will run the script to update the pot generation script.", + }, "extractor_args": { - "default": {"youtube": {"getpot_bgutil_script": "scripts/potoken_provider/bgutil-ytdlp-pot-provider/server/build/generate_once.js"}}, + "default": {}, "help": "Additional arguments to pass to the yt-dlp extractor. 
See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", "type": "json_loader", }, diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index de538d9..d46e64f 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -73,7 +73,8 @@ class GenericExtractor(Extractor): # Determine the default location for the transpiled PO token script. default_script = os.path.join( - "scripts", "potoken_provider", "bgutil-ytdlp-pot-provider", "server", "build", "generate_once.js" + # "scripts", "potoken_provider", "bgutil-server", "build", "generate_once.js" + "scripts", "potoken_provider", "bgutil-provider", "server", "build", "generate_once.js" ) # Check if the PO token script exists. if not, trigger the script generation. if not os.path.exists(default_script): @@ -445,6 +446,8 @@ class GenericExtractor(Extractor): "--write-subs" if self.subtitles else "--no-write-subs", "--write-auto-subs" if self.subtitles else "--no-write-auto-subs", "--live-from-start" if self.live_from_start else "--no-live-from-start", + # TODO + "--verbose" ] # proxy handling From b83bfda187e8ca71f7b7015bbe2538db02b17258 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 14:10:20 +0000 Subject: [PATCH 14/37] Update directory location, add .gitignore --- scripts/potoken_provider/.gitignore | 2 ++ scripts/potoken_provider/setup_pot_provider.sh | 9 ++++----- scripts/potoken_provider/update_pot_provider.sh | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 scripts/potoken_provider/.gitignore diff --git a/scripts/potoken_provider/.gitignore b/scripts/potoken_provider/.gitignore new file mode 100644 index 0000000..b044bd7 --- /dev/null +++ b/scripts/potoken_provider/.gitignore @@ -0,0 +1,2 @@ + +bgutil-provider/node_modules \ No newline at end of file diff 
--git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh index 85c9e60..64b3060 100755 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -3,17 +3,16 @@ set -e # Exit on error SCRIPTS_DIR="scripts/potoken_provider" TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" -SERVER_DIR="$TARGET_DIR/server" -GEN_SCRIPT="$SERVER_DIR/build/generate_once.js" +GEN_SCRIPT="$TARGET_DIR/build/generate_once.js" # Ensure the server directory exists -if [ ! -d "$SERVER_DIR" ]; then +if [ ! -d "$TARGET_DIR" ]; then echo "Error: PO Token provider server directory is missing! Please run update_pot_provider.sh first." exit 1 fi # Move into the server directory -cd "$SERVER_DIR" || exit 1 +cd "$TARGET_DIR" || exit 1 # Check if dependencies need installation if [ ! -d "node_modules" ]; then @@ -24,7 +23,7 @@ else fi # Check if build directory exists and if transpiling is needed -if [ ! -d "build" ] || [ "$SERVER_DIR/src" -nt "$GEN_SCRIPT" ]; then +if [ ! -d "build" ] || [ "src" -nt "$GEN_SCRIPT" ]; then echo "Build directory missing or outdated. Running transpilation..." npx tsc else diff --git a/scripts/potoken_provider/update_pot_provider.sh b/scripts/potoken_provider/update_pot_provider.sh index 58d8783..29c8376 100755 --- a/scripts/potoken_provider/update_pot_provider.sh +++ b/scripts/potoken_provider/update_pot_provider.sh @@ -6,22 +6,21 @@ TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" BGUTIL_REPO="https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git" BGUTIL_TEMP_DIR="$SCRIPTS_DIR/bgutil-temp" - # Clone fresh copy into temporary directory -git clone --depth 1 "$REPO_URL" "$TMP_DIR" +git clone --depth 1 "$BGUTIL_REPO" "$BGUTIL_TEMP_DIR" # Ensure the target directory exists rm -rf "$TARGET_DIR" mkdir -p "$TARGET_DIR" -# Copy the entire server directory -echo "Copying /server/ directory..." 
-cp -r "$BGUTIL_TEMP_DIR/server" "$TARGET_DIR/" +# Copy only the contents inside /server/ into bgutil-provider +echo "Copy /server/ contents into $TARGET_DIR..." +cp -r "$BGUTIL_TEMP_DIR/server/"* "$TARGET_DIR/" # Clean up: remove the cloned repository echo "Cleaning up temporary files..." rm -rf "$BGUTIL_TEMP_DIR" # Confirm success -echo "PO Token provider script is ready in: $TARGET_DIR/server" +echo "PO Token provider script is ready in: $TARGET_DIR/build" echo "Commit and push changes to include it in version control." From c4e63ebd8c4b0622e4e7b41320a0f7654c03fdf7 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 14:54:57 +0000 Subject: [PATCH 15/37] Add conditional check to setup bgutils token generation script. TODO: Update tests --- .../potoken_provider/setup_pot_provider.sh | 15 +++++++------ .../modules/generic_extractor/__manifest__.py | 6 ++--- .../generic_extractor/generic_extractor.py | 22 ++++++++----------- tests/extractors/test_generic_extractor.py | 1 + 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh index 64b3060..8d27e75 100755 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -7,7 +7,7 @@ GEN_SCRIPT="$TARGET_DIR/build/generate_once.js" # Ensure the server directory exists if [ ! -d "$TARGET_DIR" ]; then - echo "Error: PO Token provider server directory is missing! Please run update_pot_provider.sh first." + echo "Error: PO Token provider server directory is missing! Please run scripts/update_pot_provider.sh first." exit 1 fi @@ -23,18 +23,19 @@ else fi # Check if build directory exists and if transpiling is needed -if [ ! -d "build" ] || [ "src" -nt "$GEN_SCRIPT" ]; then +if [ ! -d "build" ]; then echo "Build directory missing or outdated. Running transpilation..." npx tsc else echo "Build directory is up to date. Skipping transpilation." 
fi -# Ensure the script exists after transpilation -if [ ! -f "$GEN_SCRIPT" ]; then - echo "Error: PO Token script not found after attempting transpilation." - exit 1 -fi + +## Ensure the script exists after transpilation +#if [ ! -f "$GEN_SCRIPT" ]; then +# echo "Error: PO Token script not found after attempting transpilation." +# exit 1 +#fi # Confirm success diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 956a924..a5d33d4 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -74,9 +74,9 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, - "update_pots": { - "default": False, - "help": "If set, will run the script to update the pot generation script.", + "pot_provider": { + "default": "bgutils", + "help": "The Proof of origin provider method.", }, "extractor_args": { "default": {}, diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index d46e64f..11d969d 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -71,23 +71,21 @@ class GenericExtractor(Extractor): def setup_token_script(self): """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" - # Determine the default location for the transpiled PO token script. - default_script = os.path.join( - # "scripts", "potoken_provider", "bgutil-server", "build", "generate_once.js" - "scripts", "potoken_provider", "bgutil-provider", "server", "build", "generate_once.js" - ) - # Check if the PO token script exists. 
if not, trigger the script generation. - if not os.path.exists(default_script): - logger.info("PO Token script not found. Running setup...") + if self.pot_provider == "bgutils": + + # Check if the PO token generation script exists, set it up if not. try: subprocess.run(["bash", "scripts/potoken_provider/setup_pot_provider.sh"], check=True) except subprocess.CalledProcessError as e: logger.error(f"Failed to setup PO Token script: {e}") return - # Use the PO Token script in yt-dlp to fetch tokens on demand. - self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = default_script - logger.info(f"Using PO Token script at: {default_script}") + # Use the PO Token script in yt-dlp to fetch tokens on demand. + pot_script = os.path.join( + "scripts", "potoken_provider", "bgutil-provider", "build", "generate_once.js" + ) + self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = pot_script + def suitable_extractors(self, url: str) -> Generator[str, None, None]: """ @@ -446,8 +444,6 @@ class GenericExtractor(Extractor): "--write-subs" if self.subtitles else "--no-write-subs", "--write-auto-subs" if self.subtitles else "--no-write-auto-subs", "--live-from-start" if self.live_from_start else "--no-live-from-start", - # TODO - "--verbose" ] # proxy handling diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index 2089007..76395fb 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -29,6 +29,7 @@ class TestGenericExtractor(TestExtractorBase): "proxy": None, "cookies_from_browser": False, "cookie_file": None, + "pot_provider": False, } def test_load_dropin(self): From 2fdf6b75648cb8aaaa75e29a5a92d4c8307179c6 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 21:10:06 +0000 Subject: [PATCH 16/37] Update generic_extractor.py for general/ youtube extraction. 
--- .../modules/generic_extractor/generic_extractor.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 2969a72..fda01e0 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -227,7 +227,7 @@ class GenericExtractor(Extractor): if not result.get("url"): result.set_url(url) - if "description" in video_data and not result.get_content(): + if "description" in video_data and not result.get("content"): result.set_content(video_data["description"]) # extract comments if enabled if self.comments: @@ -244,10 +244,13 @@ class GenericExtractor(Extractor): ) # then add the common metadata - if timestamp := video_data.pop("timestamp", None) and not result.get("timestamp"): + timestamp = video_data.pop("timestamp", None) + if timestamp and not result.get("timestamp"): timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat() result.set_timestamp(timestamp) - if upload_date := video_data.pop("upload_date", None) and not result.get("upload_date"): + + upload_date = video_data.pop("upload_date", None) + if upload_date and not result.get("upload_date"): upload_date = get_datetime_from_str(upload_date, "%Y%m%d").replace(tzinfo=datetime.timezone.utc) result.set("upload_date", upload_date) From fc6946f78af4d644f6fc70fd018070cb2949c72b Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 18 Mar 2025 21:43:18 +0000 Subject: [PATCH 17/37] Run format. 
--- .../modules/generic_extractor/generic_extractor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index fda01e0..8821ac2 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -74,7 +74,6 @@ class GenericExtractor(Extractor): """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" if self.pot_provider == "bgutils": - # Check if the PO token generation script exists, set it up if not. try: subprocess.run(["bash", "scripts/potoken_provider/setup_pot_provider.sh"], check=True) @@ -83,12 +82,9 @@ class GenericExtractor(Extractor): return # Use the PO Token script in yt-dlp to fetch tokens on demand. - pot_script = os.path.join( - "scripts", "potoken_provider", "bgutil-provider", "build", "generate_once.js" - ) + pot_script = os.path.join("scripts", "potoken_provider", "bgutil-provider", "build", "generate_once.js") self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = pot_script - def suitable_extractors(self, url: str) -> Generator[str, None, None]: """ Returns a list of valid extractors for the given URL""" From 675de50ee7f6327aeb8b182cfd5d81e21e2c6375 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 19 Mar 2025 10:47:28 +0000 Subject: [PATCH 18/37] Update module test to test for default config keys within loaded --- tests/test_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_modules.py b/tests/test_modules.py index f672ca6..248e16d 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -82,7 +82,7 @@ def test_load_modules(module_name): default_config = module.configs assert loaded_module.name in loaded_module.config.keys() defaults = {k: v.get("default") for k, v in default_config.items()} - assert 
loaded_module.config[module_name] == defaults + assert defaults.keys() in [loaded_module.config[module_name].keys()] @pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"]) From 93921e71d428c397d2f25d9a2b9932a5668d76ed Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 19 Mar 2025 11:33:35 +0000 Subject: [PATCH 19/37] Clarify comments in pot scripts. --- scripts/potoken_provider/setup_pot_provider.sh | 11 +---------- scripts/potoken_provider/update_pot_provider.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh index 8d27e75..b533ea3 100755 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ b/scripts/potoken_provider/setup_pot_provider.sh @@ -1,5 +1,5 @@ #!/bin/bash -set -e # Exit on error +set -e SCRIPTS_DIR="scripts/potoken_provider" TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" @@ -11,7 +11,6 @@ if [ ! -d "$TARGET_DIR" ]; then exit 1 fi -# Move into the server directory cd "$TARGET_DIR" || exit 1 # Check if dependencies need installation @@ -31,12 +30,4 @@ else fi -## Ensure the script exists after transpilation -#if [ ! -f "$GEN_SCRIPT" ]; then -# echo "Error: PO Token script not found after attempting transpilation." -# exit 1 -#fi - - -# Confirm success echo "PO Token provider script is ready for use." 
diff --git a/scripts/potoken_provider/update_pot_provider.sh b/scripts/potoken_provider/update_pot_provider.sh index 29c8376..58113fc 100755 --- a/scripts/potoken_provider/update_pot_provider.sh +++ b/scripts/potoken_provider/update_pot_provider.sh @@ -1,26 +1,26 @@ #!/bin/bash -set -e # Exit on error +set -e SCRIPTS_DIR="scripts/potoken_provider" TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" BGUTIL_REPO="https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git" BGUTIL_TEMP_DIR="$SCRIPTS_DIR/bgutil-temp" -# Clone fresh copy into temporary directory +# Clone fresh copy of the POT generation script repo into temporary directory git clone --depth 1 "$BGUTIL_REPO" "$BGUTIL_TEMP_DIR" -# Ensure the target directory exists +# Ensure the target directory exists, clear for a fresh install rm -rf "$TARGET_DIR" mkdir -p "$TARGET_DIR" -# Copy only the contents inside /server/ into bgutil-provider +# Copy only the contents inside /server/ into bgutil-provider, +# as this is the part containing the PO Token generation script echo "Copy /server/ contents into $TARGET_DIR..." cp -r "$BGUTIL_TEMP_DIR/server/"* "$TARGET_DIR/" -# Clean up: remove the cloned repository +# Clean up: remove the cloned repository as we only needed the /server/ contents echo "Cleaning up temporary files..." rm -rf "$BGUTIL_TEMP_DIR" -# Confirm success echo "PO Token provider script is ready in: $TARGET_DIR/build" echo "Commit and push changes to include it in version control." 
From 633290a9ccb1ed70a4bc33458c3949bfc9ea39f9 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Tue, 25 Mar 2025 18:27:06 +0000 Subject: [PATCH 20/37] Update for pot providers list --- src/auto_archiver/modules/generic_extractor/__manifest__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 9ef1cb3..6cff94e 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -74,6 +74,11 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, + "pot_providers": { + "default": ["bgutils"], + "type": "list", + "help": "The Proof of origin provider methods to use. Set to an empty list: [] to disable.", + }, "extractor_args": { "default": {}, "help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", From d87c0dc3a9f654b1cb373b36fc0eb1a814620d76 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 16:02:29 +0000 Subject: [PATCH 21/37] Implement update for pot plugin. 
--- .../generic_extractor/generic_extractor.py | 74 +++++++++---------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 3ac6ede..0ea2f7f 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -1,10 +1,13 @@ +import shutil import sys import datetime import os import importlib import subprocess +import zipfile from typing import Generator, Type +from urllib.request import urlretrieve import yt_dlp from yt_dlp.extractor.common import InfoExtractor @@ -26,64 +29,56 @@ class GenericExtractor(Extractor): _dropins = {} def setup(self): - self.check_ytdlp_update() + self.in_docker = os.environ.get("RUNNING_IN_DOCKER") + self.check_for_extractor_updates() self.setup_token_script() - def check_ytdlp_update(self): - """Handles checking and updating yt-dlp if necessary.""" - # check for file .ytdlp-update in the secrets folder + def check_for_extractor_updates(self): + """Checks whether yt-dlp or its plugins need updating and triggers a restart if so.""" if self.ytdlp_update_interval < 0: return - use_secrets = os.path.exists("secrets") - path = os.path.join("secrets" if use_secrets else "", ".ytdlp-update") - next_update_check = None - if os.path.exists(path): - with open(path, "r") as f: - next_update_check = datetime.datetime.fromisoformat(f.read()) + update_file = os.path.join("secrets" if os.path.exists("secrets") else "", ".ytdlp-update") + next_check = None + if os.path.exists(update_file): + with open(update_file, "r") as f: + next_check = datetime.datetime.fromisoformat(f.read()) - if not next_update_check or next_update_check < datetime.datetime.now(): - updated = self.update_ytdlp() + if next_check and next_check > datetime.datetime.now(): + return - next_update_check = datetime.datetime.now() + 
datetime.timedelta(days=self.ytdlp_update_interval) - with open(path, "w") as f: - f.write(next_update_check.isoformat()) + yt_dlp_updated = self.update_package("yt-dlp") + bgutil_updated = self.update_package("bgutil-ytdlp-pot-provider") - if not updated: - return + # Write the new timestamp + with open(update_file, "w") as f: + next_check = datetime.datetime.now() + datetime.timedelta(days=self.ytdlp_update_interval) + f.write(next_check.isoformat()) + if yt_dlp_updated or bgutil_updated: if os.environ.get("AUTO_ARCHIVER_ALLOW_RESTART", "1") != "1": - logger.warning( - "yt-dlp has been updated. Auto archiver should be restarted for these changes to take effect" - ) + logger.warning("yt-dlp or plugin was updated — please restart auto-archiver manually") else: - logger.warning("Restarting auto-archiver to apply yt-dlp update") + logger.warning("yt-dlp or plugin was updated — restarting auto-archiver") logger.warning(" ======= RESTARTING ======= ") os.execv(sys.executable, [sys.executable] + sys.argv) - def update_ytdlp(self): - logger.info("Checking and updating yt-dlp...") - logger.info( - f"Tip: change the 'ytdlp_update_interval' setting to control how often yt-dlp is updated. Set to -1 to disable or 0 to enable on every run. 
Current setting: {self.ytdlp_update_interval}" - ) + def update_package(self, package_name: str) -> bool: + logger.info(f"Checking and updating {package_name}...") from importlib.metadata import version as get_version - old_version = get_version("yt-dlp") + old_version = get_version(package_name) try: - # try and update with pip (this works inside poetry environment and in a normal virtualenv) - result = subprocess.run(["pip", "install", "--upgrade", "yt-dlp"], check=True, capture_output=True) - - if "Successfully installed yt-dlp" in result.stdout.decode(): - new_version = importlib.metadata.version("yt-dlp") - logger.info(f"yt-dlp successfully (from {old_version} to {new_version})") + result = subprocess.run(["pip", "install", "--upgrade", package_name], check=True, capture_output=True) + if f"Successfully installed {package_name}" in result.stdout.decode(): + new_version = importlib.metadata.version(package_name) + logger.info(f"{package_name} updated from {old_version} to {new_version}") return True - else: - logger.info("yt-dlp already up to date") - return False - + logger.info(f"{package_name} already up to date") except Exception as e: - logger.error(f"Error updating yt-dlp: {e}") - return False + logger.error(f"Error updating {package_name}: {e}") + return False + def setup_token_script(self): """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" @@ -100,6 +95,7 @@ class GenericExtractor(Extractor): pot_script = os.path.join("scripts", "potoken_provider", "bgutil-provider", "build", "generate_once.js") self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = pot_script + def suitable_extractors(self, url: str) -> Generator[str, None, None]: """ Returns a list of valid extractors for the given URL""" From ae523eb06faf94915c7ac1bd453ecd03c2f6ecf9 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 16:45:29 +0000 Subject: [PATCH 22/37] Udpate PO token generation script method --- 
.../modules/generic_extractor/__manifest__.py | 8 +- .../generic_extractor/generic_extractor.py | 91 ++++++++++++++++--- 2 files changed, 82 insertions(+), 17 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 6cff94e..227c186 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -74,10 +74,10 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, - "pot_providers": { - "default": ["bgutils"], - "type": "list", - "help": "The Proof of origin provider methods to use. Set to an empty list: [] to disable.", + "bguils_po_token_method": { + "default": "auto", + "help": "Set up a Proof of origin token provider. This process has additional requirements. 
See [authentication](https://auto-archiver.readthedocs.io/en/latest/how_to/authentication_how_to.html) for more information.", + "choices": ["default", "script", "disabled"], }, "extractor_args": { "default": {}, diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 0ea2f7f..0c1ce3c 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -29,9 +29,8 @@ class GenericExtractor(Extractor): _dropins = {} def setup(self): - self.in_docker = os.environ.get("RUNNING_IN_DOCKER") self.check_for_extractor_updates() - self.setup_token_script() + self.setup_po_tokens() def check_for_extractor_updates(self): """Checks whether yt-dlp or its plugins need updating and triggers a restart if so.""" @@ -79,21 +78,87 @@ class GenericExtractor(Extractor): logger.error(f"Error updating {package_name}: {e}") return False + def setup_po_tokens(self) -> None: + """Setup Proof of Origin Token method conditionally. + Uses provider: https://github.com/Brainicism/bgutil-ytdlp-pot-provider. + """ + in_docker = os.environ.get("RUNNING_IN_DOCKER") + if self.bguils_po_token_method == "disabled": + # This allows disabling of the PO Token generation script in the Docker implementation. + logger.warning("Proof of Origin Token generation is disabled.") + return - def setup_token_script(self): - """Setup PO Token provider https://github.com/Brainicism/bgutil-ytdlp-pot-provider.""" + if self.bguils_po_token_method == "default" and not in_docker: + logger.info( + "Proof of Origin Token method not explicitly set. " + "If you're running an external HTTP server separately, you can safely ignore this message. 
" + "To reduce the likelihood of bot detection, enable one of the methods described in the documentation: " + "https://auto-archiver.readthedocs.io/en/settings_page/installation/authentication.html#proof-of-origin-tokens" + ) + return - if self.pot_provider == "bgutils": - # Check if the PO token generation script exists, set it up if not. - try: - subprocess.run(["bash", "scripts/potoken_provider/setup_pot_provider.sh"], check=True) - except subprocess.CalledProcessError as e: - logger.error(f"Failed to setup PO Token script: {e}") + # Either running in Docker, or "script" method is set beyond this point + self.setup_token_generation_script() + + def setup_token_generation_script(self) -> None: + """ This function sets up the Proof of Origin Token generation script method for + bgutil-ytdlp-pot-provider if enabled or in Docker.""" + missing_tools = [tool for tool in ("node", "yarn", "npx") if shutil.which(tool) is None] + if missing_tools: + logger.error( + f"Cannot set up PO Token script; missing required tools: {', '.join(missing_tools)}. " + "Install these tools or run bgutils via Docker. 
" + "See: https://github.com/Brainicism/bgutil-ytdlp-pot-provider" + ) + return + try: + from importlib.metadata import version as get_version + + plugin_version = get_version("bgutil-ytdlp-pot-provider") + base_dir = os.path.expanduser("~/bgutil-ytdlp-pot-provider") + server_dir = os.path.join(base_dir, "server") + version_file = os.path.join(server_dir, ".VERSION") + transpiled_script = os.path.join(server_dir, "build", "generate_once.js") + + # Skip setup if version is correct and transpiled script exists + if os.path.isfile(transpiled_script) and os.path.isfile(version_file): + with open(version_file) as vf: + if vf.read().strip() == plugin_version: + logger.info("PO Token script already set up and up to date.") + else: + # Remove an outdated directory and pull a new version + if os.path.exists(base_dir): + shutil.rmtree(base_dir) + os.makedirs(base_dir, exist_ok=True) + + zip_url = f"https://github.com/Brainicism/bgutil-ytdlp-pot-provider/archive/refs/tags/{plugin_version}.zip" + zip_path = os.path.join(base_dir, f"{plugin_version}.zip") + logger.info(f"Downloading bgutils release zip for version {plugin_version}...") + urlretrieve(zip_url, zip_path) + with zipfile.ZipFile(zip_path, "r") as z: + z.extractall(base_dir) + os.remove(zip_path) + + extracted_root = os.path.join(base_dir, f"bgutil-ytdlp-pot-provider-{plugin_version}") + shutil.move(os.path.join(extracted_root, "server"), server_dir) + shutil.rmtree(extracted_root) + logger.info("Installing dependencies and transpiling PoT Generator script...") + subprocess.run(["yarn", "install", "--frozen-lockfile"], cwd=server_dir, check=True) + subprocess.run(["npx", "tsc"], cwd=server_dir, check=True) + + with open(version_file, "w") as vf: + vf.write(plugin_version) + + script_path = os.path.join(server_dir, "build", "generate_once.js") + if not os.path.exists(script_path): + logger.error("generate_once.js not found after transpilation.") return - # Use the PO Token script in yt-dlp to fetch tokens on demand. 
- pot_script = os.path.join("scripts", "potoken_provider", "bgutil-provider", "build", "generate_once.js") - self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = pot_script + self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = script_path + logger.info(f"PO Token script configured at: {script_path}") + + except Exception as e: + logger.error(f"Failed to set up PO Token script: {e}") def suitable_extractors(self, url: str) -> Generator[str, None, None]: From 4a02407659ab0e5302f3649c34dbe763d88c8eb9 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 16:46:21 +0000 Subject: [PATCH 23/37] Typo fix. --- docs/source/how_to/authentication_how_to.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index 0e842fb..3fc8a12 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -106,5 +106,6 @@ Finally,Some important things to remember: ## Authenticating on XXXX site with username/password -```{note} This section is still under construction 🚧 +```{note} +This section is still under construction 🚧 ``` From 565275ac37c3c018c58d66c53c9cee133b55185e Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 16:59:01 +0000 Subject: [PATCH 24/37] Basic documentation for POT process --- docs/source/how_to/authentication_how_to.md | 83 +++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index 3fc8a12..a9ecc3f 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -109,3 +109,86 @@ Finally,Some important things to remember: ```{note} This section is still under construction 🚧 ``` + + +# Proof of Origin Tokens + +YouTube uses **Proof of Origin Tokens (POT)** as part of its bot detection system to verify that 
requests originate from valid clients. If a token is missing or invalid, some videos may return errors like "Sign in to confirm you're not a bot." + +yt-dlp provides [a detailed guide to POTs](https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide). + +### How we can add POTs to Auto Archiver +This feature is enabled for the Generic Archiver via two yt-dlp plugins: + +- **Client-side plugin**: [yt-dlp-get-pot](https://github.com/coletdjnz/yt-dlp-get-pot) + Detects when a token is required and requests one from a provider. + +- **Provider plugin**: [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-pot-provider) + Includes both a Python plugin and a **Node.js server or script** to generate the token. + +These are installed in our Poetry environment. + +### Integration Methods + +**Docker**: + +When running the Auto Archiver using the Docker image, we use the [Node.js token generation script](https://github.com/Brainicism/bgutil-ytdlp-pot-provider/tree/master/server). +This is to avoid managing a separate server process, and is handled automatically inside the Docker container when needed. + +**PyPi/ Local**: + +When using the Auto Archiver PyPI package, or running locally, you will need additional system requirements to run the token generation script, namely either Docker, or Node.js and Yarn. + +See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-pot-provider?tab=readme-ov-file#a-http-server-option) documentation for more details. + +- You can set the config option `"po_token_provider": true` under the `GenericExtractor` section of your config to "script" to enable the token generation script process locally. +- Or you can run the bgutil-ytdlp-pot-provider server separately using their Docker image. + +### Notes + +- The token generation script is only triggered when needed by yt-dlp, so it should have no effect unless YouTube requests a POT. 
+- If you're running the Auto Archiver in Docker, this is set up automatically. +- If you're running locally, you'll need to run the setup script manually or enable the feature in your config. + +Configurations: +- **default**: In Docker this downloads, transpiles and creates a token generation script. Locally it does nothing. If you are running the bgutil-ytdlp-pot-provider server via Docker you can choose this. +- **script**: Download and create the node script, even outside of Docker. +- **disabled**: Disable POT generation, even in Docker. + +### Advanced Configuration + +If you change the default port of the bgutil-ytdlp-pot-provider server, you can pass the updated values using our `extractor_args` option for the generic extractor. + +```yaml +generic_extractor: + ytdlp_args: "--no-abort-on-error --abort-on-error --verbose" + ytdlp_update_interval: 5 + bguils_po_token_method: "script" + extractor_args: + youtube: + getpot_bgutil_baseurl: "http://127.0.0.1:8080" + player_client: web,tv +``` +For more details on this for bgutils see [here](https://github.com/Brainicism/bgutil-ytdlp-pot-provider?tab=readme-ov-file#usage) + +### Checking the logs + +To verify that the POT process is working, look for the following lines in your log after adding the config option: + +```shell +[GetPOT] BgUtilScript: Generating POT via script: /Users/you/bgutil-ytdlp-pot-provider/server/build/generate_once.js +[debug] [GetPOT] BgUtilScript: Executing command to get POT via script: /Users/you/.nvm/versions/node/v20.18.0/bin/node /Users/you/bgutil-ytdlp-pot-provider/server/build/generate_once.js -v ymCMy8OflKM +[debug] [GetPOT] BgUtilScript: stdout: +{"poToken":"MlMxojNFhEJvUzGeHEkVRSK_luXtwcDnwSNIOgaUutqB7t99nmlNvtWgYayboopG6ZopZgmQ-6PJCWEMHv89MIiFGGlJRY25Fkwzxmia_8uYgf5AWf==","generatedAt":"2025-03-26T10:45:26.156Z","visitIdentifier":"ymCMy8OflKM"} +[debug] [GetPOT] Fetching gvs PO Token for tv client +``` + +If it can't find the script, you'll see: +```shell +[debug] [GetPOT] 
Fetching player PO Token for tv client +WARNING: [GetPOT] BgUtilScript: Script path doesn't exist: /Users/you/bgutil-ytdlp-pot-provider/server/build/generate_once.js. Please make sure the script has been transpiled correctly. +WARNING: [GetPOT] BgUtilHTTP: Error reaching GET http://127.0.0.1:4416/ping (caused by TransportError). Please make sure that the server is reachable at http://127.0.0.1:4416. +[debug] [GetPOT] No player PO Token provider available for tv client +``` + +In this case check that the script has been transpiled correctly and is available at the path specified in the log. From 23e7dd0995cfceeed9f8bd107ebf0d8bbff22beb Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 17:00:31 +0000 Subject: [PATCH 25/37] Remove old implementation --- scripts/potoken_provider/.gitignore | 2 -- .../potoken_provider/setup_pot_provider.sh | 33 ------------------- .../potoken_provider/update_pot_provider.sh | 26 --------------- 3 files changed, 61 deletions(-) delete mode 100644 scripts/potoken_provider/.gitignore delete mode 100755 scripts/potoken_provider/setup_pot_provider.sh delete mode 100755 scripts/potoken_provider/update_pot_provider.sh diff --git a/scripts/potoken_provider/.gitignore b/scripts/potoken_provider/.gitignore deleted file mode 100644 index b044bd7..0000000 --- a/scripts/potoken_provider/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ - -bgutil-provider/node_modules \ No newline at end of file diff --git a/scripts/potoken_provider/setup_pot_provider.sh b/scripts/potoken_provider/setup_pot_provider.sh deleted file mode 100755 index b533ea3..0000000 --- a/scripts/potoken_provider/setup_pot_provider.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -e - -SCRIPTS_DIR="scripts/potoken_provider" -TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" -GEN_SCRIPT="$TARGET_DIR/build/generate_once.js" - -# Ensure the server directory exists -if [ ! -d "$TARGET_DIR" ]; then - echo "Error: PO Token provider server directory is missing! 
Please run scripts/update_pot_provider.sh first." - exit 1 -fi - -cd "$TARGET_DIR" || exit 1 - -# Check if dependencies need installation -if [ ! -d "node_modules" ]; then - echo "Installing dependencies..." - yarn install --frozen-lockfile -else - echo "Dependencies already installed. Skipping yarn install." -fi - -# Check if build directory exists and if transpiling is needed -if [ ! -d "build" ]; then - echo "Build directory missing or outdated. Running transpilation..." - npx tsc -else - echo "Build directory is up to date. Skipping transpilation." -fi - - -echo "PO Token provider script is ready for use." diff --git a/scripts/potoken_provider/update_pot_provider.sh b/scripts/potoken_provider/update_pot_provider.sh deleted file mode 100755 index 58113fc..0000000 --- a/scripts/potoken_provider/update_pot_provider.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -set -e - -SCRIPTS_DIR="scripts/potoken_provider" -TARGET_DIR="$SCRIPTS_DIR/bgutil-provider" -BGUTIL_REPO="https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git" -BGUTIL_TEMP_DIR="$SCRIPTS_DIR/bgutil-temp" - -# Clone fresh copy of the POT generation script repo into temporary directory -git clone --depth 1 "$BGUTIL_REPO" "$BGUTIL_TEMP_DIR" - -# Ensure the target directory exists, clear for a fresh install -rm -rf "$TARGET_DIR" -mkdir -p "$TARGET_DIR" - -# Copy only the contents inside /server/ into bgutil-provider, -# as this is the part containing the PO Token generation script -echo "Copy /server/ contents into $TARGET_DIR..." -cp -r "$BGUTIL_TEMP_DIR/server/"* "$TARGET_DIR/" - -# Clean up: remove the cloned repository as we only needed the /server/ contents -echo "Cleaning up temporary files..." -rm -rf "$BGUTIL_TEMP_DIR" - -echo "PO Token provider script is ready in: $TARGET_DIR/build" -echo "Commit and push changes to include it in version control." 
From 093ce34a6a3a63a4de02ce0d41163c11304238ac Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 17:02:20 +0000 Subject: [PATCH 26/37] Ruff format. --- .../modules/generic_extractor/generic_extractor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 0c1ce3c..88f7b38 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -80,7 +80,7 @@ class GenericExtractor(Extractor): def setup_po_tokens(self) -> None: """Setup Proof of Origin Token method conditionally. - Uses provider: https://github.com/Brainicism/bgutil-ytdlp-pot-provider. + Uses provider: https://github.com/Brainicism/bgutil-ytdlp-pot-provider. """ in_docker = os.environ.get("RUNNING_IN_DOCKER") if self.bguils_po_token_method == "disabled": @@ -101,8 +101,8 @@ class GenericExtractor(Extractor): self.setup_token_generation_script() def setup_token_generation_script(self) -> None: - """ This function sets up the Proof of Origin Token generation script method for - bgutil-ytdlp-pot-provider if enabled or in Docker.""" + """This function sets up the Proof of Origin Token generation script method for + bgutil-ytdlp-pot-provider if enabled or in Docker.""" missing_tools = [tool for tool in ("node", "yarn", "npx") if shutil.which(tool) is None] if missing_tools: logger.error( @@ -131,7 +131,9 @@ class GenericExtractor(Extractor): shutil.rmtree(base_dir) os.makedirs(base_dir, exist_ok=True) - zip_url = f"https://github.com/Brainicism/bgutil-ytdlp-pot-provider/archive/refs/tags/{plugin_version}.zip" + zip_url = ( + f"https://github.com/Brainicism/bgutil-ytdlp-pot-provider/archive/refs/tags/{plugin_version}.zip" + ) zip_path = os.path.join(base_dir, f"{plugin_version}.zip") logger.info(f"Downloading bgutils release zip for version 
{plugin_version}...") urlretrieve(zip_url, zip_path) @@ -160,7 +162,6 @@ class GenericExtractor(Extractor): except Exception as e: logger.error(f"Failed to set up PO Token script: {e}") - def suitable_extractors(self, url: str) -> Generator[str, None, None]: """ Returns a list of valid extractors for the given URL""" From 2cad5edea8f513b10235941ec031890c6479f0c3 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 17:33:00 +0000 Subject: [PATCH 27/37] Fix default config --- docs/source/how_to/authentication_how_to.md | 2 ++ src/auto_archiver/modules/generic_extractor/__manifest__.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index a9ecc3f..3363c85 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -141,6 +141,8 @@ When using the Auto Archiver PyPI package, or running locally, you will need add See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-pot-provider?tab=readme-ov-file#a-http-server-option) documentation for more details. +⚠️WARNING⚠️: This will add the server scripts to the home directory of wherever this is running. + - You can set the config option `"po_token_provider": true` under the `GenericExtractor` section of your config to "script" to enable the token generation script process locally. - Or you can run the bgutil-ytdlp-pot-provider server separately using their Docker image. 
diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 227c186..f34151c 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -75,7 +75,7 @@ If you are having issues with the extractor, you can review the version of `yt-d "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, "bguils_po_token_method": { - "default": "auto", + "default": "default", "help": "Set up a Proof of origin token provider. This process has additional requirements. See [authentication](https://auto-archiver.readthedocs.io/en/latest/how_to/authentication_how_to.html) for more information.", "choices": ["default", "script", "disabled"], }, From 456b2746c8187c488c6c6bf3991cb6ef22285bb8 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 18:01:20 +0000 Subject: [PATCH 28/37] Update the docs --- docs/source/how_to/authentication_how_to.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index 3363c85..025e65b 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -151,6 +151,7 @@ See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-p - The token generation script is only triggered when needed by yt-dlp, so it should have no effect unless YouTube requests a POT. - If you're running the Auto Archiver in Docker, this is set up automatically. - If you're running locally, you'll need to run the setup script manually or enable the feature in your config. +- You can set up both the server and the script, and the plugin can fall back from one to the other if needed. This is recommended for robustness! 
Configurations: - **default**: In Docker this downloads, transpiles and creates a token generation script. Locally it does nothing. If you are running the bgutil-ytdlp-pot-provider server via Docker you can choose this. @@ -185,7 +186,7 @@ To verify that the POT process working, look for the following lines in your log [debug] [GetPOT] Fetching gvs PO Token for tv client ``` -If it can't find the script, you'll see: +If it can't find the script or reach the server, you'll see something like this: ```shell [debug] [GetPOT] Fetching player PO Token for tv client WARNING: [GetPOT] BgUtilScript: Script path doesn't exist: /Users/you/bgutil-ytdlp-pot-provider/server/build/generate_once.js. Please make sure the script has been transpiled correctly. @@ -193,4 +194,6 @@ WARNING: [GetPOT] BgUtilHTTP: Error reaching GET http://127.0.0.1:4416/ping (cau [debug] [GetPOT] No player PO Token provider available for tv client ``` -In this case check that the script has been transpiled correctly and is available at the path specified in the log. +In this case check that the script has been transpiled correctly and is available at the path specified in the log, +or that the server is running and reachable. + From f54b5c5f186f4786c175dc217b218c7b6edf9672 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 26 Mar 2025 18:05:04 +0000 Subject: [PATCH 29/37] Update poetry.lock --- poetry.lock | 126 ++++++++++++++++++++-------------------------------- 1 file changed, 49 insertions(+), 77 deletions(-) diff --git a/poetry.lock b/poetry.lock index 631c1d6..8b3c9f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. 
[[package]] name = "accessible-pygments" @@ -51,7 +51,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -82,12 +82,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", 
"pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "authlib" @@ -133,7 +133,7 @@ files = [ ] [package.extras] -dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] +dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] [[package]] name = "beautifulsoup4" @@ -158,6 +158,21 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bgutil-ytdlp-pot-provider" +version = "0.7.4" +description = "" 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "bgutil_ytdlp_pot_provider-0.7.4-py3-none-any.whl", hash = "sha256:5f0b1d884fec66dff703c421ea06f5fc9b11022d9c0babdaa0cab13ed99b9d77"}, + {file = "bgutil_ytdlp_pot_provider-0.7.4.tar.gz", hash = "sha256:b6c1462b8f979540078085cd82462ef967b8b70cd0810d469243a31f5081e5c6"}, +] + +[package.dependencies] +yt-dlp-get-pot = ">=0.1.1" + [[package]] name = "boto3" version = "1.37.18" @@ -645,10 +660,10 @@ files = [ cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] -nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version >= \"3.8\""] -pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] @@ -766,7 +781,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "future" @@ -801,7 +816,7 @@ requests = ">=2.18.0,<3.0.0" [package.extras] async-rest = ["google-auth[aiohttp] 
(>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] @@ -1100,7 +1115,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] +dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama 
(==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] [[package]] name = "markdown-it-py" @@ -1564,7 +1579,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions ; python_version < \"3.10\""] +typing = ["typing-extensions"] xmp = ["defusedxml"] [[package]] @@ -2265,23 +2280,6 @@ files = [ [package.dependencies] rich = ">=11.0.0" -[[package]] -name = "roman-numerals-py" -version = "3.1.0" -description = "Manipulate well-formed Roman numerals" -optional = false -python-versions = ">=3.9" -groups = ["docs"] -markers = "python_version >= \"3.12\"" -files = [ - {file = "roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c"}, - {file = "roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d"}, -] - -[package.extras] -lint = ["mypy (==1.15.0)", "pyright (==1.1.394)", "ruff (==0.9.7)"] -test = ["pytest (>=8)"] - [[package]] name = "rsa" version = "4.9" @@ -2506,7 +2504,6 @@ description = "Python documentation generator" optional = false python-versions = ">=3.10" groups = ["docs"] -markers = "python_version < \"3.12\"" files = [ {file = "sphinx-8.1.3-py3-none-any.whl", hash = 
"sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"}, {file = "sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927"}, @@ -2536,43 +2533,6 @@ docs = ["sphinxcontrib-websupport"] lint = ["flake8 (>=6.0)", "mypy (==1.11.1)", "pyright (==1.1.384)", "pytest (>=6.0)", "ruff (==0.6.9)", "sphinx-lint (>=0.9)", "tomli (>=2)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.18.0.20240506)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241005)", "types-requests (==2.32.0.20240914)", "types-urllib3 (==1.26.25.14)"] test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] -[[package]] -name = "sphinx" -version = "8.2.3" -description = "Python documentation generator" -optional = false -python-versions = ">=3.11" -groups = ["docs"] -markers = "python_version >= \"3.12\"" -files = [ - {file = "sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3"}, - {file = "sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348"}, -] - -[package.dependencies] -alabaster = ">=0.7.14" -babel = ">=2.13" -colorama = {version = ">=0.4.6", markers = "sys_platform == \"win32\""} -docutils = ">=0.20,<0.22" -imagesize = ">=1.3" -Jinja2 = ">=3.1" -packaging = ">=23.0" -Pygments = ">=2.17" -requests = ">=2.30.0" -roman-numerals-py = ">=1.0.0" -snowballstemmer = ">=2.2" -sphinxcontrib-applehelp = ">=1.0.7" -sphinxcontrib-devhelp = ">=1.0.6" -sphinxcontrib-htmlhelp = ">=2.0.6" -sphinxcontrib-jsmath = ">=1.0.1" -sphinxcontrib-qthelp = ">=1.0.6" -sphinxcontrib-serializinghtml = ">=1.1.9" - -[package.extras] -docs = ["sphinxcontrib-websupport"] -lint = ["betterproto (==2.0.0b6)", "mypy (==1.15.0)", "pypi-attestations (==0.0.21)", "pyright (==1.1.395)", "pytest (>=8.0)", "ruff (==0.9.9)", "sphinx-lint 
(>=0.9)", "types-Pillow (==10.2.0.20240822)", "types-Pygments (==2.19.0.20250219)", "types-colorama (==0.4.15.20240311)", "types-defusedxml (==0.7.0.20240218)", "types-docutils (==0.21.0.20241128)", "types-requests (==2.32.0.20241016)", "types-urllib3 (==1.26.25.14)"] -test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "pytest-xdist[psutil] (>=3.4)", "setuptools (>=70.0)", "typing_extensions (>=4.9)"] - [[package]] name = "sphinx-autoapi" version = "3.6.0" @@ -3031,7 +2991,7 @@ files = [ pysocks = {version = ">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} [package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3054,7 +3014,7 @@ h11 = ">=0.8" typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] -standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "virtualenv" @@ -3075,7 +3035,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env 
(>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] [[package]] name = "vk-api" @@ -3337,7 +3297,7 @@ files = [ ] [package.extras] -dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [[package]] name = "wsproto" @@ -3368,15 +3328,27 @@ files = [ [package.extras] build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"] -curl-cffi = ["curl-cffi (==0.5.10) ; os_name == \"nt\" and implementation_name == \"cpython\"", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2) ; os_name != \"nt\" and implementation_name == \"cpython\""] -default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] +curl-cffi = ["curl-cffi (==0.5.10)", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2)"] +default = ["brotli", "brotlicffi", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.11.0,<0.12.0)"] pyinstaller = ["pyinstaller (>=6.11.1)"] secretstorage = ["cffi", "secretstorage"] static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.11.0,<0.12.0)"] test = ["pytest 
(>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"] +[[package]] +name = "yt-dlp-get-pot" +version = "0.3.0" +description = "" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "yt_dlp_get_pot-0.3.0-py3-none-any.whl", hash = "sha256:a49a596a3e3c02cd9ce051192ea3fe8168cf24ece8954bed6aa331a87d86954f"}, + {file = "yt_dlp_get_pot-0.3.0.tar.gz", hash = "sha256:ac9530b9e7b3d667235b9119da475f595d2dc7e6f6bbf98b965011be454e8833"}, +] + [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "ac5d473189adbadb3ee5d8a36e1898a39725755704e0677768303ae46bc246c8" +content-hash = "c612e9f98ca5199092141bb04a0de4cd5314a8fdc8cb12c1d63eafe26bbf16aa" From bc35116975c956561621960743ccf45c1f204c06 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 10:37:13 +0000 Subject: [PATCH 30/37] Update poetry.lock --- poetry.lock | 74 ++++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8b3c9f9..8a2819a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -175,18 +175,18 @@ yt-dlp-get-pot = ">=0.1.1" [[package]] name = "boto3" -version = "1.37.18" +version = "1.37.22" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.37.18-py3-none-any.whl", hash = "sha256:1545c943f36db41853cdfdb6ff09c4eda9220dd95bd2fae76fc73091603525d1"}, - {file = "boto3-1.37.18.tar.gz", hash = "sha256:9b272268794172b0b8bb9fb1f3c470c3b6c0ffb92fbd4882465cc740e40fbdcd"}, + {file = "boto3-1.37.22-py3-none-any.whl", hash = "sha256:a14324d5fa5f4fea00c0e3c69754cbd28100f7fe194693eeecf2dc07446cf4ef"}, + {file = "boto3-1.37.22.tar.gz", hash = "sha256:78a0ec0aafbf6044104c98ad80b69e6d1c83d8233fda2c2d241029e6c705c510"}, ] [package.dependencies] -botocore = ">=1.37.18,<1.38.0" +botocore = ">=1.37.22,<1.38.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -195,14 +195,14 @@ crt = 
["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.37.18" +version = "1.37.22" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.37.18-py3-none-any.whl", hash = "sha256:a8b97d217d82b3c4f6bcc906e264df7ebb51e2c6a62b3548a97cd173fb8759a1"}, - {file = "botocore-1.37.18.tar.gz", hash = "sha256:99e8eefd5df6347ead15df07ce55f4e62a51ea7b54de1127522a08597923b726"}, + {file = "botocore-1.37.22-py3-none-any.whl", hash = "sha256:184db7c9314d13002bc827f511a5140574b5da1acda342d51e093dad6317de98"}, + {file = "botocore-1.37.22.tar.gz", hash = "sha256:b3b26f1a90236bcd17d4092f8c85a256b44e9955a16b633319a2f5678d605e9f"}, ] [package.dependencies] @@ -822,14 +822,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.165.0" +version = "2.166.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "google_api_python_client-2.165.0-py2.py3-none-any.whl", hash = "sha256:4eaab7d4a20be0d3d1dde462fa95e9e0ccc2a3e177a656701bf73fe738ddef7d"}, - {file = "google_api_python_client-2.165.0.tar.gz", hash = "sha256:0d2aee76727a104705630bebbc43669c864b766924e9329051ef7b7e2468eb72"}, + {file = "google_api_python_client-2.166.0-py2.py3-none-any.whl", hash = "sha256:dd8cc74d9fc18538ab05cbd2e93cb4f82382f910c5f6945db06c91f1deae6e45"}, + {file = "google_api_python_client-2.166.0.tar.gz", hash = "sha256:b8cf843bd9d736c134aef76cf1dc7a47c9283a2ef24267b97207b9dd43b30ef7"}, ] [package.dependencies] @@ -1654,21 +1654,21 @@ testing = ["google-api-core (>=1.31.5)"] [[package]] name = "protobuf" -version = "6.30.1" +version = "6.30.2" description = "" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "protobuf-6.30.1-cp310-abi3-win32.whl", hash = "sha256:ba0706f948d0195f5cac504da156d88174e03218d9364ab40d903788c1903d7e"}, - 
{file = "protobuf-6.30.1-cp310-abi3-win_amd64.whl", hash = "sha256:ed484f9ddd47f0f1bf0648806cccdb4fe2fb6b19820f9b79a5adf5dcfd1b8c5f"}, - {file = "protobuf-6.30.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:aa4f7dfaed0d840b03d08d14bfdb41348feaee06a828a8c455698234135b4075"}, - {file = "protobuf-6.30.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:47cd320b7db63e8c9ac35f5596ea1c1e61491d8a8eb6d8b45edc44760b53a4f6"}, - {file = "protobuf-6.30.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e3083660225fa94748ac2e407f09a899e6a28bf9c0e70c75def8d15706bf85fc"}, - {file = "protobuf-6.30.1-cp39-cp39-win32.whl", hash = "sha256:554d7e61cce2aa4c63ca27328f757a9f3867bce8ec213bf09096a8d16bcdcb6a"}, - {file = "protobuf-6.30.1-cp39-cp39-win_amd64.whl", hash = "sha256:b510f55ce60f84dc7febc619b47215b900466e3555ab8cb1ba42deb4496d6cc0"}, - {file = "protobuf-6.30.1-py3-none-any.whl", hash = "sha256:3c25e51e1359f1f5fa3b298faa6016e650d148f214db2e47671131b9063c53be"}, - {file = "protobuf-6.30.1.tar.gz", hash = "sha256:535fb4e44d0236893d5cf1263a0f706f1160b689a7ab962e9da8a9ce4050b780"}, + {file = "protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103"}, + {file = "protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9"}, + {file = "protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b"}, + {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815"}, + {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d"}, + {file = "protobuf-6.30.2-cp39-cp39-win32.whl", hash = "sha256:524afedc03b31b15586ca7f64d877a98b184f007180ce25183d1a5cb230ee72b"}, + {file = "protobuf-6.30.2-cp39-cp39-win_amd64.whl", hash 
= "sha256:acec579c39c88bd8fbbacab1b8052c793efe83a0a5bd99db4a31423a25c0a0e2"}, + {file = "protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51"}, + {file = "protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048"}, ] [[package]] @@ -1696,18 +1696,18 @@ files = [ [[package]] name = "pyasn1-modules" -version = "0.4.1" +version = "0.4.2" description = "A collection of ASN.1-based protocols modules" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, - {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, + {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, + {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, ] [package.dependencies] -pyasn1 = ">=0.4.6,<0.7.0" +pyasn1 = ">=0.6.1,<0.7.0" [[package]] name = "pycodestyle" @@ -1819,14 +1819,14 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyparsing" -version = "3.2.2" +version = "3.2.3" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyparsing-3.2.2-py3-none-any.whl", hash = "sha256:6ab05e1cb111cc72acc8ed811a3ca4c2be2af8d7b6df324347f04fd057d8d793"}, - {file = "pyparsing-3.2.2.tar.gz", hash = "sha256:2a857aee851f113c2de9d4bfd9061baea478cb0f1c7ca6cbf594942d6d111575"}, + {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"}, + {file = "pyparsing-3.2.3.tar.gz", hash = 
"sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"}, ] [package.extras] @@ -1980,14 +1980,14 @@ requests = ">=2.28" [[package]] name = "pytz" -version = "2025.1" +version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, - {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] [[package]] @@ -2891,14 +2891,14 @@ wsproto = ">=0.14" [[package]] name = "typing-extensions" -version = "4.12.2" +version = "4.13.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" groups = ["main", "docs"] files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, + {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, + {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, ] [[package]] @@ -3316,19 +3316,19 @@ h11 = ">=0.9.0,<1" [[package]] name = "yt-dlp" -version = "2025.3.21" +version = "2025.3.27" description = "A feature-rich command-line audio/video downloader" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "yt_dlp-2025.3.21-py3-none-any.whl", hash = 
"sha256:80d5ce15f9223e0c27020b861a4c5b72c6ba5d6c957c1b8fd2a022a69783f482"}, - {file = "yt_dlp-2025.3.21.tar.gz", hash = "sha256:5bcf47b2897254ea3816935a8dde47d243bff556782cced6b16a2b85e6b682ba"}, + {file = "yt_dlp-2025.3.27-py3-none-any.whl", hash = "sha256:bef4105af0f25c13c1a505daf46142af45d9312dc2c3553651db083e6860e641"}, + {file = "yt_dlp-2025.3.27.tar.gz", hash = "sha256:30cb078f803b539b2a66521c5ec86d328c07f74aeca9440069e58619c299cf15"}, ] [package.extras] build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"] -curl-cffi = ["curl-cffi (==0.5.10)", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2)"] +curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || ==0.10.*)"] default = ["brotli", "brotlicffi", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.11.0,<0.12.0)"] pyinstaller = ["pyinstaller (>=6.11.1)"] From efab0f9a91cafddb9d313fd8843e0b05ab4ceafc Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 10:37:22 +0000 Subject: [PATCH 31/37] Add test --- tests/extractors/test_generic_extractor.py | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index 1339d33..6becbea 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -292,3 +292,43 @@ class TestGenericExtractor(TestExtractorBase): post = self.extractor.download(make_item(url)) assert "Bellingcat researcher Kolina Koltai delves deeper into Clothoff" in post.get("content") assert post.get_title() == "Bellingcat" + + +class TestGenericExtractorPoToken: + @pytest.fixture + def extractor(self, mocker): + extractor = GenericExtractor() + extractor.extractor_args = {} + extractor.setup_token_generation_script = mocker.Mock() + return extractor + + def 
test_po_token_disabled_does_not_call_setup(self, extractor): + extractor.bguils_po_token_method = "disabled" + extractor.in_docker = True + extractor.setup_po_tokens() + extractor.setup_token_generation_script.assert_not_called() + + def test_po_token_default_in_docker_calls_setup(self, extractor, mocker): + extractor.bguils_po_token_method = "default" + mocker.patch.dict(os.environ, {"RUNNING_IN_DOCKER": "1"}) + extractor.setup_po_tokens() + extractor.setup_token_generation_script.assert_called_once() + + def test_po_token_default_local_does_not_call_setup(self, extractor, caplog, mocker): + extractor.bguils_po_token_method = "default" + # clears env vars for this test + mocker.patch.dict(os.environ, {}, clear=True) + extractor.setup_po_tokens() + extractor.setup_token_generation_script.assert_not_called() + assert "Proof of Origin Token method not explicitly set" in caplog.text + + def test_po_token_script_always_calls_setup(self, extractor): + extractor.bguils_po_token_method = "script" + extractor.in_docker = False + extractor.setup_po_tokens() + extractor.setup_token_generation_script.assert_called_once() + extractor.setup_token_generation_script.reset_mock() + extractor.in_docker = True + extractor.setup_po_tokens() + extractor.setup_token_generation_script.assert_called_once() + From 0840b7283cf0db87fd05549a7abf56474a38e0f0 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 10:43:00 +0000 Subject: [PATCH 32/37] Format --- tests/extractors/test_generic_extractor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index b221f3e..b096c63 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -331,4 +331,3 @@ class TestGenericExtractorPoToken: extractor.in_docker = True extractor.setup_po_tokens() extractor.setup_token_generation_script.assert_called_once() - From dbcf19d1b83921ecd2f33b9305a947c7b7affb04 Mon 
Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 10:55:21 +0000 Subject: [PATCH 33/37] Update update path reference --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6377646..ba7b48d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,7 @@ TESTS_TO_RUN_LAST = ["test_generic_archiver", "test_twitter_api_archiver"] @pytest.fixture(autouse=True) def skip_check_for_update(mocker): update_ytdlp = mocker.patch( - "auto_archiver.modules.generic_extractor.generic_extractor.GenericExtractor.update_ytdlp" + "auto_archiver.modules.generic_extractor.generic_extractor.GenericExtractor.update_package" ) update_ytdlp.return_value = False From 1db7d6702d7ee9dafaca7a5090c51be018a222f7 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 12:27:18 +0000 Subject: [PATCH 34/37] Update the documentation --- docs/source/how_to/authentication_how_to.md | 38 ++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index 025e65b..334c9ac 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -117,7 +117,7 @@ YouTube uses **Proof of Origin Tokens (POT)** as part of its bot detection syste yt-dlp provides [a detailed guide to POTs](https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide). -### How we can add POTs to Auto Archiver +### How Auto Archiver Uses POT This feature is enabled for the Generic Archiver via two yt-dlp plugins: - **Client-side plugin**: [yt-dlp-get-pot](https://github.com/coletdjnz/yt-dlp-get-pot) @@ -130,11 +130,17 @@ These are installed in our Poetry environment. 
### Integration Methods -**Docker**: +**Docker (Recommended)**: When running the Auto Archiver using the Docker image, we use the [Node.js token generation script](https://github.com/Brainicism/bgutil-ytdlp-pot-provider/tree/master/server). This is to avoid managing a separate server process, and is handled automatically inside the Docker container when needed. +This is already included in the Docker image, however if you need to disable this you can set the config option `bguils_po_token_method` under the `generic_extractor` section of your `orchestration.yaml` config file to "disabled". +```yaml +generic_extractor: + bguils_po_token_method: "disabled" +``` + **PyPi/ Local**: When using the Auto Archiver PyPI package, or running locally, you will need additional system requirements to run the token generation script, namely either Docker, or Node.js and Yarn. @@ -143,8 +149,8 @@ See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-p ⚠️WARNING⚠️: This will add the server scripts to the home directory of wherever this is running. -- You can set the config option `"po_token_provider": true` under the `GenericExtractor` section of your config to "script" to enable the token generation script process locally. -- Or you can run the bgutil-ytdlp-pot-provider server separately using their Docker image. +- You can set the config option `bguils_po_token_method` under the `generic_extractor` section of your `orchestration.yaml` config file to "script" to enable the token generation script process locally. +- Alternatively you can run the bgutil-ytdlp-pot-provider server separately using their Docker image or Node.js server. ### Notes @@ -153,12 +159,26 @@ See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-p - If you're running locally, you'll need to run the setup script manually or enable the feature in your config. - You can set up both the server and the script, and the plugin will fallback on each other if needed. 
This is recommended for robustness! -Configurations: -- **default**: In Docker this downloads, transpiles and creates a token generation script. Locally it does nothing. If you are running the bgutil-ytdlp-pot-provider server via Docker you can choose this. -- **script**: Download and create the node script, even outside of Docker. -- **disabled**: Disable POT generation, even in docker. +### Configurations: -### Advanced Configuration +## Configurations Summary + +| Option | Behavior | Docker Default? | +| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ | --------------- | +| `default` | Docker: Automatically downloads and uses the token generation script. Local: Does nothing; assumes a separate server is running externally. | ✅ Yes | +| `script` | Explicitly downloads and uses the token generation script, even locally. | ❌ No | +| `disabled` | Disables token generation completely. | ❌ No | + +Example configuration: + + +```yaml +generic_extractor: + # ... + bguils_po_token_method: "script" +``` + +**Advanced Configuration:** If you change the default port of the bgutil-ytdlp-pot-provider server, you can pass the updated values using our `extractor_args` option for the gereric extractor. 
From 5e7c57650b72e3bc43d593a3756b753a4adf4483 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 13:16:16 +0000 Subject: [PATCH 35/37] Update "default" to "auto" for clarity, update docs --- docs/source/how_to/authentication_how_to.md | 7 +++++-- .../modules/generic_extractor/__manifest__.py | 4 ++-- .../modules/generic_extractor/generic_extractor.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/source/how_to/authentication_how_to.md b/docs/source/how_to/authentication_how_to.md index 334c9ac..60f237a 100644 --- a/docs/source/how_to/authentication_how_to.md +++ b/docs/source/how_to/authentication_how_to.md @@ -164,8 +164,8 @@ See the [bgutil-ytdlp-pot-provider](https://github.com/Brainicism/bgutil-ytdlp-p ## Configurations Summary | Option | Behavior | Docker Default? | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ | --------------- | -| `default` | Docker: Automatically downloads and uses the token generation script. Local: Does nothing; assumes a separate server is running externally. | ✅ Yes | +|------------| ------------------------------------------------------------------------------------------------------------------------------------------ | --------------- | +| `auto` | Docker: Automatically downloads and uses the token generation script. Local: Does nothing; assumes a separate server is running externally. | ✅ Yes | | `script` | Explicitly downloads and uses the token generation script, even locally. | ❌ No | | `disabled` | Disables token generation completely. | ❌ No | @@ -176,6 +176,9 @@ Example configuration: generic_extractor: # ... 
bguils_po_token_method: "script" + # For debugging add the verbose flag here: + ytdlp_args: "--no-abort-on-error --abort-on-error --verbose" + ``` **Advanced Configuration:** diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index f34151c..b734421 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -75,9 +75,9 @@ If you are having issues with the extractor, you can review the version of `yt-d "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, "bguils_po_token_method": { - "default": "default", + "default": "auto", "help": "Set up a Proof of origin token provider. This process has additional requirements. See [authentication](https://auto-archiver.readthedocs.io/en/latest/how_to/authentication_how_to.html) for more information.", - "choices": ["default", "script", "disabled"], + "choices": ["auto", "script", "disabled"], }, "extractor_args": { "default": {}, diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 88f7b38..e56167a 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -88,7 +88,7 @@ class GenericExtractor(Extractor): logger.warning("Proof of Origin Token generation is disabled.") return - if self.bguils_po_token_method == "default" and not in_docker: + if self.bguils_po_token_method == "auto" and not in_docker: logger.info( "Proof of Origin Token method not explicitly set. " "If you're running an external HTTP server separately, you can safely ignore this message. 
" From a0d955fe845bdf86273af4357fa4287514ca6512 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 13:39:58 +0000 Subject: [PATCH 36/37] lock --- poetry.lock | 114 ++++++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8a2819a..e9db266 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -51,7 +51,7 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] -test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] +test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\" and python_version < \"3.14\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -82,12 +82,12 @@ files = [ ] [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; 
platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] [[package]] name = "authlib" @@ -133,7 +133,7 @@ files = [ ] 
[package.extras] -dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"] +dev = ["backports.zoneinfo ; python_version < \"3.9\"", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata ; sys_platform == \"win32\""] [[package]] name = "beautifulsoup4" @@ -175,18 +175,18 @@ yt-dlp-get-pot = ">=0.1.1" [[package]] name = "boto3" -version = "1.37.22" +version = "1.37.18" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "boto3-1.37.22-py3-none-any.whl", hash = "sha256:a14324d5fa5f4fea00c0e3c69754cbd28100f7fe194693eeecf2dc07446cf4ef"}, - {file = "boto3-1.37.22.tar.gz", hash = "sha256:78a0ec0aafbf6044104c98ad80b69e6d1c83d8233fda2c2d241029e6c705c510"}, + {file = "boto3-1.37.18-py3-none-any.whl", hash = "sha256:1545c943f36db41853cdfdb6ff09c4eda9220dd95bd2fae76fc73091603525d1"}, + {file = "boto3-1.37.18.tar.gz", hash = "sha256:9b272268794172b0b8bb9fb1f3c470c3b6c0ffb92fbd4882465cc740e40fbdcd"}, ] [package.dependencies] -botocore = ">=1.37.22,<1.38.0" +botocore = ">=1.37.18,<1.38.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.11.0,<0.12.0" @@ -195,14 +195,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.37.22" +version = "1.37.18" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "botocore-1.37.22-py3-none-any.whl", hash = "sha256:184db7c9314d13002bc827f511a5140574b5da1acda342d51e093dad6317de98"}, - {file = "botocore-1.37.22.tar.gz", hash = "sha256:b3b26f1a90236bcd17d4092f8c85a256b44e9955a16b633319a2f5678d605e9f"}, + {file = "botocore-1.37.18-py3-none-any.whl", hash = "sha256:a8b97d217d82b3c4f6bcc906e264df7ebb51e2c6a62b3548a97cd173fb8759a1"}, + {file = "botocore-1.37.18.tar.gz", hash = "sha256:99e8eefd5df6347ead15df07ce55f4e62a51ea7b54de1127522a08597923b726"}, ] [package.dependencies] @@ -660,10 +660,10 @@ files = [ cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""] docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] -nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] -pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version >= \"3.8\""] +pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] @@ -781,7 +781,7 @@ files = [ [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < 
\"3.11\""] [[package]] name = "future" @@ -816,20 +816,20 @@ requests = ">=2.18.0,<3.0.0" [package.extras] async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.166.0" +version = "2.165.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "google_api_python_client-2.166.0-py2.py3-none-any.whl", hash = "sha256:dd8cc74d9fc18538ab05cbd2e93cb4f82382f910c5f6945db06c91f1deae6e45"}, - {file = "google_api_python_client-2.166.0.tar.gz", hash = "sha256:b8cf843bd9d736c134aef76cf1dc7a47c9283a2ef24267b97207b9dd43b30ef7"}, + {file = "google_api_python_client-2.165.0-py2.py3-none-any.whl", hash = "sha256:4eaab7d4a20be0d3d1dde462fa95e9e0ccc2a3e177a656701bf73fe738ddef7d"}, + {file = "google_api_python_client-2.165.0.tar.gz", hash = "sha256:0d2aee76727a104705630bebbc43669c864b766924e9329051ef7b7e2468eb72"}, ] [package.dependencies] @@ -1115,7 +1115,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov 
(==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] +dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] [[package]] name = "markdown-it-py" @@ -1579,7 +1579,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -1654,21 +1654,21 @@ testing = ["google-api-core 
(>=1.31.5)"] [[package]] name = "protobuf" -version = "6.30.2" +version = "6.30.1" description = "" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "protobuf-6.30.2-cp310-abi3-win32.whl", hash = "sha256:b12ef7df7b9329886e66404bef5e9ce6a26b54069d7f7436a0853ccdeb91c103"}, - {file = "protobuf-6.30.2-cp310-abi3-win_amd64.whl", hash = "sha256:7653c99774f73fe6b9301b87da52af0e69783a2e371e8b599b3e9cb4da4b12b9"}, - {file = "protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:0eb523c550a66a09a0c20f86dd554afbf4d32b02af34ae53d93268c1f73bc65b"}, - {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:50f32cc9fd9cb09c783ebc275611b4f19dfdfb68d1ee55d2f0c7fa040df96815"}, - {file = "protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4f6c687ae8efae6cf6093389a596548214467778146b7245e886f35e1485315d"}, - {file = "protobuf-6.30.2-cp39-cp39-win32.whl", hash = "sha256:524afedc03b31b15586ca7f64d877a98b184f007180ce25183d1a5cb230ee72b"}, - {file = "protobuf-6.30.2-cp39-cp39-win_amd64.whl", hash = "sha256:acec579c39c88bd8fbbacab1b8052c793efe83a0a5bd99db4a31423a25c0a0e2"}, - {file = "protobuf-6.30.2-py3-none-any.whl", hash = "sha256:ae86b030e69a98e08c77beab574cbcb9fff6d031d57209f574a5aea1445f4b51"}, - {file = "protobuf-6.30.2.tar.gz", hash = "sha256:35c859ae076d8c56054c25b59e5e59638d86545ed6e2b6efac6be0b6ea3ba048"}, + {file = "protobuf-6.30.1-cp310-abi3-win32.whl", hash = "sha256:ba0706f948d0195f5cac504da156d88174e03218d9364ab40d903788c1903d7e"}, + {file = "protobuf-6.30.1-cp310-abi3-win_amd64.whl", hash = "sha256:ed484f9ddd47f0f1bf0648806cccdb4fe2fb6b19820f9b79a5adf5dcfd1b8c5f"}, + {file = "protobuf-6.30.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:aa4f7dfaed0d840b03d08d14bfdb41348feaee06a828a8c455698234135b4075"}, + {file = "protobuf-6.30.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:47cd320b7db63e8c9ac35f5596ea1c1e61491d8a8eb6d8b45edc44760b53a4f6"}, + {file = 
"protobuf-6.30.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e3083660225fa94748ac2e407f09a899e6a28bf9c0e70c75def8d15706bf85fc"}, + {file = "protobuf-6.30.1-cp39-cp39-win32.whl", hash = "sha256:554d7e61cce2aa4c63ca27328f757a9f3867bce8ec213bf09096a8d16bcdcb6a"}, + {file = "protobuf-6.30.1-cp39-cp39-win_amd64.whl", hash = "sha256:b510f55ce60f84dc7febc619b47215b900466e3555ab8cb1ba42deb4496d6cc0"}, + {file = "protobuf-6.30.1-py3-none-any.whl", hash = "sha256:3c25e51e1359f1f5fa3b298faa6016e650d148f214db2e47671131b9063c53be"}, + {file = "protobuf-6.30.1.tar.gz", hash = "sha256:535fb4e44d0236893d5cf1263a0f706f1160b689a7ab962e9da8a9ce4050b780"}, ] [[package]] @@ -1696,18 +1696,18 @@ files = [ [[package]] name = "pyasn1-modules" -version = "0.4.2" +version = "0.4.1" description = "A collection of ASN.1-based protocols modules" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, - {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, + {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, + {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, ] [package.dependencies] -pyasn1 = ">=0.6.1,<0.7.0" +pyasn1 = ">=0.4.6,<0.7.0" [[package]] name = "pycodestyle" @@ -1819,14 +1819,14 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyparsing" -version = "3.2.3" +version = "3.2.2" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"}, - {file = 
"pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"}, + {file = "pyparsing-3.2.2-py3-none-any.whl", hash = "sha256:6ab05e1cb111cc72acc8ed811a3ca4c2be2af8d7b6df324347f04fd057d8d793"}, + {file = "pyparsing-3.2.2.tar.gz", hash = "sha256:2a857aee851f113c2de9d4bfd9061baea478cb0f1c7ca6cbf594942d6d111575"}, ] [package.extras] @@ -1980,14 +1980,14 @@ requests = ">=2.28" [[package]] name = "pytz" -version = "2025.2" +version = "2025.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, - {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, + {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, + {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, ] [[package]] @@ -2891,14 +2891,14 @@ wsproto = ">=0.14" [[package]] name = "typing-extensions" -version = "4.13.0" +version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" groups = ["main", "docs"] files = [ - {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, - {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -2991,7 +2991,7 @@ files = [ pysocks = {version = 
">=1.5.6,<1.5.7 || >1.5.7,<2.0", optional = true, markers = "extra == \"socks\""} [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3014,7 +3014,7 @@ h11 = ">=0.8" typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "virtualenv" @@ -3035,7 +3035,7 @@ platformdirs = ">=3.9.1,<5" [package.extras] docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock 
(>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] [[package]] name = "vk-api" @@ -3297,7 +3297,7 @@ files = [ ] [package.extras] -dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] [[package]] name = "wsproto" @@ -3316,20 +3316,20 @@ h11 = ">=0.9.0,<1" [[package]] name = "yt-dlp" -version = "2025.3.27" +version = "2025.3.21" description = "A feature-rich command-line audio/video downloader" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "yt_dlp-2025.3.27-py3-none-any.whl", hash = "sha256:bef4105af0f25c13c1a505daf46142af45d9312dc2c3553651db083e6860e641"}, - {file = "yt_dlp-2025.3.27.tar.gz", hash = "sha256:30cb078f803b539b2a66521c5ec86d328c07f74aeca9440069e58619c299cf15"}, + {file = "yt_dlp-2025.3.21-py3-none-any.whl", hash = "sha256:80d5ce15f9223e0c27020b861a4c5b72c6ba5d6c957c1b8fd2a022a69783f482"}, + {file = "yt_dlp-2025.3.21.tar.gz", hash = "sha256:5bcf47b2897254ea3816935a8dde47d243bff556782cced6b16a2b85e6b682ba"}, ] [package.extras] build = ["build", "hatchling", "pip", "setuptools (>=71.0.2)", "wheel"] -curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || ==0.10.*)"] -default = ["brotli", "brotlicffi", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] +curl-cffi = ["curl-cffi (==0.5.10) ; os_name == \"nt\" and implementation_name == \"cpython\"", "curl-cffi (>=0.5.10,!=0.6.*,<0.7.2) ; os_name != \"nt\" and implementation_name == \"cpython\""] +default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=1.26.17,<3)", "websockets (>=13.0)"] dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff 
(>=0.11.0,<0.12.0)"] pyinstaller = ["pyinstaller (>=6.11.1)"] secretstorage = ["cffi", "secretstorage"] From c510c0464394db2bc400546f9c8cc093148e954b Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Fri, 28 Mar 2025 13:43:46 +0000 Subject: [PATCH 37/37] Update config reference in test_generic_extractor.py --- tests/extractors/test_generic_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index b096c63..f737794 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -309,13 +309,13 @@ class TestGenericExtractorPoToken: extractor.setup_token_generation_script.assert_not_called() def test_po_token_default_in_docker_calls_setup(self, extractor, mocker): - extractor.bguils_po_token_method = "default" + extractor.bguils_po_token_method = "auto" mocker.patch.dict(os.environ, {"RUNNING_IN_DOCKER": "1"}) extractor.setup_po_tokens() extractor.setup_token_generation_script.assert_called_once() def test_po_token_default_local_does_not_call_setup(self, extractor, caplog, mocker): - extractor.bguils_po_token_method = "default" + extractor.bguils_po_token_method = "auto" # clears env vars for this test mocker.patch.dict(os.environ, {}, clear=True) extractor.setup_po_tokens()