mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Merge main
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# iterate through all the modules in auto_archiver.modules and turn the __manifest__.py file into a markdown table
|
||||
from pathlib import Path
|
||||
from auto_archiver.core.module import available_modules
|
||||
from auto_archiver.core.module import ModuleFactory
|
||||
from auto_archiver.core.base_module import BaseModule
|
||||
from ruamel.yaml import YAML
|
||||
import io
|
||||
@@ -41,7 +41,7 @@ def generate_module_docs():
|
||||
configs_cheatsheet = "\n## Configuration Options\n"
|
||||
configs_cheatsheet += header_row
|
||||
|
||||
for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
|
||||
for module in sorted(ModuleFactory().available_modules(), key=lambda x: (x.requires_setup, x.name)):
|
||||
# generate the markdown file from the __manifest__.py file.
|
||||
|
||||
manifest = module.manifest
|
||||
|
||||
310
poetry.lock
generated
310
poetry.lock
generated
@@ -103,14 +103,14 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "Authlib-1.4.0-py2.py3-none-any.whl", hash = "sha256:4bb20b978c8b636222b549317c1815e1fe62234fc1c5efe8855d84aebf3a74e3"},
|
||||
{file = "authlib-1.4.0.tar.gz", hash = "sha256:1c1e6608b5ed3624aeeee136ca7f8c120d6f51f731aa152b153d54741840e1f2"},
|
||||
{file = "Authlib-1.4.1-py2.py3-none-any.whl", hash = "sha256:edc29c3f6a3e72cd9e9f45fff67fc663a2c364022eb0371c003f22d5405915c1"},
|
||||
{file = "authlib-1.4.1.tar.gz", hash = "sha256:30ead9ea4993cdbab821dc6e01e818362f92da290c04c7f6a1940f86507a790d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -134,33 +134,34 @@ tomli = {version = "*", markers = "python_version < \"3.11\""}
|
||||
|
||||
[[package]]
|
||||
name = "babel"
|
||||
version = "2.16.0"
|
||||
version = "2.17.0"
|
||||
description = "Internationalization utilities"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["docs"]
|
||||
files = [
|
||||
{file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"},
|
||||
{file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"},
|
||||
{file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"},
|
||||
{file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
|
||||
dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.12.3"
|
||||
version = "4.13.3"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
python-versions = ">=3.7.0"
|
||||
groups = ["main", "docs"]
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
|
||||
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
|
||||
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
||||
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
typing-extensions = ">=4.0.0"
|
||||
|
||||
[package.extras]
|
||||
cchardet = ["cchardet"]
|
||||
@@ -171,18 +172,18 @@ lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.36.6"
|
||||
version = "1.36.22"
|
||||
description = "The AWS SDK for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "boto3-1.36.6-py3-none-any.whl", hash = "sha256:6d473f0f340d02b4e9ad5b8e68786a09728101a8b950231b89ebdaf72b6dca21"},
|
||||
{file = "boto3-1.36.6.tar.gz", hash = "sha256:b36feae061dc0793cf311468956a0a9e99215ce38bc99a1a4e55a5b105f16297"},
|
||||
{file = "boto3-1.36.22-py3-none-any.whl", hash = "sha256:39957eabdce009353d72d131046489fbbfa15891865d5f069f1e8bfa414e6b81"},
|
||||
{file = "boto3-1.36.22.tar.gz", hash = "sha256:768c8a4d4a6227fe2258105efa086f1424cba5ca915a5eb2305b2cd979306ad1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
botocore = ">=1.36.6,<1.37.0"
|
||||
botocore = ">=1.36.22,<1.37.0"
|
||||
jmespath = ">=0.7.1,<2.0.0"
|
||||
s3transfer = ">=0.11.0,<0.12.0"
|
||||
|
||||
@@ -191,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
|
||||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.36.6"
|
||||
version = "1.36.22"
|
||||
description = "Low-level, data-driven core of boto 3."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "botocore-1.36.6-py3-none-any.whl", hash = "sha256:f77bbbb03fb420e260174650fb5c0cc142ec20a96967734eed2b0ef24334ef34"},
|
||||
{file = "botocore-1.36.6.tar.gz", hash = "sha256:4864c53d638da191a34daf3ede3ff1371a3719d952cc0c6bd24ce2836a38dd77"},
|
||||
{file = "botocore-1.36.22-py3-none-any.whl", hash = "sha256:75d6b34acb0686ee4d54ff6eb285e78ccfe318407428769d1e3e13351714d890"},
|
||||
{file = "botocore-1.36.22.tar.gz", hash = "sha256:59520247d5a479731724f97c995d5a1c2aae3b303b324f39d99efcfad1d3019e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -207,7 +208,7 @@ python-dateutil = ">=2.1,<3.0.0"
|
||||
urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
|
||||
|
||||
[package.extras]
|
||||
crt = ["awscrt (==0.23.4)"]
|
||||
crt = ["awscrt (==0.23.8)"]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
@@ -674,26 +675,26 @@ typing-inspect = ">=0.4.0,<1"
|
||||
|
||||
[[package]]
|
||||
name = "dateparser"
|
||||
version = "1.2.0"
|
||||
version = "1.2.1"
|
||||
description = "Date parsing library designed to parse dates from HTML pages"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "dateparser-1.2.0-py2.py3-none-any.whl", hash = "sha256:0b21ad96534e562920a0083e97fd45fa959882d4162acc358705144520a35830"},
|
||||
{file = "dateparser-1.2.0.tar.gz", hash = "sha256:7975b43a4222283e0ae15be7b4999d08c9a70e2d378ac87385b1ccf2cffbbb30"},
|
||||
{file = "dateparser-1.2.1-py3-none-any.whl", hash = "sha256:bdcac262a467e6260030040748ad7c10d6bacd4f3b9cdb4cfd2251939174508c"},
|
||||
{file = "dateparser-1.2.1.tar.gz", hash = "sha256:7e4919aeb48481dbfc01ac9683c8e20bfe95bb715a38c1e9f6af889f4f30ccc3"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
python-dateutil = "*"
|
||||
pytz = "*"
|
||||
regex = "<2019.02.19 || >2019.02.19,<2021.8.27 || >2021.8.27"
|
||||
tzlocal = "*"
|
||||
python-dateutil = ">=2.7.0"
|
||||
pytz = ">=2024.2"
|
||||
regex = ">=2015.06.24,<2019.02.19 || >2019.02.19,<2021.8.27 || >2021.8.27"
|
||||
tzlocal = ">=0.2"
|
||||
|
||||
[package.extras]
|
||||
calendars = ["convertdate", "hijri-converter"]
|
||||
fasttext = ["fasttext"]
|
||||
langdetect = ["langdetect"]
|
||||
calendars = ["convertdate (>=2.2.1)", "hijridate"]
|
||||
fasttext = ["fasttext (>=0.9.1)", "numpy (>=1.19.3,<2)"]
|
||||
langdetect = ["langdetect (>=1.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "docutils"
|
||||
@@ -755,14 +756,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "google-api-core"
|
||||
version = "2.24.0"
|
||||
version = "2.24.1"
|
||||
description = "Google API client core library"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "google_api_core-2.24.0-py3-none-any.whl", hash = "sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9"},
|
||||
{file = "google_api_core-2.24.0.tar.gz", hash = "sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf"},
|
||||
{file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"},
|
||||
{file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -780,14 +781,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
|
||||
|
||||
[[package]]
|
||||
name = "google-api-python-client"
|
||||
version = "2.159.0"
|
||||
version = "2.161.0"
|
||||
description = "Google API Client Library for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "google_api_python_client-2.159.0-py2.py3-none-any.whl", hash = "sha256:baef0bb631a60a0bd7c0bf12a5499e3a40cd4388484de7ee55c1950bf820a0cf"},
|
||||
{file = "google_api_python_client-2.159.0.tar.gz", hash = "sha256:55197f430f25c907394b44fa078545ffef89d33fd4dca501b7db9f0d8e224bd6"},
|
||||
{file = "google_api_python_client-2.161.0-py2.py3-none-any.whl", hash = "sha256:9476a5a4f200bae368140453df40f9cda36be53fa7d0e9a9aac4cdb859a26448"},
|
||||
{file = "google_api_python_client-2.161.0.tar.gz", hash = "sha256:324c0cce73e9ea0a0d2afd5937e01b7c2d6a4d7e2579cdb6c384f9699d6c9f37"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -859,14 +860,14 @@ tool = ["click (>=6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "googleapis-common-protos"
|
||||
version = "1.66.0"
|
||||
version = "1.67.0"
|
||||
description = "Common protobufs used in Google APIs"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
|
||||
{file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
|
||||
{file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"},
|
||||
{file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1158,14 +1159,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "marshmallow"
|
||||
version = "3.26.0"
|
||||
version = "3.26.1"
|
||||
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "marshmallow-3.26.0-py3-none-any.whl", hash = "sha256:1287bca04e6a5f4094822ac153c03da5e214a0a60bcd557b140f3e66991b8ca1"},
|
||||
{file = "marshmallow-3.26.0.tar.gz", hash = "sha256:eb36762a1cc76d7abf831e18a3a1b26d3d481bbc74581b8e532a3d3a8115e1cb"},
|
||||
{file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"},
|
||||
{file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1234,14 +1235,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "myst-parser"
|
||||
version = "4.0.0"
|
||||
version = "4.0.1"
|
||||
description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["docs"]
|
||||
files = [
|
||||
{file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"},
|
||||
{file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"},
|
||||
{file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"},
|
||||
{file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1253,10 +1254,10 @@ pyyaml = "*"
|
||||
sphinx = ">=7,<9"
|
||||
|
||||
[package.extras]
|
||||
code-style = ["pre-commit (>=3.0,<4.0)"]
|
||||
code-style = ["pre-commit (>=4.0,<5.0)"]
|
||||
linkify = ["linkify-it-py (>=2.0,<3.0)"]
|
||||
rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
|
||||
testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
|
||||
testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
|
||||
testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"]
|
||||
|
||||
[[package]]
|
||||
@@ -1530,14 +1531,14 @@ testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "proto-plus"
|
||||
version = "1.25.0"
|
||||
description = "Beautiful, Pythonic protocol buffers."
|
||||
version = "1.26.0"
|
||||
description = "Beautiful, Pythonic protocol buffers"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"},
|
||||
{file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"},
|
||||
{file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"},
|
||||
{file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1814,6 +1815,24 @@ loguru = "*"
|
||||
[package.extras]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-mock"
|
||||
version = "3.14.0"
|
||||
description = "Thin-wrapper around the mock package for easier use with pytest"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"},
|
||||
{file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pytest = ">=6.2.5"
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "pytest-asyncio", "tox"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
@@ -1866,14 +1885,14 @@ requests = ">=2.28"
|
||||
|
||||
[[package]]
|
||||
name = "pytz"
|
||||
version = "2024.2"
|
||||
version = "2025.1"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"},
|
||||
{file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"},
|
||||
{file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"},
|
||||
{file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2122,14 +2141,14 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
|
||||
|
||||
[[package]]
|
||||
name = "rich-argparse"
|
||||
version = "1.6.0"
|
||||
version = "1.7.0"
|
||||
description = "Rich help formatters for argparse and optparse"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7"},
|
||||
{file = "rich_argparse-1.6.0.tar.gz", hash = "sha256:092083c30da186f25bcdff8b1d47fdfb571288510fb051e0488a72cc3128de13"},
|
||||
{file = "rich_argparse-1.7.0-py3-none-any.whl", hash = "sha256:b8ec8943588e9731967f4f97b735b03dc127c416f480a083060433a97baf2fd3"},
|
||||
{file = "rich_argparse-1.7.0.tar.gz", hash = "sha256:f31d809c465ee43f367d599ccaf88b73bc2c4d75d74ed43f2d538838c53544ba"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -2362,24 +2381,24 @@ test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools
|
||||
|
||||
[[package]]
|
||||
name = "sphinx-autoapi"
|
||||
version = "3.4.0"
|
||||
version = "3.6.0"
|
||||
description = "Sphinx API documentation generator"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.9"
|
||||
groups = ["docs"]
|
||||
files = [
|
||||
{file = "sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92"},
|
||||
{file = "sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c"},
|
||||
{file = "sphinx_autoapi-3.6.0-py3-none-any.whl", hash = "sha256:f3b66714493cab140b0e896d33ce7137654a16ac1edb6563edcbd47bf975f711"},
|
||||
{file = "sphinx_autoapi-3.6.0.tar.gz", hash = "sha256:c685f274e41d0842ae7e199460c322c4bd7fec816ccc2da8d806094b4f64af06"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
astroid = [
|
||||
{version = ">=2.7", markers = "python_version < \"3.12\""},
|
||||
{version = ">=3.0.0a1", markers = "python_version >= \"3.12\""},
|
||||
{version = ">=3", markers = "python_version >= \"3.12\""},
|
||||
]
|
||||
Jinja2 = "*"
|
||||
PyYAML = "*"
|
||||
sphinx = ">=6.1.0"
|
||||
sphinx = ">=7.4.0"
|
||||
|
||||
[[package]]
|
||||
name = "sphinx-autobuild"
|
||||
@@ -2679,14 +2698,14 @@ telegram = ["requests"]
|
||||
|
||||
[[package]]
|
||||
name = "trio"
|
||||
version = "0.28.0"
|
||||
version = "0.29.0"
|
||||
description = "A friendly Python library for async concurrency and I/O"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "trio-0.28.0-py3-none-any.whl", hash = "sha256:56d58977acc1635735a96581ec70513cc781b8b6decd299c487d3be2a721cd94"},
|
||||
{file = "trio-0.28.0.tar.gz", hash = "sha256:4e547896fe9e8a5658e54e4c7c5fa1db748cbbbaa7c965e7d40505b928c73c05"},
|
||||
{file = "trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66"},
|
||||
{file = "trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -2700,18 +2719,19 @@ sortedcontainers = "*"
|
||||
|
||||
[[package]]
|
||||
name = "trio-websocket"
|
||||
version = "0.11.1"
|
||||
version = "0.12.1"
|
||||
description = "WebSocket library for Trio"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"},
|
||||
{file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"},
|
||||
{file = "trio_websocket-0.12.1-py3-none-any.whl", hash = "sha256:608ec746bb287e5d5a66baf483e41194193c5cf05ffaad6240e7d1fcd80d1e6f"},
|
||||
{file = "trio_websocket-0.12.1.tar.gz", hash = "sha256:d55ccd4d3eae27c494f3fdae14823317839bdcb8214d1173eacc4d42c69fc91b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
|
||||
outcome = ">=1.2.0"
|
||||
trio = ">=0.11"
|
||||
wsproto = ">=0.14"
|
||||
|
||||
@@ -2778,14 +2798,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "tzlocal"
|
||||
version = "5.2"
|
||||
version = "5.3"
|
||||
description = "tzinfo object for the local timezone"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"},
|
||||
{file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"},
|
||||
{file = "tzlocal-5.3-py3-none-any.whl", hash = "sha256:3814135a1bb29763c6e4f08fd6e41dbb435c7a60bfbb03270211bcc537187d8c"},
|
||||
{file = "tzlocal-5.3.tar.gz", hash = "sha256:2fafbfc07e9d8b49ade18f898d6bcd37ae88ce3ad6486842a2e4f03af68323d2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -3031,81 +3051,81 @@ test = ["websockets"]
|
||||
|
||||
[[package]]
|
||||
name = "websockets"
|
||||
version = "14.2"
|
||||
version = "15.0"
|
||||
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "docs"]
|
||||
files = [
|
||||
{file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"},
|
||||
{file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"},
|
||||
{file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"},
|
||||
{file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"},
|
||||
{file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"},
|
||||
{file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"},
|
||||
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"},
|
||||
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"},
|
||||
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"},
|
||||
{file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"},
|
||||
{file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"},
|
||||
{file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"},
|
||||
{file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"},
|
||||
{file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"},
|
||||
{file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"},
|
||||
{file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"},
|
||||
{file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"},
|
||||
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"},
|
||||
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"},
|
||||
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"},
|
||||
{file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"},
|
||||
{file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"},
|
||||
{file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"},
|
||||
{file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"},
|
||||
{file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"},
|
||||
{file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"},
|
||||
{file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"},
|
||||
{file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"},
|
||||
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"},
|
||||
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"},
|
||||
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"},
|
||||
{file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"},
|
||||
{file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"},
|
||||
{file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"},
|
||||
{file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"},
|
||||
{file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"},
|
||||
{file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"},
|
||||
{file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"},
|
||||
{file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"},
|
||||
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"},
|
||||
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"},
|
||||
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"},
|
||||
{file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"},
|
||||
{file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"},
|
||||
{file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"},
|
||||
{file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"},
|
||||
{file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"},
|
||||
{file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"},
|
||||
{file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"},
|
||||
{file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"},
|
||||
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"},
|
||||
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"},
|
||||
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"},
|
||||
{file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"},
|
||||
{file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"},
|
||||
{file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"},
|
||||
{file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"},
|
||||
{file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"},
|
||||
{file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"},
|
||||
{file = "websockets-15.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5e6ee18a53dd5743e6155b8ff7e8e477c25b29b440f87f65be8165275c87fef0"},
|
||||
{file = "websockets-15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ee06405ea2e67366a661ed313e14cf2a86e84142a3462852eb96348f7219cee3"},
|
||||
{file = "websockets-15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8711682a629bbcaf492f5e0af72d378e976ea1d127a2d47584fa1c2c080b436b"},
|
||||
{file = "websockets-15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94c4a9b01eede952442c088d415861b0cf2053cbd696b863f6d5022d4e4e2453"},
|
||||
{file = "websockets-15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45535fead66e873f411c1d3cf0d3e175e66f4dd83c4f59d707d5b3e4c56541c4"},
|
||||
{file = "websockets-15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e389efe46ccb25a1f93d08c7a74e8123a2517f7b7458f043bd7529d1a63ffeb"},
|
||||
{file = "websockets-15.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:67a04754d121ea5ca39ddedc3f77071651fb5b0bc6b973c71c515415b44ed9c5"},
|
||||
{file = "websockets-15.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bd66b4865c8b853b8cca7379afb692fc7f52cf898786537dfb5e5e2d64f0a47f"},
|
||||
{file = "websockets-15.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a4cc73a6ae0a6751b76e69cece9d0311f054da9b22df6a12f2c53111735657c8"},
|
||||
{file = "websockets-15.0-cp310-cp310-win32.whl", hash = "sha256:89da58e4005e153b03fe8b8794330e3f6a9774ee9e1c3bd5bc52eb098c3b0c4f"},
|
||||
{file = "websockets-15.0-cp310-cp310-win_amd64.whl", hash = "sha256:4ff380aabd7a74a42a760ee76c68826a8f417ceb6ea415bd574a035a111fd133"},
|
||||
{file = "websockets-15.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dd24c4d256558429aeeb8d6c24ebad4e982ac52c50bc3670ae8646c181263965"},
|
||||
{file = "websockets-15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f83eca8cbfd168e424dfa3b3b5c955d6c281e8fc09feb9d870886ff8d03683c7"},
|
||||
{file = "websockets-15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4095a1f2093002c2208becf6f9a178b336b7572512ee0a1179731acb7788e8ad"},
|
||||
{file = "websockets-15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb915101dfbf318486364ce85662bb7b020840f68138014972c08331458d41f3"},
|
||||
{file = "websockets-15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45d464622314973d78f364689d5dbb9144e559f93dca11b11af3f2480b5034e1"},
|
||||
{file = "websockets-15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace960769d60037ca9625b4c578a6f28a14301bd2a1ff13bb00e824ac9f73e55"},
|
||||
{file = "websockets-15.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c7cd4b1015d2f60dfe539ee6c95bc968d5d5fad92ab01bb5501a77393da4f596"},
|
||||
{file = "websockets-15.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4f7290295794b5dec470867c7baa4a14182b9732603fd0caf2a5bf1dc3ccabf3"},
|
||||
{file = "websockets-15.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3abd670ca7ce230d5a624fd3d55e055215d8d9b723adee0a348352f5d8d12ff4"},
|
||||
{file = "websockets-15.0-cp311-cp311-win32.whl", hash = "sha256:110a847085246ab8d4d119632145224d6b49e406c64f1bbeed45c6f05097b680"},
|
||||
{file = "websockets-15.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7bbbe2cd6ed80aceef2a14e9f1c1b61683194c216472ed5ff33b700e784e37"},
|
||||
{file = "websockets-15.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cccc18077acd34c8072578394ec79563664b1c205f7a86a62e94fafc7b59001f"},
|
||||
{file = "websockets-15.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4c22992e24f12de340ca5f824121a5b3e1a37ad4360b4e1aaf15e9d1c42582d"},
|
||||
{file = "websockets-15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1206432cc6c644f6fc03374b264c5ff805d980311563202ed7fef91a38906276"},
|
||||
{file = "websockets-15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d3cc75ef3e17490042c47e0523aee1bcc4eacd2482796107fd59dd1100a44bc"},
|
||||
{file = "websockets-15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b89504227a5311610e4be16071465885a0a3d6b0e82e305ef46d9b064ce5fb72"},
|
||||
{file = "websockets-15.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56e3efe356416bc67a8e093607315951d76910f03d2b3ad49c4ade9207bf710d"},
|
||||
{file = "websockets-15.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f2205cdb444a42a7919690238fb5979a05439b9dbb73dd47c863d39640d85ab"},
|
||||
{file = "websockets-15.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aea01f40995fa0945c020228ab919b8dfc93fc8a9f2d3d705ab5b793f32d9e99"},
|
||||
{file = "websockets-15.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9f8e33747b1332db11cf7fcf4a9512bef9748cb5eb4d3f7fbc8c30d75dc6ffc"},
|
||||
{file = "websockets-15.0-cp312-cp312-win32.whl", hash = "sha256:32e02a2d83f4954aa8c17e03fe8ec6962432c39aca4be7e8ee346b05a3476904"},
|
||||
{file = "websockets-15.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc02b159b65c05f2ed9ec176b715b66918a674bd4daed48a9a7a590dd4be1aa"},
|
||||
{file = "websockets-15.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d2244d8ab24374bed366f9ff206e2619345f9cd7fe79aad5225f53faac28b6b1"},
|
||||
{file = "websockets-15.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3a302241fbe825a3e4fe07666a2ab513edfdc6d43ce24b79691b45115273b5e7"},
|
||||
{file = "websockets-15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:10552fed076757a70ba2c18edcbc601c7637b30cdfe8c24b65171e824c7d6081"},
|
||||
{file = "websockets-15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c53f97032b87a406044a1c33d1e9290cc38b117a8062e8a8b285175d7e2f99c9"},
|
||||
{file = "websockets-15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1caf951110ca757b8ad9c4974f5cac7b8413004d2f29707e4d03a65d54cedf2b"},
|
||||
{file = "websockets-15.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bf1ab71f9f23b0a1d52ec1682a3907e0c208c12fef9c3e99d2b80166b17905f"},
|
||||
{file = "websockets-15.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bfcd3acc1a81f106abac6afd42327d2cf1e77ec905ae11dc1d9142a006a496b6"},
|
||||
{file = "websockets-15.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c8c5c8e1bac05ef3c23722e591ef4f688f528235e2480f157a9cfe0a19081375"},
|
||||
{file = "websockets-15.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:86bfb52a9cfbcc09aba2b71388b0a20ea5c52b6517c0b2e316222435a8cdab72"},
|
||||
{file = "websockets-15.0-cp313-cp313-win32.whl", hash = "sha256:26ba70fed190708551c19a360f9d7eca8e8c0f615d19a574292b7229e0ae324c"},
|
||||
{file = "websockets-15.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae721bcc8e69846af00b7a77a220614d9b2ec57d25017a6bbde3a99473e41ce8"},
|
||||
{file = "websockets-15.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c348abc5924caa02a62896300e32ea80a81521f91d6db2e853e6b1994017c9f6"},
|
||||
{file = "websockets-15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5294fcb410ed0a45d5d1cdedc4e51a60aab5b2b3193999028ea94afc2f554b05"},
|
||||
{file = "websockets-15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c24ba103ecf45861e2e1f933d40b2d93f5d52d8228870c3e7bf1299cd1cb8ff1"},
|
||||
{file = "websockets-15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc8821a03bcfb36e4e4705316f6b66af28450357af8a575dc8f4b09bf02a3dee"},
|
||||
{file = "websockets-15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc5ae23ada6515f31604f700009e2df90b091b67d463a8401c1d8a37f76c1d7"},
|
||||
{file = "websockets-15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ac67b542505186b3bbdaffbc303292e1ee9c8729e5d5df243c1f20f4bb9057e"},
|
||||
{file = "websockets-15.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c86dc2068f1c5ca2065aca34f257bbf4f78caf566eb230f692ad347da191f0a1"},
|
||||
{file = "websockets-15.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:30cff3ef329682b6182c01c568f551481774c476722020b8f7d0daacbed07a17"},
|
||||
{file = "websockets-15.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:98dcf978d4c6048965d1762abd534c9d53bae981a035bfe486690ba11f49bbbb"},
|
||||
{file = "websockets-15.0-cp39-cp39-win32.whl", hash = "sha256:37d66646f929ae7c22c79bc73ec4074d6db45e6384500ee3e0d476daf55482a9"},
|
||||
{file = "websockets-15.0-cp39-cp39-win_amd64.whl", hash = "sha256:24d5333a9b2343330f0f4eb88546e2c32a7f5c280f8dd7d3cc079beb0901781b"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b499caef4bca9cbd0bd23cd3386f5113ee7378094a3cb613a2fa543260fe9506"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:17f2854c6bd9ee008c4b270f7010fe2da6c16eac5724a175e75010aacd905b31"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89f72524033abbfde880ad338fd3c2c16e31ae232323ebdfbc745cbb1b3dcc03"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1657a9eecb29d7838e3b415458cc494e6d1b194f7ac73a34aa55c6fb6c72d1f3"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e413352a921f5ad5d66f9e2869b977e88d5103fc528b6deb8423028a2befd842"},
|
||||
{file = "websockets-15.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8561c48b0090993e3b2a54db480cab1d23eb2c5735067213bb90f402806339f5"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:190bc6ef8690cd88232a038d1b15714c258f79653abad62f7048249b09438af3"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:327adab7671f3726b0ba69be9e865bba23b37a605b585e65895c428f6e47e766"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd8ef197c87afe0a9009f7a28b5dc613bfc585d329f80b7af404e766aa9e8c7"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:789c43bf4a10cd067c24c321238e800b8b2716c863ddb2294d2fed886fa5a689"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7394c0b7d460569c9285fa089a429f58465db930012566c03046f9e3ab0ed181"},
|
||||
{file = "websockets-15.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ea4f210422b912ebe58ef0ad33088bc8e5c5ff9655a8822500690abc3b1232d"},
|
||||
{file = "websockets-15.0-py3-none-any.whl", hash = "sha256:51ffd53c53c4442415b613497a34ba0aa7b99ac07f1e4a62db5dcd640ae6c3c3"},
|
||||
{file = "websockets-15.0.tar.gz", hash = "sha256:ca36151289a15b39d8d683fd8b7abbe26fc50be311066c5f8dcf3cb8cee107ab"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3164,4 +3184,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "b3a6142d6495bc4c8741e9411d29352af219851e4b84b263f991e1bb6db1614e"
|
||||
content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188"
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[project]
|
||||
name = "auto-archiver"
|
||||
version = "0.13.3"
|
||||
version = "0.13.4"
|
||||
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
||||
|
||||
requires-python = ">=3.10,<3.13"
|
||||
@@ -63,6 +63,7 @@ dependencies = [
|
||||
pytest = "^8.3.4"
|
||||
autopep8 = "^2.3.1"
|
||||
pytest-loguru = "^0.4.0"
|
||||
pytest-mock = "^3.14.0"
|
||||
|
||||
[tool.poetry.group.docs.dependencies]
|
||||
sphinx = "^8.1.3"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"""
|
||||
from .metadata import Metadata
|
||||
from .media import Media
|
||||
from .module import BaseModule
|
||||
from .base_module import BaseModule
|
||||
|
||||
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
||||
# from .orchestrator import ArchivingOrchestrator
|
||||
|
||||
@@ -1,13 +1,18 @@
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from typing import Mapping, Any
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Mapping, Any, Type, TYPE_CHECKING
|
||||
from abc import ABC
|
||||
from copy import deepcopy, copy
|
||||
from tempfile import TemporaryDirectory
|
||||
from auto_archiver.utils import url as UrlUtil
|
||||
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES
|
||||
|
||||
from loguru import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .module import ModuleFactory
|
||||
|
||||
class BaseModule(ABC):
|
||||
|
||||
"""
|
||||
@@ -17,41 +22,24 @@ class BaseModule(ABC):
|
||||
however modules can have a .setup() method to run any setup code
|
||||
(e.g. logging in to a site, spinning up a browser etc.)
|
||||
|
||||
See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
|
||||
See consts.MODULE_TYPES for the types of modules you can create, noting that
|
||||
a subclass can be of multiple types. For example, a module that extracts data from
|
||||
a website and stores it in a database would be both an 'extractor' and a 'database' module.
|
||||
|
||||
Each module is a python package, and should have a __manifest__.py file in the
|
||||
same directory as the module file. The __manifest__.py specifies the module information
|
||||
like name, author, version, dependencies etc. See BaseModule._DEFAULT_MANIFEST for the
|
||||
like name, author, version, dependencies etc. See DEFAULT_MANIFEST for the
|
||||
default manifest structure.
|
||||
|
||||
"""
|
||||
|
||||
MODULE_TYPES = [
|
||||
'feeder',
|
||||
'extractor',
|
||||
'enricher',
|
||||
'database',
|
||||
'storage',
|
||||
'formatter'
|
||||
]
|
||||
|
||||
_DEFAULT_MANIFEST = {
|
||||
'name': '', # the display name of the module
|
||||
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
|
||||
'type': [], # the type of the module, can be one or more of BaseModule.MODULE_TYPES
|
||||
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
|
||||
'description': '', # a description of the module
|
||||
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
|
||||
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
|
||||
'version': '1.0', # the version of the module
|
||||
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
|
||||
}
|
||||
MODULE_TYPES = CONF_MODULE_TYPES
|
||||
|
||||
# NOTE: these are declared as class variables, but they are overridden by the instance variables in the __init__ method
|
||||
config: Mapping[str, Any]
|
||||
authentication: Mapping[str, Mapping[str, str]]
|
||||
name: str
|
||||
module_factory: ModuleFactory
|
||||
|
||||
# this is set by the orchestrator prior to archiving
|
||||
tmp_dir: TemporaryDirectory = None
|
||||
|
||||
@@ -10,8 +10,8 @@ from ruamel.yaml import YAML, CommentedMap, add_representer
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from copy import deepcopy, copy
|
||||
from .module import BaseModule
|
||||
from copy import deepcopy
|
||||
from auto_archiver.core.consts import MODULE_TYPES
|
||||
|
||||
from typing import Any, List, Type, Tuple
|
||||
|
||||
@@ -21,7 +21,7 @@ EMPTY_CONFIG = _yaml.load("""
|
||||
# Auto Archiver Configuration
|
||||
# Steps are the modules that will be run in the order they are defined
|
||||
|
||||
steps:""" + "".join([f"\n {module}s: []" for module in BaseModule.MODULE_TYPES]) + \
|
||||
steps:""" + "".join([f"\n {module}s: []" for module in MODULE_TYPES]) + \
|
||||
"""
|
||||
|
||||
# Global configuration
|
||||
@@ -170,4 +170,4 @@ def store_yaml(config: CommentedMap, yaml_filename: str) -> None:
|
||||
|
||||
config_to_save.pop('urls', None)
|
||||
with open(yaml_filename, "w", encoding="utf-8") as outf:
|
||||
_yaml.dump(config_to_save, outf)
|
||||
_yaml.dump(config_to_save, outf)
|
||||
|
||||
23
src/auto_archiver/core/consts.py
Normal file
23
src/auto_archiver/core/consts.py
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
MODULE_TYPES = [
|
||||
'feeder',
|
||||
'extractor',
|
||||
'enricher',
|
||||
'database',
|
||||
'storage',
|
||||
'formatter'
|
||||
]
|
||||
|
||||
MANIFEST_FILE = "__manifest__.py"
|
||||
|
||||
DEFAULT_MANIFEST = {
|
||||
'name': '', # the display name of the module
|
||||
'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
|
||||
'type': [], # the type of the module, can be one or more of MODULE_TYPES
|
||||
'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
|
||||
'description': '', # a description of the module
|
||||
'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
|
||||
'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
|
||||
'version': '1.0', # the version of the module
|
||||
'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
|
||||
}
|
||||
@@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
from typing import List, TYPE_CHECKING
|
||||
import shutil
|
||||
import ast
|
||||
import copy
|
||||
@@ -16,99 +16,113 @@ import os
|
||||
from os.path import join
|
||||
from loguru import logger
|
||||
import auto_archiver
|
||||
from .base_module import BaseModule
|
||||
from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE
|
||||
|
||||
_LAZY_LOADED_MODULES = {}
|
||||
|
||||
MANIFEST_FILE = "__manifest__.py"
|
||||
if TYPE_CHECKING:
|
||||
from .base_module import BaseModule
|
||||
|
||||
|
||||
def setup_paths(paths: list[str]) -> None:
|
||||
"""
|
||||
Sets up the paths for the modules to be loaded from
|
||||
|
||||
This is necessary for the modules to be imported correctly
|
||||
|
||||
"""
|
||||
for path in paths:
|
||||
# check path exists, if it doesn't, log a warning
|
||||
if not os.path.exists(path):
|
||||
logger.warning(f"Path '{path}' does not exist. Skipping...")
|
||||
continue
|
||||
HAS_SETUP_PATHS = False
|
||||
|
||||
# see odoo/module/module.py -> initialize_sys_path
|
||||
if path not in auto_archiver.modules.__path__:
|
||||
auto_archiver.modules.__path__.append(path)
|
||||
class ModuleFactory:
|
||||
|
||||
# sort based on the length of the path in descending order (reverse=True), so that the longest path is first in the list
|
||||
auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
|
||||
def __init__(self):
|
||||
self._lazy_modules = {}
|
||||
|
||||
def get_module(module_name: str, config: dict) -> BaseModule:
|
||||
"""
|
||||
Gets and sets up a module using the provided config
|
||||
|
||||
This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
|
||||
|
||||
"""
|
||||
return get_module_lazy(module_name).load(config)
|
||||
def setup_paths(self, paths: list[str]) -> None:
|
||||
"""
|
||||
Sets up the paths for the modules to be loaded from
|
||||
|
||||
This is necessary for the modules to be imported correctly
|
||||
|
||||
"""
|
||||
global HAS_SETUP_PATHS
|
||||
|
||||
def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
|
||||
"""
|
||||
Lazily loads a module, returning a LazyBaseModule
|
||||
|
||||
This has all the information about the module, but does not load the module itself or its dependencies
|
||||
|
||||
To load an actual module, call .setup() on a lazy module
|
||||
|
||||
"""
|
||||
if module_name in _LAZY_LOADED_MODULES:
|
||||
return _LAZY_LOADED_MODULES[module_name]
|
||||
|
||||
available = available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
|
||||
if not available:
|
||||
raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
|
||||
return available[0]
|
||||
|
||||
def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
|
||||
|
||||
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
||||
|
||||
# see odoo/modules/module.py -> get_modules
|
||||
def is_really_module(module_path):
|
||||
if os.path.isfile(join(module_path, MANIFEST_FILE)):
|
||||
return True
|
||||
|
||||
all_modules = []
|
||||
|
||||
for module_folder in auto_archiver.modules.__path__:
|
||||
# walk through each module in module_folder and check if it has a valid manifest
|
||||
try:
|
||||
possible_modules = os.listdir(module_folder)
|
||||
except FileNotFoundError:
|
||||
logger.warning(f"Module folder {module_folder} does not exist")
|
||||
continue
|
||||
|
||||
for possible_module in possible_modules:
|
||||
if limit_to_modules and possible_module not in limit_to_modules:
|
||||
for path in paths:
|
||||
# check path exists, if it doesn't, log a warning
|
||||
if not os.path.exists(path):
|
||||
logger.warning(f"Path '{path}' does not exist. Skipping...")
|
||||
continue
|
||||
|
||||
possible_module_path = join(module_folder, possible_module)
|
||||
if not is_really_module(possible_module_path):
|
||||
# see odoo/module/module.py -> initialize_sys_path
|
||||
if path not in auto_archiver.modules.__path__:
|
||||
if HAS_SETUP_PATHS == True:
|
||||
logger.warning(f"You are attempting to re-initialise the module paths with: '{path}' for a 2nd time. \
|
||||
This could lead to unexpected behaviour. It is recommended to only use a single modules path. \
|
||||
If you wish to load modules from different paths then load a 2nd python interpreter (e.g. using multiprocessing).")
|
||||
auto_archiver.modules.__path__.append(path)
|
||||
|
||||
# sort based on the length of the path, so that the longest path is last in the list
|
||||
auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
|
||||
|
||||
HAS_SETUP_PATHS = True
|
||||
|
||||
def get_module(self, module_name: str, config: dict) -> BaseModule:
|
||||
"""
|
||||
Gets and sets up a module using the provided config
|
||||
|
||||
This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
|
||||
|
||||
"""
|
||||
return self.get_module_lazy(module_name).load(config)
|
||||
|
||||
def get_module_lazy(self, module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
|
||||
"""
|
||||
Lazily loads a module, returning a LazyBaseModule
|
||||
|
||||
This has all the information about the module, but does not load the module itself or its dependencies
|
||||
|
||||
To load an actual module, call .setup() on a lazy module
|
||||
|
||||
"""
|
||||
if module_name in self._lazy_modules:
|
||||
return self._lazy_modules[module_name]
|
||||
|
||||
available = self.available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
|
||||
if not available:
|
||||
raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
|
||||
return available[0]
|
||||
|
||||
def available_modules(self, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
|
||||
|
||||
# search through all valid 'modules' paths. Default is 'modules' in the current directory
|
||||
|
||||
# see odoo/modules/module.py -> get_modules
|
||||
def is_really_module(module_path):
|
||||
if os.path.isfile(join(module_path, MANIFEST_FILE)):
|
||||
return True
|
||||
|
||||
all_modules = []
|
||||
|
||||
for module_folder in auto_archiver.modules.__path__:
|
||||
# walk through each module in module_folder and check if it has a valid manifest
|
||||
try:
|
||||
possible_modules = os.listdir(module_folder)
|
||||
except FileNotFoundError:
|
||||
logger.warning(f"Module folder {module_folder} does not exist")
|
||||
continue
|
||||
if _LAZY_LOADED_MODULES.get(possible_module):
|
||||
continue
|
||||
lazy_module = LazyBaseModule(possible_module, possible_module_path)
|
||||
|
||||
_LAZY_LOADED_MODULES[possible_module] = lazy_module
|
||||
for possible_module in possible_modules:
|
||||
if limit_to_modules and possible_module not in limit_to_modules:
|
||||
continue
|
||||
|
||||
all_modules.append(lazy_module)
|
||||
|
||||
if not suppress_warnings:
|
||||
for module in limit_to_modules:
|
||||
if not any(module == m.name for m in all_modules):
|
||||
logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
|
||||
possible_module_path = join(module_folder, possible_module)
|
||||
if not is_really_module(possible_module_path):
|
||||
continue
|
||||
if self._lazy_modules.get(possible_module):
|
||||
continue
|
||||
lazy_module = LazyBaseModule(possible_module, possible_module_path, factory=self)
|
||||
|
||||
return all_modules
|
||||
self._lazy_modules[possible_module] = lazy_module
|
||||
|
||||
all_modules.append(lazy_module)
|
||||
|
||||
if not suppress_warnings:
|
||||
for module in limit_to_modules:
|
||||
if not any(module == m.name for m in all_modules):
|
||||
logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
|
||||
|
||||
return all_modules
|
||||
|
||||
@dataclass
|
||||
class LazyBaseModule:
|
||||
@@ -123,14 +137,16 @@ class LazyBaseModule:
|
||||
type: list
|
||||
description: str
|
||||
path: str
|
||||
module_factory: ModuleFactory
|
||||
|
||||
_manifest: dict = None
|
||||
_instance: BaseModule = None
|
||||
_entry_point: str = None
|
||||
|
||||
def __init__(self, module_name, path):
|
||||
def __init__(self, module_name, path, factory: ModuleFactory):
|
||||
self.name = module_name
|
||||
self.path = path
|
||||
self.module_factory = factory
|
||||
|
||||
@property
|
||||
def entry_point(self):
|
||||
@@ -161,7 +177,7 @@ class LazyBaseModule:
|
||||
return self._manifest
|
||||
# print(f"Loading manifest for module {module_path}")
|
||||
# load the manifest file
|
||||
manifest = copy.deepcopy(BaseModule._DEFAULT_MANIFEST)
|
||||
manifest = copy.deepcopy(DEFAULT_MANIFEST)
|
||||
|
||||
with open(join(self.path, MANIFEST_FILE)) as f:
|
||||
try:
|
||||
@@ -189,13 +205,14 @@ class LazyBaseModule:
|
||||
# clear out any empty strings that a user may have erroneously added
|
||||
continue
|
||||
if not check(dep):
|
||||
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
|
||||
logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. \
|
||||
Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
|
||||
exit(1)
|
||||
|
||||
def check_python_dep(dep):
|
||||
# first check if it's a module:
|
||||
try:
|
||||
m = get_module_lazy(dep, suppress_warnings=True)
|
||||
m = self.module_factory.get_module_lazy(dep, suppress_warnings=True)
|
||||
try:
|
||||
# we must now load this module and set it up with the config
|
||||
m.load(config)
|
||||
@@ -230,19 +247,21 @@ class LazyBaseModule:
|
||||
__import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
|
||||
# finally, get the class instance
|
||||
instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
|
||||
if not getattr(instance, 'name', None):
|
||||
instance.name = self.name
|
||||
|
||||
if not getattr(instance, 'display_name', None):
|
||||
instance.display_name = self.display_name
|
||||
|
||||
self._instance = instance
|
||||
|
||||
# set the name, display name and module factory
|
||||
instance.name = self.name
|
||||
instance.display_name = self.display_name
|
||||
instance.module_factory = self.module_factory
|
||||
|
||||
# merge the default config with the user config
|
||||
default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
|
||||
|
||||
config[self.name] = default_config | config.get(self.name, {})
|
||||
instance.config_setup(config)
|
||||
instance.setup()
|
||||
|
||||
# save the instance for future easy loading
|
||||
self._instance = instance
|
||||
return instance
|
||||
|
||||
def __repr__(self):
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Generator, Union, List, Type
|
||||
from typing import Generator, Union, List, Type, TYPE_CHECKING
|
||||
from urllib.parse import urlparse
|
||||
from ipaddress import ip_address
|
||||
import argparse
|
||||
@@ -21,12 +21,14 @@ from rich_argparse import RichHelpFormatter
|
||||
from .metadata import Metadata, Media
|
||||
from auto_archiver.version import __version__
|
||||
from .config import _yaml, read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
|
||||
from .module import available_modules, LazyBaseModule, get_module, setup_paths
|
||||
from .module import ModuleFactory, LazyBaseModule
|
||||
from . import validators, Feeder, Extractor, Database, Storage, Formatter, Enricher
|
||||
from .module import BaseModule
|
||||
|
||||
from .consts import MODULE_TYPES
|
||||
from loguru import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .base_module import BaseModule
|
||||
from .module import LazyBaseModule
|
||||
|
||||
DEFAULT_CONFIG_FILE = "orchestration.yaml"
|
||||
|
||||
@@ -87,6 +89,12 @@ class UniqueAppendAction(argparse.Action):
|
||||
|
||||
class ArchivingOrchestrator:
|
||||
|
||||
# instance variables
|
||||
module_factory: ModuleFactory
|
||||
setup_finished: bool
|
||||
logger_id: int
|
||||
|
||||
# instance variables, used for convenience to access modules by step
|
||||
feeders: List[Type[Feeder]]
|
||||
extractors: List[Type[Extractor]]
|
||||
enrichers: List[Type[Enricher]]
|
||||
@@ -94,6 +102,11 @@ class ArchivingOrchestrator:
|
||||
storages: List[Type[Storage]]
|
||||
formatters: List[Type[Formatter]]
|
||||
|
||||
def __init__(self):
|
||||
self.module_factory = ModuleFactory()
|
||||
self.setup_finished = False
|
||||
self.logger_id = None
|
||||
|
||||
def setup_basic_parser(self):
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="auto-archiver",
|
||||
@@ -125,7 +138,7 @@ class ArchivingOrchestrator:
|
||||
)
|
||||
self.add_modules_args(modules_parser)
|
||||
cli_modules, unused_args = modules_parser.parse_known_args(unused_args)
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
for module_type in MODULE_TYPES:
|
||||
yaml_config['steps'][f"{module_type}s"] = getattr(cli_modules, f"{module_type}s", []) or yaml_config['steps'].get(f"{module_type}s", [])
|
||||
|
||||
parser = DefaultValidatingParser(
|
||||
@@ -147,15 +160,15 @@ class ArchivingOrchestrator:
|
||||
# TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
|
||||
enabled_modules = []
|
||||
# first loads the modules from the config file, then from the command line
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
for module_type in MODULE_TYPES:
|
||||
enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))
|
||||
|
||||
# clear out duplicates, but keep the order
|
||||
enabled_modules = list(dict.fromkeys(enabled_modules))
|
||||
avail_modules = available_modules(with_manifest=True, limit_to_modules=enabled_modules, suppress_warnings=True)
|
||||
avail_modules = self.module_factory.available_modules(limit_to_modules=enabled_modules, suppress_warnings=True)
|
||||
self.add_individual_module_args(avail_modules, parser)
|
||||
elif basic_config.mode == 'simple':
|
||||
simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
|
||||
simple_modules = [module for module in self.module_factory.available_modules() if not module.requires_setup]
|
||||
self.add_individual_module_args(simple_modules, parser)
|
||||
|
||||
# for simple mode, we use the cli_feeder and any modules that don't require setup
|
||||
@@ -168,7 +181,7 @@ class ArchivingOrchestrator:
|
||||
yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
|
||||
else:
|
||||
# load all modules, they're not using the 'simple' mode
|
||||
self.add_individual_module_args(available_modules(with_manifest=True), parser)
|
||||
self.add_individual_module_args(self.module_factory.available_modules(), parser)
|
||||
|
||||
parser.set_defaults(**to_dot_notation(yaml_config))
|
||||
|
||||
@@ -198,7 +211,7 @@ class ArchivingOrchestrator:
|
||||
parser = self.parser
|
||||
|
||||
# Module loading from the command line
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
for module_type in MODULE_TYPES:
|
||||
parser.add_argument(f'--{module_type}s', dest=f'{module_type}s', nargs='+', help=f'the {module_type}s to use', default=[], action=UniqueAppendAction)
|
||||
|
||||
def add_additional_args(self, parser: argparse.ArgumentParser = None):
|
||||
@@ -224,7 +237,7 @@ class ArchivingOrchestrator:
|
||||
def add_individual_module_args(self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None) -> None:
|
||||
|
||||
if not modules:
|
||||
modules = available_modules(with_manifest=True)
|
||||
modules = self.module_factory.available_modules()
|
||||
|
||||
for module in modules:
|
||||
|
||||
@@ -266,11 +279,18 @@ class ArchivingOrchestrator:
|
||||
|
||||
def setup_logging(self, config):
|
||||
# setup loguru logging
|
||||
logger.remove(0) # remove the default logger
|
||||
try:
|
||||
logger.remove(0) # remove the default logger
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
logging_config = config['logging']
|
||||
logger.add(sys.stderr, level=logging_config['level'])
|
||||
if log_file := logging_config['file']:
|
||||
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
|
||||
|
||||
# add other logging info
|
||||
if self.logger_id is None: # note - need direct comparison to None since need to consider falsy value 0
|
||||
self.logger_id = logger.add(sys.stderr, level=logging_config['level'])
|
||||
if log_file := logging_config['file']:
|
||||
logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
|
||||
|
||||
def install_modules(self, modules_by_type):
|
||||
"""
|
||||
@@ -280,7 +300,7 @@ class ArchivingOrchestrator:
|
||||
"""
|
||||
|
||||
invalid_modules = []
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
for module_type in MODULE_TYPES:
|
||||
|
||||
step_items = []
|
||||
modules_to_load = modules_by_type[f"{module_type}s"]
|
||||
@@ -325,7 +345,7 @@ class ArchivingOrchestrator:
|
||||
if module in invalid_modules:
|
||||
continue
|
||||
try:
|
||||
loaded_module: BaseModule = get_module(module, self.config)
|
||||
loaded_module: BaseModule = self.module_factory.get_module(module, self.config)
|
||||
except (KeyboardInterrupt, Exception) as e:
|
||||
logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
|
||||
if module_type == 'extractor' and loaded_module.name == module:
|
||||
@@ -351,14 +371,17 @@ class ArchivingOrchestrator:
|
||||
def setup_config(self, args: list) -> dict:
|
||||
"""
|
||||
Sets up the configuration file, merging the default config with the user's config
|
||||
|
||||
This function should only ever be run once.
|
||||
"""
|
||||
|
||||
self.setup_basic_parser()
|
||||
|
||||
# parse the known arguments for now (basically, we want the config file)
|
||||
basic_config, unused_args = self.basic_parser.parse_known_args(args)
|
||||
|
||||
# setup any custom module paths, so they'll show in the help and for arg parsing
|
||||
setup_paths(basic_config.module_paths)
|
||||
self.module_factory.setup_paths(basic_config.module_paths)
|
||||
|
||||
# if help flag was called, then show the help
|
||||
if basic_config.help:
|
||||
@@ -370,16 +393,29 @@ class ArchivingOrchestrator:
|
||||
|
||||
def setup(self, args: list):
|
||||
"""
|
||||
Main entry point for the orchestrator, sets up the basic parser, loads the config file, and sets up the complete parser
|
||||
Function to configure all setup of the orchestrator: setup configs and load modules.
|
||||
|
||||
This method should only ever be called once
|
||||
"""
|
||||
|
||||
if self.setup_finished:
|
||||
logger.warning("The `setup_config()` function should only ever be run once. \
|
||||
If you need to re-run the setup, please re-instantiate a new instance of the orchestrator. \
|
||||
For code implementatations, you should call .setup_config() once then you may call .feed() \
|
||||
multiple times to archive multiple URLs.")
|
||||
return
|
||||
|
||||
self.setup_basic_parser()
|
||||
self.config = self.setup_config(args)
|
||||
|
||||
logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
|
||||
self.install_modules(self.config['steps'])
|
||||
|
||||
# log out the modules that were loaded
|
||||
for module_type in BaseModule.MODULE_TYPES:
|
||||
for module_type in MODULE_TYPES:
|
||||
logger.info(f"{module_type.upper()}S: " + ", ".join(m.display_name for m in getattr(self, f"{module_type}s")))
|
||||
|
||||
self.setup_finished = True
|
||||
|
||||
def _command_line_run(self, args: list) -> Generator[Metadata]:
|
||||
"""
|
||||
|
||||
@@ -14,7 +14,7 @@ from auto_archiver.utils.misc import random_str
|
||||
|
||||
from auto_archiver.core import Media, BaseModule, Metadata
|
||||
from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
|
||||
from auto_archiver.core.module import get_module
|
||||
|
||||
class Storage(BaseModule):
|
||||
|
||||
"""
|
||||
@@ -74,7 +74,7 @@ class Storage(BaseModule):
|
||||
filename = random_str(24)
|
||||
elif filename_generator == "static":
|
||||
# load the hash_enricher module
|
||||
he = get_module(HashEnricher, self.config)
|
||||
he = self.module_factory.get_module(HashEnricher, self.config)
|
||||
hd = he.calculate_hash(media.filename)
|
||||
filename = hd[:24]
|
||||
else:
|
||||
|
||||
@@ -1 +1 @@
|
||||
from atlos_db import AtlosDb
|
||||
from .atlos_db import AtlosDb
|
||||
1
src/auto_archiver/modules/atlos_storage/__init__.py
Normal file
1
src/auto_archiver/modules/atlos_storage/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .atlos_storage import AtlosStorage
|
||||
@@ -281,7 +281,7 @@ class GenericExtractor(Extractor):
|
||||
# set up auth
|
||||
auth = self.auth_for_site(url, extract_cookies=False)
|
||||
|
||||
# order of importance: username/pasword -> api_key -> cookie -> cookie_from_browser -> cookies_file
|
||||
# order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file
|
||||
if auth:
|
||||
if 'username' in auth and 'password' in auth:
|
||||
logger.debug(f'Using provided auth username and password for {url}')
|
||||
@@ -290,7 +290,7 @@ class GenericExtractor(Extractor):
|
||||
elif 'cookie' in auth:
|
||||
logger.debug(f'Using provided auth cookie for {url}')
|
||||
yt_dlp.utils.std_headers['cookie'] = auth['cookie']
|
||||
elif 'cookie_from_browser' in auth:
|
||||
elif 'cookies_from_browser' in auth:
|
||||
logger.debug(f'Using extracted cookies from browser {auth["cookies_from_browser"]} for {url}')
|
||||
ydl_options['cookiesfrombrowser'] = auth['cookies_from_browser']
|
||||
elif 'cookies_file' in auth:
|
||||
|
||||
@@ -10,7 +10,6 @@ from auto_archiver.version import __version__
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.core import Formatter
|
||||
from auto_archiver.utils.misc import random_str
|
||||
from auto_archiver.core.module import get_module
|
||||
|
||||
class HtmlFormatter(Formatter):
|
||||
environment: Environment = None
|
||||
@@ -50,7 +49,7 @@ class HtmlFormatter(Formatter):
|
||||
final_media = Media(filename=html_path, _mimetype="text/html")
|
||||
|
||||
# get the already instantiated hash_enricher module
|
||||
he = get_module('hash_enricher', self.config)
|
||||
he = self.module_factory.get_module('hash_enricher', self.config)
|
||||
if len(hd := he.calculate_hash(final_media.filename)):
|
||||
final_media.set("hash", f"{he.algorithm}:{hd}")
|
||||
|
||||
|
||||
@@ -77,13 +77,14 @@ class InstagramTbotExtractor(Extractor):
|
||||
chat, since_id = self._send_url_to_bot(url)
|
||||
message = self._process_messages(chat, since_id, tmp_dir, result)
|
||||
|
||||
# This may be outdated and replaced by the below message, but keeping until confirmed
|
||||
if "You must enter a URL to a post" in message:
|
||||
logger.debug(f"invalid link {url=} for {self.name}: {message}")
|
||||
return False
|
||||
# # TODO: It currently returns this as a success - is that intentional?
|
||||
# if "Media not found or unavailable" in message:
|
||||
# logger.debug(f"invalid link {url=} for {self.name}: {message}")
|
||||
# return False
|
||||
|
||||
if "Media not found or unavailable" in message:
|
||||
logger.debug(f"No media found for link {url=} for {self.name}: {message}")
|
||||
return False
|
||||
|
||||
if message:
|
||||
result.set_content(message).set_title(message[:128])
|
||||
|
||||
@@ -11,6 +11,10 @@ from auto_archiver.core import Media, Metadata
|
||||
|
||||
class ScreenshotEnricher(Enricher):
|
||||
|
||||
def __init__(self, webdriver_factory=None):
|
||||
super().__init__()
|
||||
self.webdriver_factory = webdriver_factory or Webdriver
|
||||
|
||||
def enrich(self, to_enrich: Metadata) -> None:
|
||||
url = to_enrich.get_url()
|
||||
|
||||
@@ -20,7 +24,8 @@ class ScreenshotEnricher(Enricher):
|
||||
|
||||
logger.debug(f"Enriching screenshot for {url=}")
|
||||
auth = self.auth_for_site(url)
|
||||
with Webdriver(self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
|
||||
with self.webdriver_factory(
|
||||
self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
|
||||
http_proxy=self.http_proxy, print_options=self.print_options, auth=auth) as driver:
|
||||
try:
|
||||
driver.get(url)
|
||||
@@ -38,3 +43,4 @@ class ScreenshotEnricher(Enricher):
|
||||
logger.info("TimeoutException loading page for screenshot")
|
||||
except Exception as e:
|
||||
logger.error(f"Got error while loading webdriver for screenshot enricher: {e}")
|
||||
|
||||
|
||||
@@ -7,8 +7,12 @@
|
||||
"bin": ["ffmpeg"]
|
||||
},
|
||||
"configs": {
|
||||
"thumbnails_per_minute": {"default": 60, "help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
|
||||
"max_thumbnails": {"default": 16, "help": "limit the number of thumbnails to generate per video, 0 means no limit"},
|
||||
"thumbnails_per_minute": {"default": 60,
|
||||
"type": "int",
|
||||
"help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
|
||||
"max_thumbnails": {"default": 16,
|
||||
"type": "int",
|
||||
"help": "limit the number of thumbnails to generate per video, 0 means no limit"},
|
||||
},
|
||||
"description": """
|
||||
Generates thumbnails for video files to provide visual previews.
|
||||
|
||||
@@ -42,7 +42,7 @@ class ThumbnailEnricher(Enricher):
|
||||
logger.error(f"error getting duration of video {m.filename}: {e}")
|
||||
return
|
||||
|
||||
num_thumbs = int(min(max(1, duration * self.thumbnails_per_minute), self.max_thumbnails))
|
||||
num_thumbs = int(min(max(1, (duration / 60) * self.thumbnails_per_minute), self.max_thumbnails))
|
||||
timestamps = [duration / (num_thumbs + 1) * i for i in range(1, num_thumbs + 1)]
|
||||
|
||||
thumbnails_media = []
|
||||
|
||||
@@ -4,7 +4,6 @@ from loguru import logger
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.core.module import get_module
|
||||
|
||||
class WhisperEnricher(Enricher):
|
||||
"""
|
||||
@@ -15,7 +14,7 @@ class WhisperEnricher(Enricher):
|
||||
|
||||
def setup(self) -> None:
|
||||
self.stores = self.config['steps']['storages']
|
||||
self.s3 = get_module("s3_storage", self.config)
|
||||
self.s3 = self.module_factory.get_module("s3_storage", self.config)
|
||||
if not "s3_storage" in self.stores:
|
||||
logger.error("WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called.")
|
||||
return
|
||||
@@ -29,8 +28,7 @@ class WhisperEnricher(Enricher):
|
||||
job_results = {}
|
||||
for i, m in enumerate(to_enrich.media):
|
||||
if m.is_video() or m.is_audio():
|
||||
# TODO: this used to pass all storage items to store now
|
||||
# Now only passing S3, the rest will get added later in the usual order (?)
|
||||
# Only storing S3, the rest will get added later in the usual order (?)
|
||||
m.store(url=url, metadata=to_enrich, storages=[self.s3])
|
||||
try:
|
||||
job_id = self.submit_job(m)
|
||||
|
||||
@@ -46,7 +46,7 @@ def dump_payload(p):
|
||||
|
||||
|
||||
def update_nested_dict(dictionary, update_dict):
|
||||
# takes 2 dicts and overwrites the first with the second only on the changed balues
|
||||
# takes 2 dicts and overwrites the first with the second only on the changed values
|
||||
for key, value in update_dict.items():
|
||||
if key in dictionary and isinstance(value, dict) and isinstance(dictionary[key], dict):
|
||||
update_nested_dict(dictionary[key], value)
|
||||
|
||||
@@ -3,12 +3,14 @@ pytest conftest file, for shared fixtures and configuration
|
||||
"""
|
||||
import os
|
||||
import pickle
|
||||
from datetime import datetime, timezone
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Dict, Tuple
|
||||
import hashlib
|
||||
|
||||
import pytest
|
||||
from auto_archiver.core.metadata import Metadata
|
||||
from auto_archiver.core.module import get_module, _LAZY_LOADED_MODULES
|
||||
from auto_archiver.core.module import ModuleFactory
|
||||
|
||||
# Test names inserted into this list will be run last. This is useful for expensive/costly tests
|
||||
# that you only want to run if everything else succeeds (e.g. API calls). The order here is important
|
||||
@@ -20,19 +22,19 @@ TESTS_TO_RUN_LAST = ['test_twitter_api_archiver']
|
||||
def setup_module(request):
|
||||
def _setup_module(module_name, config={}):
|
||||
|
||||
module_factory = ModuleFactory()
|
||||
|
||||
if isinstance(module_name, type):
|
||||
# get the module name:
|
||||
# if the class does not have a .name, use the name of the parent folder
|
||||
module_name = module_name.__module__.rsplit(".",2)[-2]
|
||||
|
||||
m = get_module(module_name, {module_name: config})
|
||||
|
||||
m = module_factory.get_module(module_name, {module_name: config})
|
||||
# add the tmp_dir to the module
|
||||
tmp_dir = TemporaryDirectory()
|
||||
m.tmp_dir = tmp_dir.name
|
||||
|
||||
|
||||
def cleanup():
|
||||
_LAZY_LOADED_MODULES.pop(module_name)
|
||||
tmp_dir.cleanup()
|
||||
request.addfinalizer(cleanup)
|
||||
|
||||
@@ -122,10 +124,36 @@ def pytest_runtest_setup(item):
|
||||
def unpickle():
|
||||
"""
|
||||
Returns a helper function that unpickles a file
|
||||
** gets the file from the test_files directory: tests/data/test_files **
|
||||
** gets the file from the test_files directory: tests/data/ **
|
||||
"""
|
||||
def _unpickle(path):
|
||||
test_data_dir = os.path.join(os.path.dirname(__file__), "data", "test_files")
|
||||
with open(os.path.join(test_data_dir, path), "rb") as f:
|
||||
with open(os.path.join("tests/data", path), "rb") as f:
|
||||
return pickle.load(f)
|
||||
return _unpickle
|
||||
return _unpickle
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_binary_dependencies(mocker):
|
||||
mock_shutil_which = mocker.patch("shutil.which")
|
||||
# Mock all binary dependencies as available
|
||||
mock_shutil_which.return_value = "/usr/bin/fake_binary"
|
||||
return mock_shutil_which
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_datetime():
|
||||
return datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def mock_sleep(mocker):
|
||||
"""Globally mock time.sleep to avoid delays."""
|
||||
return mocker.patch("time.sleep")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def metadata():
|
||||
metadata = Metadata()
|
||||
metadata.set("_processed_at", "2021-01-01T00:00:00")
|
||||
metadata.set_url("https://example.com")
|
||||
return metadata
|
||||
BIN
tests/data/metadata_enricher_exif.pickle
Normal file
BIN
tests/data/metadata_enricher_exif.pickle
Normal file
Binary file not shown.
BIN
tests/data/metadata_enricher_ytshort_expected.pickle
Normal file
BIN
tests/data/metadata_enricher_ytshort_expected.pickle
Normal file
Binary file not shown.
BIN
tests/data/metadata_enricher_ytshort_input.pickle
Normal file
BIN
tests/data/metadata_enricher_ytshort_input.pickle
Normal file
Binary file not shown.
59
tests/databases/test_api_db.py
Normal file
59
tests/databases/test_api_db.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.modules.api_db import AAApiDb
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def api_db(setup_module):
|
||||
configs: dict = {
|
||||
"api_endpoint": "https://api.example.com",
|
||||
"api_token": "test-token",
|
||||
"public": False,
|
||||
"author_id": "Someone",
|
||||
"group_id": "123",
|
||||
"use_api_cache": True,
|
||||
"store_results": True,
|
||||
"tags": "[]",
|
||||
}
|
||||
return setup_module(AAApiDb, configs)
|
||||
|
||||
|
||||
def test_fetch_no_cache(api_db, metadata):
|
||||
# Test fetch
|
||||
api_db.use_api_cache = False
|
||||
assert api_db.fetch(metadata) is None
|
||||
|
||||
|
||||
def test_fetch_fail_status(api_db, metadata, mocker):
|
||||
# Test response fail in fetch method
|
||||
mock_get = mocker.patch("auto_archiver.modules.api_db.api_db.requests.get")
|
||||
mock_get.return_value.status_code = 400
|
||||
mock_get.return_value.json.return_value = {}
|
||||
mock_error = mocker.patch("loguru.logger.error")
|
||||
assert api_db.fetch(metadata) is False
|
||||
mock_error.assert_called_once_with("AA API FAIL (400): {}")
|
||||
|
||||
|
||||
def test_fetch(api_db, metadata, mocker):
|
||||
# Test successful fetch method
|
||||
mock_get = mocker.patch("auto_archiver.modules.api_db.api_db.requests.get")
|
||||
mock_datetime = mocker.patch("auto_archiver.core.metadata.datetime.datetime")
|
||||
mock_datetime.now.return_value = "2021-01-01T00:00:00"
|
||||
mock_get.return_value.status_code = 200
|
||||
mock_get.return_value.json.return_value = [{"result": {}}, {"result":
|
||||
{'media': [], 'metadata': {'_processed_at': '2021-01-01T00:00:00', 'url': 'https://example.com'},
|
||||
'status': 'no archiver'}}]
|
||||
assert api_db.fetch(metadata) == metadata
|
||||
|
||||
|
||||
def test_done_success(api_db, metadata, mocker):
    """done() submits the archive result to the submit-archive endpoint once."""
    post = mocker.patch("auto_archiver.modules.api_db.api_db.requests.post")
    post.return_value.status_code = 201

    api_db.done(metadata)

    expected_payload = {
        'author_id': 'Someone',
        'url': 'https://example.com',
        'public': False,
        'group_id': '123',
        'tags': ['[', ']'],
        'result': (
            '{"status": "no archiver", '
            '"metadata": {"_processed_at": "2021-01-01T00:00:00", "url": "https://example.com"}, '
            '"media": []}'
        ),
    }
    post.assert_called_once()
    post.assert_called_once_with(
        "https://api.example.com/interop/submit-archive",
        json=expected_payload,
        headers={'Authorization': 'Bearer test-token'},
    )
|
||||
|
||||
110
tests/databases/test_atlos_db.py
Normal file
110
tests/databases/test_atlos_db.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.modules.atlos_db import AtlosDb
|
||||
|
||||
|
||||
class FakeAPIResponse:
    """Minimal stand-in for an HTTP response object used by these tests."""

    def __init__(self, data: dict, raise_error: bool = False) -> None:
        """Store the payload and whether ``raise_for_status`` should fail."""
        self.raise_error = raise_error
        self._data = data

    def raise_for_status(self) -> None:
        """Raise ``Exception("HTTP error")`` when ``raise_error`` is set."""
        if not self.raise_error:
            return
        raise Exception("HTTP error")
|
||||
|
||||
|
||||
@pytest.fixture
def atlos_db(setup_module) -> AtlosDb:
    """Provide the "atlos_db" module built with a dummy token and URL."""
    return setup_module(
        "atlos_db",
        {
            "api_token": "abc123",
            "atlos_url": "https://platform.atlos.org",
        },
    )
|
||||
|
||||
|
||||
def test_failed_no_atlos_id(atlos_db, metadata, mocker):
    """failed() must not post anything when the item has no atlos_id."""
    patched_post = mocker.patch("requests.post")

    atlos_db.failed(metadata, "failure reason")

    patched_post.assert_not_called()
|
||||
|
||||
|
||||
def test_failed_with_atlos_id(atlos_db, metadata, mocker):
    """failed() posts the failure reason when the item carries an atlos_id."""
    metadata.set("atlos_id", 42)
    response = FakeAPIResponse({}, raise_error=False)
    patched_post = mocker.patch("requests.post", return_value=response)

    atlos_db.failed(metadata, "failure reason")

    patched_post.assert_called_once_with(
        f"{atlos_db.atlos_url}/api/v2/source_material/metadata/42/auto_archiver",
        headers={"Authorization": f"Bearer {atlos_db.api_token}"},
        json={
            "metadata": {"processed": True, "status": "error", "error": "failure reason"}
        },
    )
|
||||
|
||||
|
||||
def test_failed_http_error(atlos_db, metadata, mocker):
    """failed() lets the error from ``raise_for_status`` propagate."""
    metadata.set("atlos_id", 42)
    failing_response = FakeAPIResponse({}, raise_error=True)
    mocker.patch("requests.post", return_value=failing_response)

    with pytest.raises(Exception, match="HTTP error"):
        atlos_db.failed(metadata, "failure reason")
|
||||
|
||||
|
||||
def test_fetch_returns_false(atlos_db):
    """fetch() returns False for a fresh, empty Metadata item."""
    outcome = atlos_db.fetch(Metadata())
    assert outcome is False
|
||||
|
||||
|
||||
def test_done_no_atlos_id(atlos_db, mocker):
    """done() must not post anything when the item has no atlos_id."""
    plain_item = Metadata().set_url("http://example.com")
    patched_post = mocker.patch("requests.post")

    atlos_db.done(plain_item)

    patched_post.assert_not_called()
|
||||
|
||||
|
||||
def test_done_with_atlos_id(atlos_db, metadata, mocker):
    """done() posts a success payload when the item carries an atlos_id."""
    metadata.set("atlos_id", 99)
    stamp = datetime.now()
    metadata.set("timestamp", stamp)
    response = FakeAPIResponse({}, raise_error=False)
    patched_post = mocker.patch("requests.post", return_value=response)

    atlos_db.done(metadata)

    # Snapshot taken after done(); the timestamp is expected in ISO format.
    results = metadata.metadata.copy()
    results["timestamp"] = stamp.isoformat()
    patched_post.assert_called_once_with(
        f"{atlos_db.atlos_url}/api/v2/source_material/metadata/99/auto_archiver",
        headers={"Authorization": f"Bearer {atlos_db.api_token}"},
        json={
            "metadata": {
                "processed": True,
                "status": "success",
                "results": results,
            }
        },
    )
|
||||
|
||||
|
||||
def test_done_http_error(atlos_db, metadata, mocker):
    """done() lets the error from ``raise_for_status`` propagate."""
    metadata.set("atlos_id", 123)
    failing_response = FakeAPIResponse({}, raise_error=True)
    mocker.patch("requests.post", return_value=failing_response)

    with pytest.raises(Exception, match="HTTP error"):
        atlos_db.done(metadata)
|
||||
@@ -1,6 +1,4 @@
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
@@ -9,8 +7,8 @@ from auto_archiver.modules.gsheet_feeder import GWorksheet
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_gworksheet():
|
||||
mock_gworksheet = MagicMock(spec=GWorksheet)
|
||||
def mock_gworksheet(mocker):
|
||||
mock_gworksheet = mocker.MagicMock(spec=GWorksheet)
|
||||
mock_gworksheet.col_exists.return_value = True
|
||||
mock_gworksheet.get_cell.return_value = ""
|
||||
mock_gworksheet.get_row.return_value = {}
|
||||
@@ -18,14 +16,14 @@ def mock_gworksheet():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_metadata():
|
||||
metadata: Metadata = MagicMock(spec=Metadata)
|
||||
def mock_metadata(mocker):
|
||||
metadata: Metadata = mocker.MagicMock(spec=Metadata)
|
||||
metadata.get_url.return_value = "http://example.com"
|
||||
metadata.status = "done"
|
||||
metadata.get_title.return_value = "Example Title"
|
||||
metadata.get.return_value = "Example Content"
|
||||
metadata.get_timestamp.return_value = "2025-01-01T00:00:00"
|
||||
metadata.get_final_media.return_value = MagicMock(spec=Media)
|
||||
metadata.get_final_media.return_value = mocker.MagicMock(spec=Media)
|
||||
metadata.get_all_media.return_value = []
|
||||
metadata.get_media_by_id.return_value = None
|
||||
metadata.get_first_image.return_value = None
|
||||
@@ -47,21 +45,21 @@ def metadata():
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_media():
|
||||
def mock_media(mocker):
|
||||
"""Fixture for a mock Media object."""
|
||||
mock_media = MagicMock(spec=Media)
|
||||
mock_media = mocker.MagicMock(spec=Media)
|
||||
mock_media.urls = ["http://example.com/media"]
|
||||
mock_media.get.return_value = "not-calculated"
|
||||
return mock_media
|
||||
|
||||
@pytest.fixture
|
||||
def gsheets_db(mock_gworksheet, setup_module):
|
||||
def gsheets_db(mock_gworksheet, setup_module, mocker):
|
||||
db = setup_module("gsheet_db", {
|
||||
"allow_worksheets": "set()",
|
||||
"block_worksheets": "set()",
|
||||
"use_sheet_names_in_stored_paths": "True",
|
||||
})
|
||||
db._retrieve_gsheet = MagicMock(return_value=(mock_gworksheet, 1))
|
||||
db._retrieve_gsheet = mocker.MagicMock(return_value=(mock_gworksheet, 1))
|
||||
return db
|
||||
|
||||
|
||||
@@ -109,27 +107,26 @@ def test_aborted(gsheets_db, mock_metadata, mock_gworksheet):
|
||||
mock_gworksheet.set_cell.assert_called_once_with(1, 'status', '')
|
||||
|
||||
|
||||
def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls):
|
||||
with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00'):
|
||||
gsheets_db.done(metadata)
|
||||
def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls, mocker):
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata)
|
||||
mock_gworksheet.batch_set_cell.assert_called_once_with(expected_calls)
|
||||
|
||||
|
||||
def test_done_cached(gsheets_db, metadata, mock_gworksheet):
|
||||
with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00'):
|
||||
gsheets_db.done(metadata, cached=True)
|
||||
def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata, cached=True)
|
||||
|
||||
# Verify the status message includes "[cached]"
|
||||
call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
|
||||
assert any(call[2].startswith("[cached]") for call in call_args)
|
||||
|
||||
|
||||
def test_done_missing_media(gsheets_db, metadata, mock_gworksheet):
|
||||
def test_done_missing_media(gsheets_db, metadata, mock_gworksheet, mocker):
|
||||
# clear media from metadata
|
||||
metadata.media = []
|
||||
with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp",
|
||||
return_value='2025-02-01T00:00:00+00:00'):
|
||||
gsheets_db.done(metadata)
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata)
|
||||
# Verify nothing media-related gets updated
|
||||
call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
|
||||
media_fields = {'archive', 'screenshot', 'thumbnail', 'wacz', 'replaywebpage'}
|
||||
|
||||
@@ -2,7 +2,7 @@ import pytest
|
||||
|
||||
from auto_archiver.modules.hash_enricher import HashEnricher
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.core.module import get_module_lazy
|
||||
from auto_archiver.core.module import ModuleFactory
|
||||
|
||||
@pytest.mark.parametrize("algorithm, filename, expected_hash", [
|
||||
("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
|
||||
@@ -22,7 +22,7 @@ def test_default_config_values(setup_module):
|
||||
|
||||
def test_config():
|
||||
# test default config
|
||||
c = get_module_lazy('hash_enricher').configs
|
||||
c = ModuleFactory().get_module_lazy('hash_enricher').configs
|
||||
assert c["algorithm"]["default"] == "SHA-256"
|
||||
assert c["chunksize"]["default"] == 16000000
|
||||
assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import datetime
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -9,29 +8,21 @@ from auto_archiver.modules.meta_enricher import MetaEnricher
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_metadata():
|
||||
def mock_metadata(mocker):
|
||||
"""Creates a mock Metadata object."""
|
||||
mock: Metadata = MagicMock(spec=Metadata)
|
||||
mock: Metadata = mocker.MagicMock(spec=Metadata)
|
||||
mock.get_url.return_value = "https://example.com"
|
||||
mock.is_empty.return_value = False # Default to not empty
|
||||
mock.get_all_media.return_value = []
|
||||
return mock
|
||||
|
||||
@pytest.fixture
|
||||
def mock_media():
|
||||
def mock_media(mocker):
|
||||
"""Creates a mock Media object."""
|
||||
mock: Media = MagicMock(spec=Media)
|
||||
mock: Media = mocker.MagicMock(spec=Media)
|
||||
mock.filename = "mock_file.txt"
|
||||
return mock
|
||||
|
||||
@pytest.fixture
|
||||
def metadata():
|
||||
m = Metadata()
|
||||
m.set_url("https://example.com")
|
||||
m.set_title("Test Title")
|
||||
m.set_content("Test Content")
|
||||
return m
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def meta_enricher(setup_module):
|
||||
@@ -90,14 +81,14 @@ def test_enrich_file_sizes_no_media(meta_enricher, metadata):
|
||||
assert metadata.get("total_size") == "0.0 bytes"
|
||||
|
||||
|
||||
def test_enrich_archive_duration(meta_enricher, metadata):
|
||||
def test_enrich_archive_duration(meta_enricher, metadata, mocker):
|
||||
# Set fixed "processed at" time in the past
|
||||
processed_at = datetime.now(timezone.utc) - timedelta(minutes=10, seconds=30)
|
||||
metadata.set("_processed_at", processed_at)
|
||||
# patch datetime
|
||||
with patch("datetime.datetime") as mock_datetime:
|
||||
mock_now = datetime.now(timezone.utc)
|
||||
mock_datetime.now.return_value = mock_now
|
||||
meta_enricher.enrich_archive_duration(metadata)
|
||||
mock_datetime = mocker.patch("datetime.datetime")
|
||||
mock_now = datetime.now(timezone.utc)
|
||||
mock_datetime.now.return_value = mock_now
|
||||
meta_enricher.enrich_archive_duration(metadata)
|
||||
|
||||
assert metadata.get("archive_duration_seconds") == 630
|
||||
88
tests/enrichers/test_metadata_enricher.py
Normal file
88
tests/enrichers/test_metadata_enricher.py
Normal file
@@ -0,0 +1,88 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Media
|
||||
|
||||
|
||||
@pytest.fixture
def mock_media(mocker):
    """Return a ``Media``-spec'd MagicMock whose filename is "mock_file.txt"."""
    media_double: Media = mocker.MagicMock(spec=Media)
    media_double.filename = "mock_file.txt"
    return media_double
|
||||
|
||||
|
||||
@pytest.fixture
def enricher(setup_module, mock_binary_dependencies):
    """Create the "metadata_enricher" module with an empty configuration."""
    empty_config = {}
    return setup_module("metadata_enricher", empty_config)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "output,expected",
    [
        ("Key1: Value1\nKey2: Value2", {"Key1": "Value1", "Key2": "Value2"}),
        ("InvalidLine", {}),
        ("", {}),
    ],
)
def test_get_metadata(enricher, output, expected, mocker):
    """get_metadata() turns the mocked exiftool stdout into the expected dict."""
    run = mocker.patch("subprocess.run")
    completed = run.return_value
    completed.stdout = output
    completed.stderr = ""
    completed.returncode = 0

    parsed = enricher.get_metadata("test.jpg")

    assert parsed == expected
    run.assert_called_once_with(
        ["exiftool", "test.jpg"], capture_output=True, text=True
    )
|
||||
|
||||
|
||||
def test_get_metadata_exiftool_not_found(enricher, mocker):
    """get_metadata() returns {} when subprocess.run raises FileNotFoundError."""
    mocker.patch("subprocess.run", side_effect=FileNotFoundError)
    assert enricher.get_metadata("test.jpg") == {}
|
||||
|
||||
|
||||
def test_enrich_sets_metadata(enricher, mocker):
    """enrich() stores metadata only on media for which get_metadata is non-empty."""
    with_exif = mocker.Mock(filename="img1.jpg")
    without_exif = mocker.Mock(filename="img2.jpg")
    item = mocker.Mock()
    item.media = [with_exif, without_exif]

    def fake_get_metadata(f):
        # Only the first image reports any metadata.
        if f == "img1.jpg":
            return {"key": "value"}
        return {}

    enricher.get_metadata = fake_get_metadata

    enricher.enrich(item)

    with_exif.set.assert_called_once_with("metadata", {"key": "value"})
    without_exif.set.assert_not_called()
    assert item.media == [with_exif, without_exif]
|
||||
|
||||
|
||||
def test_enrich_empty_media(enricher, mocker):
    """enrich() completes without raising when the item has no media."""
    item = mocker.Mock()
    item.media = []
    enricher.enrich(item)
|
||||
|
||||
|
||||
def test_get_metadata_error_handling(enricher, mocker):
    """get_metadata() returns {} and logs an error when subprocess.run fails."""
    mocker.patch("subprocess.run", side_effect=Exception("Test error"))
    error_log = mocker.patch("loguru.logger.error")

    outcome = enricher.get_metadata("test.jpg")

    assert outcome == {}
    logged_message = error_log.call_args[0][0]
    assert "Error occurred: " in logged_message
|
||||
|
||||
|
||||
def test_metadata_pickle(enricher, unpickle, mocker):
    """enrich() on a pickled input item reproduces the pickled expected metadata."""
    run = mocker.patch("subprocess.run")
    # The subprocess result, the input item and the expectation are fixtures on disk.
    run.return_value = unpickle("metadata_enricher_exif.pickle")
    item = unpickle("metadata_enricher_ytshort_input.pickle")
    reference = unpickle("metadata_enricher_ytshort_expected.pickle")

    enricher.enrich(item)

    wanted, got = reference.media, item.media
    assert len(wanted) == len(got)
    assert got[0].properties.get("metadata") == wanted[0].properties.get("metadata")
|
||||
|
||||
78
tests/enrichers/test_pdq_hash_enricher.py
Normal file
78
tests/enrichers/test_pdq_hash_enricher.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import pytest
|
||||
from PIL import UnidentifiedImageError
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.pdq_hash_enricher import PdqHashEnricher
|
||||
|
||||
|
||||
@pytest.fixture
def enricher(setup_module):
    """Create the "pdq_hash_enricher" module with an empty configuration."""
    empty_config = {}
    return setup_module("pdq_hash_enricher", empty_config)
|
||||
|
||||
|
||||
@pytest.fixture
def metadata_with_images():
    """Return a Metadata item for example.com holding two .jpg Media entries."""
    item = Metadata()
    item.set_url("https://example.com")
    for stem in ("image1", "image2"):
        item.add_media(Media(filename=f"{stem}.jpg", key=stem))
    return item
|
||||
|
||||
|
||||
def test_successful_enrich(metadata_with_images, mocker):
    """enrich() stores a ``pdq_hash`` on every media item treated as an image.

    ``pdqhash.compute``, ``PIL.Image.open`` and ``Media.is_image`` are all
    patched, so no real image file is read.
    """
    # pytest-mock undoes patches at test teardown and does not support using
    # them as context managers, so they are applied as plain statements.
    mocker.patch("pdqhash.compute", return_value=([1, 0, 1, 0] * 64, 100))
    mocker.patch("PIL.Image.open")
    mocker.patch.object(Media, "is_image", return_value=True)

    enricher = PdqHashEnricher()
    enricher.enrich(metadata_with_images)

    # Ensure the hash is set for image media
    for media in metadata_with_images.media:
        assert media.get("pdq_hash") is not None
|
||||
|
||||
|
||||
def test_enrich_skip_non_image(metadata_with_images, mocker):
    """enrich() never calls pdqhash.compute when no media is an image."""
    mocker.patch.object(Media, "is_image", return_value=False)
    compute = mocker.patch("pdqhash.compute")

    PdqHashEnricher().enrich(metadata_with_images)

    compute.assert_not_called()
|
||||
|
||||
|
||||
def test_enrich_handles_corrupted_image(metadata_with_images, mocker):
    """enrich() logs one error per media item when the image cannot be opened."""
    mocker.patch("PIL.Image.open", side_effect=UnidentifiedImageError("Corrupted image"))
    compute = mocker.patch("pdqhash.compute")
    error_log = mocker.patch("loguru.logger.error")

    PdqHashEnricher().enrich(metadata_with_images)

    media_count = len(metadata_with_images.media)
    assert error_log.call_count == media_count
    compute.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "media_id, should_have_hash",
    [
        ("screenshot", False),
        ("warc-file-123", False),
        ("regular-image", True),
    ],
)
def test_enrich_excludes_by_filetype(media_id, should_have_hash, mocker):
    """Whether a media item receives a pdq_hash depends on its ``id`` property."""
    item = Metadata()
    item.set_url("https://example.com")
    item.add_media(Media(filename="image.jpg").set("id", media_id))

    mocker.patch("pdqhash.compute", return_value=([1, 0, 1, 0] * 64, 100))
    mocker.patch("PIL.Image.open")
    mocker.patch.object(Media, "is_image", return_value=True)

    PdqHashEnricher().enrich(item)

    has_hash = item.media[0].get("pdq_hash") is not None
    assert has_hash == should_have_hash
|
||||
|
||||
195
tests/enrichers/test_screenshot_enricher.py
Normal file
195
tests/enrichers/test_screenshot_enricher.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import base64
|
||||
|
||||
import pytest
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.screenshot_enricher import ScreenshotEnricher
|
||||
|
||||
|
||||
@pytest.fixture
def mock_selenium_env(mocker):
    """Patch the Selenium and webdriver machinery in one place.

    Yields:
        A ``(driver, driver_class, firefox_options_instance)`` tuple of mocks.
    """
    # Patch external dependencies (same order as the checks they replace).
    which = mocker.patch("shutil.which")
    driver_class = mocker.patch("auto_archiver.utils.webdriver.CookieSettingDriver")
    binary_paths = mocker.patch("selenium.webdriver.common.selenium_manager.SeleniumManager.binary_paths")
    mocker.patch("pathlib.Path.is_file", return_value=True)
    popen = mocker.patch("subprocess.Popen")
    mocker.patch("selenium.webdriver.common.service.Service.is_connectable", return_value=True)
    firefox_options = mocker.patch("selenium.webdriver.FirefoxOptions")

    # `shutil.which` only resolves geckodriver; anything else is "not found".
    which.side_effect = lambda dep: "/mock/geckodriver" if dep == "geckodriver" else None

    binary_paths.return_value = {
        "driver_path": "/mock/driver",
        "browser_path": "/mock/browser",
    }

    # `subprocess.Popen` hands back a process whose poll() returns None.
    process = mocker.MagicMock()
    process.poll.return_value = None
    popen.return_value = process

    # `CookieSettingDriver(...)` hands back a single inspectable driver mock.
    driver = mocker.MagicMock()
    driver_class.return_value = driver

    # `FirefoxOptions()` hands back a single inspectable options mock.
    options_instance = mocker.MagicMock()
    firefox_options.return_value = options_instance

    yield driver, driver_class, options_instance
|
||||
|
||||
|
||||
@pytest.fixture
def common_patches(tmp_path, mocker):
    """Apply patches shared by several tests: auth-wall check, path join, sleep."""
    screenshot_path = str(tmp_path / "test.png")
    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=False)
    # Every os.path.join call resolves to the same file inside tmp_path.
    mocker.patch("os.path.join", return_value=screenshot_path)
    mocker.patch("time.sleep")
    yield
|
||||
|
||||
|
||||
@pytest.fixture
def screenshot_enricher(setup_module, mock_binary_dependencies) -> ScreenshotEnricher:
    """Create the "screenshot_enricher" module with fixed test settings."""
    settings: dict = dict(
        width=1280,
        height=720,
        timeout=60,
        sleep_before_screenshot=4,
        http_proxy="",
        save_to_pdf="False",
        print_options={},
    )
    return setup_module("screenshot_enricher", settings)
|
||||
|
||||
|
||||
@pytest.fixture
def metadata_with_video():
    """Return a Metadata item for example.com holding one media with id "video1"."""
    item = Metadata()
    item.set_url("https://example.com")
    video = Media(filename="video.mp4").set("id", "video1")
    item.add_media(video)
    return item
|
||||
|
||||
|
||||
def test_enrich_adds_screenshot(
    screenshot_enricher,
    metadata_with_video,
    mock_selenium_env,
    common_patches,
    tmp_path,
):
    """enrich() drives the webdriver once and appends a "screenshot" media."""
    driver, driver_class, options_instance = mock_selenium_env

    screenshot_enricher.enrich(metadata_with_video)

    driver_class.assert_called_once_with(
        cookies=None,
        cookiejar=None,
        facebook_accept_cookies=False,
        options=options_instance,
    )
    # Calls made on the driver instance returned by the patched class.
    driver.get.assert_called_once_with("https://example.com")
    driver.save_screenshot.assert_called_once_with(str(tmp_path / "test.png"))

    # Original video plus the new screenshot.
    media = metadata_with_video.media
    assert len(media) == 2
    assert media[1].properties.get("id") == "screenshot"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "url,is_auth",
    [
        ("https://example.com", False),
        ("https://private.com", True),
    ],
)
def test_enrich_auth_wall(
    screenshot_enricher,
    metadata_with_video,
    mock_selenium_env,
    common_patches,
    url,
    is_auth,
    mocker
):
    """enrich() skips the page load when the URL is reported as an auth wall."""
    driver, _driver_class, _ = mock_selenium_env
    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=is_auth)
    metadata_with_video.set_url(url)

    screenshot_enricher.enrich(metadata_with_video)

    media = metadata_with_video.media
    if is_auth:
        # Nothing loaded, nothing added: only the original video remains.
        driver.get.assert_not_called()
        assert len(media) == 1
        assert media[0].properties.get("id") == "video1"
        return

    driver.get.assert_called_once_with(url)
    assert len(media) == 2
    assert media[1].properties.get("id") == "screenshot"
|
||||
|
||||
|
||||
def test_handle_timeout_exception(
    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
):
    """A TimeoutException from driver.get is logged at info level; no media is added."""
    driver, _driver_class, _options = mock_selenium_env
    driver.get.side_effect = TimeoutException
    info_log = mocker.patch("loguru.logger.info")

    screenshot_enricher.enrich(metadata_with_video)

    info_log.assert_called_once_with("TimeoutException loading page for screenshot")
    assert len(metadata_with_video.media) == 1
|
||||
|
||||
|
||||
def test_handle_general_exception(
    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
):
    """An unexpected error from save_screenshot is logged; no media is added."""
    driver, _driver_class, _options = mock_selenium_env
    # The page loads fine; taking the screenshot is what blows up.
    driver.get.return_value = None
    driver.save_screenshot.side_effect = Exception("Unexpected Error")
    error_log = mocker.patch("loguru.logger.error")

    screenshot_enricher.enrich(metadata_with_video)

    error_log.assert_called_once_with(
        "Got error while loading webdriver for screenshot enricher: Unexpected Error"
    )
    # Only the original video is left on the item.
    assert len(metadata_with_video.media) == 1
|
||||
|
||||
|
||||
def test_pdf_creation(mocker, screenshot_enricher, metadata_with_video, mock_selenium_env):
    """With save_to_pdf enabled, enrich() adds both a screenshot and a PDF media."""
    driver, _driver_class, _options = mock_selenium_env
    # Override the configured save_to_pdf option.
    screenshot_enricher.save_to_pdf = True
    # print_page returns base64-encoded content.
    encoded_pdf = base64.b64encode(b"fake_pdf_content").decode("utf-8")
    driver.print_page.return_value = encoded_pdf

    # os.path.join collapses to its last argument so filenames are predictable.
    mocker.patch("os.path.join", side_effect=lambda *args: f"{args[-1]}")
    mocker.patch(
        "auto_archiver.modules.screenshot_enricher.screenshot_enricher.random_str",
        return_value="fixed123",
    )
    opened = mocker.patch("builtins.open", new_callable=mocker.mock_open)
    mocker.patch("loguru.logger.error")

    screenshot_enricher.enrich(metadata_with_video)

    # Screenshot taken and page printed exactly once each.
    driver.save_screenshot.assert_called_once()
    driver.print_page.assert_called_once_with(driver.print_options)
    # The PDF file was opened for binary writing.
    opened.assert_any_call("pdf_fixed123.pdf", "wb")

    # Original video + screenshot + PDF.
    media = metadata_with_video.media
    assert len(media) == 3
    assert media[1].properties.get("id") == "screenshot"
    assert media[2].properties.get("id") == "pdf"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def cleanup_files(tmp_path):
    """After each test, unlink every entry directly inside ``tmp_path``."""
    yield
    for leftover in tmp_path.iterdir():
        leftover.unlink()
|
||||
54
tests/enrichers/test_ssl_enricher.py
Normal file
54
tests/enrichers/test_ssl_enricher.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import ssl
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
|
||||
|
||||
@pytest.fixture
def enricher(setup_module):
    """Create the "ssl_enricher" module with skip_when_nothing_archived set."""
    settings: dict = {"skip_when_nothing_archived": "True"}
    return setup_module("ssl_enricher", settings)
|
||||
|
||||
|
||||
@pytest.fixture
def metadata():
    """Return a Metadata item for https://example.com with two test-file media."""
    item = Metadata()
    item.set_url("https://example.com")
    for path in ("tests/data/testfile_1.txt", "tests/data/testfile_2.txt"):
        item.add_media(Media(path))
    return item
|
||||
|
||||
|
||||
def test_http_raises(metadata, enricher):
    """enrich() raises AssertionError mentioning the scheme for an http:// URL."""
    metadata.set_url("http://example.com")

    with pytest.raises(AssertionError) as raised:
        enricher.enrich(metadata)

    message = str(raised.value)
    assert "Invalid URL scheme" in message
|
||||
|
||||
|
||||
def test_empty_metadata(metadata, enricher):
    """enrich() returns None when the item has no media."""
    metadata.media = []
    outcome = enricher.enrich(metadata)
    assert outcome is None
|
||||
|
||||
|
||||
def test_ssl_enrich(metadata, enricher, mocker):
    """enrich() fetches the certificate, writes it to a .pem file and adds it as media."""
    mocker.patch("ssl.get_server_certificate", return_value="TEST_CERT")
    opened = mocker.patch("builtins.open", mocker.mock_open())
    count_before = len(metadata.media)

    enricher.enrich(metadata)

    ssl.get_server_certificate.assert_called_once_with(("example.com", 443))
    opened.assert_called_once_with(f"{enricher.tmp_dir}/example-com.pem", "w")
    opened().write.assert_called_once_with("TEST_CERT")
    assert len(metadata.media) == count_before + 1
    # The certificate file must show up among the item's media.
    pem_entries = [m for m in metadata.media if m.filename.endswith("example-com.pem")]
    assert pem_entries
|
||||
|
||||
|
||||
def test_ssl_error_handling(enricher, metadata, mocker):
    """An SSLError raised while fetching the certificate propagates out of enrich()."""
    failure = ssl.SSLError("SSL error")
    mocker.patch("ssl.get_server_certificate", side_effect=failure)

    with pytest.raises(ssl.SSLError, match="SSL error"):
        enricher.enrich(metadata)
|
||||
|
||||
148
tests/enrichers/test_thumbnail_enricher.py
Normal file
148
tests/enrichers/test_thumbnail_enricher.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import pytest
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.thumbnail_enricher import ThumbnailEnricher
|
||||
|
||||
|
||||
@pytest.fixture
def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher:
    """Create the "thumbnail_enricher" module: 60 thumbnails/minute, at most 4."""
    settings: dict = dict(thumbnails_per_minute=60, max_thumbnails=4)
    return setup_module("thumbnail_enricher", settings)
|
||||
|
||||
|
||||
@pytest.fixture
def metadata_with_video():
    """Return a Metadata item for example.com holding one media with id "video1"."""
    item = Metadata()
    item.set_url("https://example.com")
    video = Media(filename="video.mp4").set("id", "video1")
    item.add_media(video)
    return item
|
||||
|
||||
|
||||
@pytest.fixture
def mock_ffmpeg_environment(mocker):
    """Patch every ffmpeg-related call in one place.

    ``ffmpeg.input``, ``os.makedirs`` and ``ffmpeg.probe`` are replaced with
    mocks and ``Media.is_video`` is forced to return True.

    Returns:
        A dict with the keys ``mock_ffmpeg_input``, ``mock_makedirs``,
        ``mock_output`` and ``mock_probe``.
    """
    mock_ffmpeg_input = mocker.patch("ffmpeg.input")
    mock_makedirs = mocker.patch("os.makedirs")
    mocker.patch.object(Media, "is_video", return_value=True)
    mock_probe = mocker.patch(
        "ffmpeg.probe",
        return_value={
            "streams": [
                {"codec_type": "video", "duration": "120"}
            ]  # Default 2-minute duration, but can override in tests
        },
    )
    # The object produced by ffmpeg.input(...).filter(...).output(...); tests
    # count its run() calls.
    mock_output = mocker.MagicMock()
    mock_ffmpeg_input.return_value.filter.return_value.output.return_value = (
        mock_output
    )

    return {
        "mock_ffmpeg_input": mock_ffmpeg_input,
        "mock_makedirs": mock_makedirs,
        "mock_output": mock_output,
        "mock_probe": mock_probe,
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "thumbnails_per_minute, max_thumbnails, expected_count",
    [
        (10, 5, 5),  # Capped at max_thumbnails
        (1, 10, 2),  # Less than max_thumbnails
        (60, 7, 7),  # Matches exactly
    ],
)
def test_enrich_thumbnail_limits(
    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment,
    thumbnails_per_minute, max_thumbnails, expected_count
):
    """The number of generated thumbnails follows the rate and the maximum."""
    thumbnail_enricher.thumbnails_per_minute = thumbnails_per_minute
    thumbnail_enricher.max_thumbnails = max_thumbnails

    thumbnail_enricher.enrich(metadata_with_video)

    ffmpeg_output = mock_ffmpeg_environment["mock_output"]
    assert ffmpeg_output.run.call_count == expected_count
    generated = metadata_with_video.media[0].get("thumbnails")
    assert len(generated) == expected_count
|
||||
|
||||
def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, mocker):
    """A failing ``ffmpeg.probe`` is logged as an error and yields no thumbnails."""
    mocker.patch("ffmpeg.probe", side_effect=Exception("Probe error"))
    mocker.patch("os.makedirs")
    mock_logger = mocker.patch("loguru.logger.error")
    mocker.patch.object(Media, "is_video", return_value=True)

    thumbnail_enricher.enrich(metadata_with_video)

    # Ensure error was logged (plain literal: the message has no placeholders)
    mock_logger.assert_called_with(
        "error getting duration of video video.mp4: Probe error"
    )
    # Ensure no thumbnails were created
    thumbnails = metadata_with_video.media[0].get("thumbnails")
    assert thumbnails is None
|
||||
|
||||
|
||||
def test_enrich_skips_non_video_files(thumbnail_enricher, metadata_with_video, mocker):
    """ffmpeg.input must not be called when the media is not reported as a video."""
    ffmpeg_input = mocker.patch("ffmpeg.input")
    mocker.patch.object(Media, "is_video", return_value=False)

    thumbnail_enricher.enrich(metadata_with_video)

    ffmpeg_input.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "thumbnails_per_minute,max_thumbnails,expected_count",
    [
        (60, 5, 5),  # limited by max_thumbnails
        (60, 20, 10),  # limited by the per-minute rate
        (0, 20, 1),  # a single thumbnail is still expected
        (11, 20, 1),  # a single thumbnail is still expected
        (12, 20, 2),  # limited by the per-minute rate
    ],
)
def test_enrich_handles_short_video(
    thumbnail_enricher,
    metadata_with_video,
    mock_ffmpeg_environment,
    thumbnails_per_minute,
    max_thumbnails,
    expected_count,
    mocker,
):
    """Check the thumbnail count when the probed video lasts ten seconds."""
    # Replace the probe result so the video reports a 10 second duration.
    short_probe = {"streams": [{"codec_type": "video", "duration": str(10)}]}
    mocker.patch("ffmpeg.probe", return_value=short_probe)

    enricher = thumbnail_enricher
    enricher.thumbnails_per_minute = thumbnails_per_minute
    enricher.max_thumbnails = max_thumbnails

    enricher.enrich(metadata_with_video)

    ffmpeg_output = mock_ffmpeg_environment["mock_output"]
    assert ffmpeg_output.run.call_count == expected_count
    generated = metadata_with_video.media[0].get("thumbnails")
    assert len(generated) == expected_count
|
||||
|
||||
|
||||
def test_uses_existing_duration(
    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment
):
    """A duration already stored on the media means ffmpeg.probe is not called."""
    probe = mock_ffmpeg_environment["mock_probe"]
    ffmpeg_output = mock_ffmpeg_environment["mock_output"]
    video = metadata_with_video.media[0]
    video.set("duration", 60)

    thumbnail_enricher.enrich(metadata_with_video)

    probe.assert_not_called()
    assert ffmpeg_output.run.call_count == 4
|
||||
|
||||
|
||||
def test_enrich_metadata_structure(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, mocker):
    """Verify ids, filenames and timestamps of the thumbnails attached to the video."""
    duration_seconds = 120
    probe_payload = {'streams': [{'codec_type': 'video', 'duration': str(duration_seconds)}]}
    mocker.patch("ffmpeg.probe", return_value=probe_payload)
    thumbnail_enricher.thumbnails_per_minute = 2
    thumbnail_enricher.max_thumbnails = 4

    thumbnail_enricher.enrich(metadata_with_video)

    video = metadata_with_video.media[0]
    generated = video.get("thumbnails")

    # Metadata stored on the video itself
    assert video.get("id") == "video1"
    assert video.get("duration") == duration_seconds
    assert generated is not None
    assert len(generated) == 4

    # Evenly spaced timestamps, one per thumbnail
    evenly_spaced = ["24.000s", "48.000s", "72.000s", "96.000s"]
    for position, (thumb, stamp) in enumerate(zip(generated, evenly_spaced)):
        assert thumb.filename is not None
        assert thumb.properties.get("id") == f"thumbnail_{position}"
        assert thumb.properties.get("timestamp") == stamp
|
||||
112
tests/enrichers/test_wacz_enricher.py
Normal file
112
tests/enrichers/test_wacz_enricher.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import os
|
||||
from zipfile import ZipFile
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
|
||||
|
||||
@pytest.fixture
def wacz_enricher(setup_module, mock_binary_dependencies):
    """Return the wacz_enricher module built by setup_module from a fixed test config."""
    configs: dict = dict(
        profile=None,
        docker_commands=None,
        timeout=120,
        extract_media=False,
        extract_screenshot=True,
        socks_proxy_host=None,
        socks_proxy_port=None,
        proxy_server=None,
    )
    return setup_module("wacz_enricher", configs)
|
||||
|
||||
|
||||
def test_setup_without_docker(wacz_enricher, mocker):
    """With only RUNNING_IN_DOCKER set, setup() must leave docker_in_docker falsy."""
    container_env = {"RUNNING_IN_DOCKER": "1"}
    mocker.patch.dict(os.environ, container_env, clear=True)

    wacz_enricher.setup()

    assert not wacz_enricher.docker_in_docker
|
||||
|
||||
|
||||
def test_setup_with_docker(wacz_enricher, mocker):
    """With only WACZ_ENABLE_DOCKER set, setup() must leave use_docker truthy."""
    docker_env = {"WACZ_ENABLE_DOCKER": "1"}
    mocker.patch.dict(os.environ, docker_env, clear=True)

    wacz_enricher.setup()

    assert wacz_enricher.use_docker
|
||||
|
||||
|
||||
def test_already_ran(wacz_enricher, metadata, mocker):
    """enrich() returns True and logs a notice when a 'browsertrix' media already exists."""
    info_log = mocker.patch("loguru.logger.info")
    metadata.add_media(Media("test.wacz"), id="browsertrix")

    outcome = wacz_enricher.enrich(metadata)

    assert outcome is True
    # First positional argument of the most recent logger.info call
    logged_message = info_log.call_args[0][0]
    assert "WACZ enricher had already been executed" in logged_message
|
||||
|
||||
|
||||
def test_basic_call_execution(wacz_enricher, mocker):
    """enrich() should call subprocess.run with the target URL in its command."""
    fake_run = mocker.patch("subprocess.run", return_value=mocker.Mock(returncode=0))
    target = Metadata().set_url("https://example.com")

    wacz_enricher.enrich(target)

    assert fake_run.called
    # Checks that the url is passed to the cmd (first positional argument of run)
    command = fake_run.call_args[0][0]
    assert "--url https://example.com" in " ".join(command)
|
||||
|
||||
|
||||
def test_download_success(wacz_enricher, mocker) -> None:
    """download() returns a successful Metadata when enrich() reports success."""
    mocker.patch.object(wacz_enricher, "enrich", return_value=True)
    to_archive = Metadata().set_url("https://example.com")

    downloaded = wacz_enricher.download(to_archive)

    assert downloaded is not None
    assert isinstance(downloaded, Metadata)
    assert downloaded.status == "wacz: success"
|
||||
|
||||
|
||||
def test_enrich_already_executed(wacz_enricher, mocker) -> None:
    """enrich() returns True and logs a notice when the metadata already holds a WACZ."""
    info_log = mocker.patch("loguru.logger.info")
    already_archived = Metadata().set_url("https://example.com")
    already_archived.add_media(Media(filename="some_file.wacz"), id="browsertrix")

    outcome = wacz_enricher.enrich(already_archived)

    assert outcome is True
    logged_message = info_log.call_args[0][0]
    assert "WACZ enricher had already been executed:" in logged_message
|
||||
|
||||
|
||||
def test_enrich_subprocess_exception(wacz_enricher, mocker, tmp_path) -> None:
    """enrich() returns False when subprocess.run raises."""
    # Configure the enricher before any patching takes place.
    for attribute, value in (
        ("tmp_dir", str(tmp_path)),
        ("extract_media", False),
        ("extract_screenshot", True),
    ):
        setattr(wacz_enricher, attribute, value)

    mocker.patch("auto_archiver.utils.misc.random_str", return_value="TESTCOL")
    mocker.patch("subprocess.run", side_effect=Exception("fail"))

    target = Metadata().set_url("https://example.com")
    assert wacz_enricher.enrich(target) is False
|
||||
|
||||
|
||||
def test_extract_media(wacz_enricher, metadata, tmp_path, mocker) -> None:
    """extract_media_from_wacz adds a 'browsertrix-screenshot' media to the metadata."""
    wacz_enricher.tmp_dir = str(tmp_path)

    # Write a genuine zip archive first so that ZipFile can open it later.
    archive_path = tmp_path / "dummy.wacz"
    with ZipFile(archive_path, "w") as archive:
        archive.writestr("dummy.txt", "test content")

    mocker.patch("os.listdir", return_value=[])

    # Bytes returned by every subsequent open(): one image/png WARC resource record.
    warc_record = b"".join(
        (
            b"WARC/1.0\r\n",
            b"WARC-Type: resource\r\n",
            b"Content-Type: image/png\r\n",
            b"WARC-Target-URI: http://example.com/image.png\r\n",
            b"Content-Length: 12\r\n",
            b"\r\n",
            b"image-bytes",
            b"\r\n\r\nWARC/1.0\r\n\r\n",
        )
    )
    mocker.patch("builtins.open", mocker.mock_open(read_data=warc_record))

    metadata.add_media(Media("something.wacz"), "browsertrix")
    wacz_enricher.extract_media_from_wacz(metadata, str(archive_path))

    assert len(metadata.media) == 2
    screenshot = metadata.media[1]
    assert screenshot.properties.get("id") == "browsertrix-screenshot"
|
||||
168
tests/enrichers/test_wayback_enricher.py
Normal file
168
tests/enrichers/test_wayback_enricher.py
Normal file
@@ -0,0 +1,168 @@
|
||||
import json
|
||||
import requests
|
||||
import pytest
|
||||
from auto_archiver.modules.wayback_extractor_enricher import WaybackExtractorEnricher
|
||||
from auto_archiver.core import Metadata
|
||||
|
||||
|
||||
@pytest.fixture
def mock_is_auth_wall(mocker):
    """Return a callable that patches is_auth_wall to answer with the given boolean."""
    return lambda return_value: mocker.patch(
        "auto_archiver.utils.url.is_auth_wall", return_value=return_value
    )
|
||||
|
||||
@pytest.fixture
def mock_post_success(mocker):
    """Fixture returning a factory that patches ``requests.post``.

    The factory takes the JSON payload and status code of the fake response
    and returns the patch object created for ``requests.post``. When no
    payload is given, ``{"job_id": "job123"}`` is used.
    """
    def _mock_post(json_data: dict = None, status_code: int = 200):
        # Compare with None explicitly: an empty dict is a legitimate payload
        # (a response without "job_id") and must not be replaced by the default.
        if json_data is None:
            json_data = {"job_id": "job123"}
        resp = mocker.Mock(status_code=status_code)
        resp.json.return_value = json_data
        return mocker.patch("requests.post", return_value=resp)
    return _mock_post
|
||||
|
||||
@pytest.fixture
def mock_get_success(mocker):
    """Fixture returning a factory that patches ``requests.get``.

    The factory takes the JSON payload and status code of the fake response
    and returns the patch object created for ``requests.get``. When no
    payload is given, a completed archive status is used.
    """
    def _mock_get(json_data: dict = None, status_code: int = 200):
        # Compare with None explicitly so that an explicitly passed empty dict
        # is returned as-is instead of being swapped for the default payload.
        if json_data is None:
            json_data = {
                "status": "success",
                "timestamp": "20250101010101",
                "original_url": "https://example.com",
            }
        resp = mocker.Mock(status_code=status_code)
        resp.json.return_value = json_data
        return mocker.patch("requests.get", return_value=resp)
    return _mock_get
|
||||
|
||||
@pytest.fixture
def wayback_extractor_enricher(setup_module) -> WaybackExtractorEnricher:
    """Return the wayback_extractor_enricher module built from a fixed test config."""
    configs: dict = dict(
        timeout=5,
        if_not_archived_within=None,
        key="somekey",
        secret="secret",
        proxy_http=None,
        proxy_https=None,
    )
    return setup_module("wayback_extractor_enricher", configs)
|
||||
|
||||
|
||||
def test_download_success(
    wayback_extractor_enricher,
    mock_is_auth_wall,
    mock_post_success,
    mock_get_success
):
    """download() stores the archived snapshot URL under the 'wayback' key."""
    mock_is_auth_wall(False)
    mock_post_success()
    mock_get_success()

    # Basic metadata to allow merge
    to_archive = Metadata().set_url("https://example.com")
    snapshot_url = "https://web.archive.org/web/20250101010101/https://example.com"

    downloaded = wayback_extractor_enricher.download(to_archive)

    assert downloaded.get("wayback") == snapshot_url
|
||||
|
||||
def test_enrich_auth_wall(wayback_extractor_enricher, metadata, mock_is_auth_wall):
    """enrich() returns None when is_auth_wall is patched to answer True."""
    mock_is_auth_wall(True)

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is None
|
||||
|
||||
def test_enrich_already_enriched(wayback_extractor_enricher, metadata):
    """enrich() returns True when the metadata already has a 'wayback' entry."""
    metadata.set("wayback", "existing")

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is True
|
||||
|
||||
def test_enrich_post_failure(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success
):
    """A 500 response to the POST makes enrich() fail and record the status."""
    mock_is_auth_wall(False)
    mock_post_success(json_data={"error": "server error"}, status_code=500)

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is False
    recorded = metadata.get("wayback")
    assert "Internet archive failed with status of 500" in recorded
|
||||
|
||||
def test_enrich_post_json_decode_error(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mocker
):
    """enrich() returns False when the POST response body is not valid JSON."""
    mock_is_auth_wall(False)

    # A 200 response whose .json() raises instead of returning a payload.
    broken_response = mocker.Mock(status_code=200, text="invalid json")
    broken_response.json.side_effect = json.decoder.JSONDecodeError("msg", "doc", 0)
    mocker.patch("requests.post", return_value=broken_response)

    outcome = wayback_extractor_enricher.enrich(metadata)
    assert outcome is False
|
||||
|
||||
def test_enrich_no_job_id(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success
):
    """enrich() is expected to return False when the POST mock is given an empty payload."""
    mock_is_auth_wall(False)
    mock_post_success(json_data={})

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is False
|
||||
|
||||
def test_enrich_get_success(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success,
    mock_get_success
):
    """A successful POST and GET store both the snapshot URL and the lookup URL."""
    mock_is_auth_wall(False)
    mock_post_success()
    mock_get_success()

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is True
    expected_entries = {
        "wayback": "https://web.archive.org/web/20250101010101/https://example.com",
        "check wayback": "https://web.archive.org/web/*/https://example.com",
    }
    for key, expected_url in expected_entries.items():
        assert metadata.get(key) == expected_url
|
||||
|
||||
def test_enrich_get_failure(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success,
    mock_get_success
):
    """enrich() returns False when the status GET answers 400 with a failed status."""
    mock_is_auth_wall(False)
    mock_post_success()
    mock_get_success(json_data={"status": "failed"}, status_code=400)

    outcome = wayback_extractor_enricher.enrich(metadata)

    assert outcome is False
|
||||
|
||||
def test_enrich_get_request_exception(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success,
    mocker
):
    """A RequestException raised by requests.get still leaves the job_id recorded."""
    mock_is_auth_wall(False)
    mock_post_success()
    request_error = requests.exceptions.RequestException("error")
    mocker.patch("requests.get", side_effect=request_error)
    mocker.patch("time.sleep", return_value=None)

    outcome = wayback_extractor_enricher.enrich(metadata)

    # check it still enriches the job_id information
    assert outcome is True
    assert metadata.get("wayback").get("job_id") == "job123"
|
||||
|
||||
def test_enrich_get_json_decode_error(
    wayback_extractor_enricher,
    metadata,
    mock_is_auth_wall,
    mock_post_success,
    mocker
):
    """An undecodable GET response body still leaves the job_id recorded."""
    mock_is_auth_wall(False)
    mock_post_success()

    # GET response whose .json() raises instead of returning a payload.
    broken_response = mocker.Mock(text="invalid json")
    broken_response.json.side_effect = json.decoder.JSONDecodeError("msg", "doc", 0)
    mocker.patch("requests.get", return_value=broken_response)
    mocker.patch("time.sleep", return_value=None)

    outcome = wayback_extractor_enricher.enrich(metadata)

    # check it still enriches the job_id information
    assert outcome is True
    assert metadata.get("wayback").get("job_id") == "job123"
|
||||
133
tests/enrichers/test_whisper_enricher.py
Normal file
133
tests/enrichers/test_whisper_enricher.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.s3_storage import S3Storage
|
||||
from auto_archiver.modules.whisper_enricher import WhisperEnricher
|
||||
|
||||
# CDN URL returned by the mocked S3 storage and attached to the test media.
TEST_S3_URL = "http://cdn.example.com/test.mp4"
|
||||
|
||||
|
||||
@pytest.fixture
def enricher(mocker):
    """Yield a configured WhisperEnricher together with its mocked S3 storage."""
    storage_steps = {"storages": ["s3_storage"]}
    settings = {
        "api_endpoint": "http://testapi",
        "api_key": "whisper-key",
        "include_srt": False,
        "timeout": 5,
        "action": "translate",
        "steps": storage_steps,
    }

    s3_double = mocker.MagicMock(spec=S3Storage)
    s3_double.get_cdn_url.return_value = TEST_S3_URL

    whisper = WhisperEnricher()
    whisper.name = "whisper_enricher"
    whisper.display_name = "Whisper Enricher"
    whisper.config_setup({whisper.name: settings})

    # setup() is bypassed: the storages and the S3 handle are assigned by hand
    whisper.stores = settings['steps']['storages']
    whisper.s3 = s3_double

    yield whisper, s3_double
|
||||
|
||||
|
||||
@pytest.fixture
def metadata():
    """Return a Metadata object with a test URL and title set."""
    item = Metadata()
    item.set_url("http://test.url")
    item.set_title("test title")
    return item
|
||||
|
||||
|
||||
@pytest.fixture
def mock_requests(mocker):
    """Patch the enricher's requests module so that post() answers 201 with a job id."""
    created = mocker.MagicMock()
    created.status_code = 201
    created.json.return_value = {"id": "job123"}

    patched = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
    patched.post.return_value = created
    yield patched
|
||||
|
||||
|
||||
def test_successful_job_submission(enricher, metadata, mock_requests, mocker):
    """Submit a video hosted on the mocked CDN and check the stored transcript data."""
    whisper, mock_s3 = enricher
    # Make the mocked S3 storage hand out the URL the test media will carry
    mock_s3.get_cdn_url.return_value = TEST_S3_URL

    # Media whose only URL is the CDN URL above
    video = Media("test.mp4")
    video.mimetype = "video/mp4"
    video.add_url(mock_s3.get_cdn_url.return_value)
    metadata.media = [video]

    # Fake responses for the two GET requests, in the order they are consumed
    status_reply = mocker.MagicMock()
    status_reply.status_code = 200
    status_reply.json.return_value = {"status": "success", "meta": {}}

    artifacts_reply = mocker.MagicMock()
    artifacts_reply.status_code = 200
    artifacts_reply.json.return_value = [
        {"data": [{"start": 0, "end": 5, "text": "test transcript"}]}
    ]

    mock_requests.get.side_effect = [status_reply, artifacts_reply]

    # Run enrichment (without opening file)
    whisper.enrich(metadata)

    # The job must be submitted exactly once with the CDN URL and the API key
    mock_requests.post.assert_called_once_with(
        "http://testapi/jobs",
        json={"url": "http://cdn.example.com/test.mp4", "type": "translate"},
        headers={"Authorization": "Bearer whisper-key"}
    )

    # One status request followed by one artifacts request
    assert mock_requests.get.call_count == 2

    stored = metadata.media[0].get("whisper_model")
    assert "artifact_0_text" in stored
    assert metadata.media[0].get("whisper_model") == {
        'artifact_0_text': 'test transcript',
        'job_artifacts_check': 'http://testapi/jobs/job123/artifacts',
        'job_id': 'job123',
        'job_status_check': 'http://testapi/jobs/job123',
    }
|
||||
|
||||
|
||||
def test_submit_job(enricher, mocker):
    """submit_job() returns the id contained in a 201 response."""
    whisper, _ = enricher

    video = Media("test.mp4")
    video.add_url(TEST_S3_URL)

    created = mocker.MagicMock()
    created.status_code = 201
    created.json.return_value = {"id": "job123"}
    patched = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
    patched.post.return_value = created

    assert whisper.submit_job(video) == "job123"
|
||||
|
||||
|
||||
def test_submit_raises_status(enricher, mocker):
    """submit_job() raises an AssertionError naming the status code on a 400 response."""
    whisper, _ = enricher

    video = Media("test.mp4")
    video.add_url(TEST_S3_URL)

    rejected = mocker.MagicMock()
    rejected.status_code = 400
    rejected.json.return_value = {"id": "job123"}
    patched = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
    patched.post.return_value = rejected

    with pytest.raises(AssertionError) as exc_info:
        whisper.submit_job(video)

    raised_message = str(exc_info.value)
    assert raised_message == "calling the whisper api http://testapi returned a non-success code: 400"
|
||||
|
||||
|
||||
def test_submit_job_fails(enricher):
    """Test assertion fails with non-S3 URL"""
    # Only the enricher is needed; the mocked S3 storage is not used here.
    whisper, _ = enricher
    m = Media("test.mp4")
    # This URL differs from the one the mocked S3 storage returns.
    m.add_url("http://cdn.wrongurl.com/test.mp4")
    with pytest.raises(AssertionError):
        whisper.submit_job(m)
|
||||
@@ -1,15 +1,12 @@
|
||||
from datetime import datetime
|
||||
from typing import Type
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.modules.instagram_api_extractor.instagram_api_extractor import InstagramAPIExtractor
|
||||
from .test_extractor_base import TestExtractorBase
|
||||
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_user_response():
|
||||
return {
|
||||
@@ -115,74 +112,74 @@ class TestInstagramAPIExtractor(TestExtractorBase):
|
||||
# test gets text (metadata title)
|
||||
pass
|
||||
|
||||
def test_download_profile_basic(self, metadata, mock_user_response):
|
||||
def test_download_profile_basic(self, metadata, mock_user_response, mocker):
|
||||
"""Test basic profile download without full_profile"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_from_url') as mock_download:
|
||||
# Mock API responses
|
||||
mock_call.return_value = mock_user_response
|
||||
mock_download.return_value = "profile.jpg"
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_download = mocker.patch.object(self.extractor, 'download_from_url')
|
||||
# Mock API responses
|
||||
mock_call.return_value = mock_user_response
|
||||
mock_download.return_value = "profile.jpg"
|
||||
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.status == "insta profile: success"
|
||||
assert result.get_title() == "Test User"
|
||||
assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
|
||||
# Verify profile picture download
|
||||
mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
|
||||
mock_download.assert_called_once_with("http://example.com/profile.jpg")
|
||||
assert len(result.media) == 1
|
||||
assert result.media[0].filename == "profile.jpg"
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.status == "insta profile: success"
|
||||
assert result.get_title() == "Test User"
|
||||
assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
|
||||
# Verify profile picture download
|
||||
mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
|
||||
mock_download.assert_called_once_with("http://example.com/profile.jpg")
|
||||
assert len(result.media) == 1
|
||||
assert result.media[0].filename == "profile.jpg"
|
||||
|
||||
def test_download_profile_full(self, metadata, mock_user_response, mock_story_response):
|
||||
def test_download_profile_full(self, metadata, mock_user_response, mock_story_response, mocker):
|
||||
"""Test full profile download with stories/posts"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_all_posts') as mock_posts, \
|
||||
patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
|
||||
patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
|
||||
patch.object(self.extractor, '_download_stories_reusable') as mock_stories:
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
|
||||
mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
|
||||
mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
|
||||
mock_stories = mocker.patch.object(self.extractor, '_download_stories_reusable')
|
||||
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
mock_story_response
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_stories.return_value = mock_story_response
|
||||
mock_posts.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
mock_story_response
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_stories.return_value = mock_story_response
|
||||
mock_posts.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.get("#stories") == len(mock_story_response)
|
||||
mock_posts.assert_called_once_with(result, "123")
|
||||
assert "errors" not in result.metadata
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
assert result.get("#stories") == len(mock_story_response)
|
||||
mock_posts.assert_called_once_with(result, "123")
|
||||
assert "errors" not in result.metadata
|
||||
|
||||
def test_download_profile_not_found(self, metadata):
|
||||
def test_download_profile_not_found(self, metadata, mocker):
|
||||
"""Test profile not found error"""
|
||||
with patch.object(self.extractor, 'call_api') as mock_call:
|
||||
mock_call.return_value = {"user": None}
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
self.extractor.download_profile(metadata, "invalid_user")
|
||||
assert "User invalid_user not found" in str(exc_info.value)
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_call.return_value = {"user": None}
|
||||
with pytest.raises(AssertionError) as exc_info:
|
||||
self.extractor.download_profile(metadata, "invalid_user")
|
||||
assert "User invalid_user not found" in str(exc_info.value)
|
||||
|
||||
def test_download_profile_error_handling(self, metadata, mock_user_response):
|
||||
def test_download_profile_error_handling(self, metadata, mock_user_response, mocker):
|
||||
"""Test error handling in full profile mode"""
|
||||
with (patch.object(self.extractor, 'call_api') as mock_call, \
|
||||
patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
|
||||
patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
|
||||
patch.object(self.extractor, '_download_stories_reusable') as stories_tagged, \
|
||||
patch.object(self.extractor, 'download_all_posts') as mock_posts
|
||||
):
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
Exception("Stories API failed"),
|
||||
Exception("Posts API failed")
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
stories_tagged.return_value = None
|
||||
mock_posts.return_value = None
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
mock_call = mocker.patch.object(self.extractor, 'call_api')
|
||||
mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
|
||||
mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
|
||||
stories_tagged = mocker.patch.object(self.extractor, '_download_stories_reusable')
|
||||
mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
|
||||
|
||||
assert result.is_success()
|
||||
assert "Error downloading stories for test_user" in result.metadata["errors"]
|
||||
self.extractor.full_profile = True
|
||||
mock_call.side_effect = [
|
||||
mock_user_response,
|
||||
Exception("Stories API failed"),
|
||||
Exception("Posts API failed")
|
||||
]
|
||||
mock_highlights.return_value = None
|
||||
mock_tagged.return_value = None
|
||||
stories_tagged.return_value = None
|
||||
mock_posts.return_value = None
|
||||
result = self.extractor.download_profile(metadata, "test_user")
|
||||
|
||||
assert result.is_success()
|
||||
assert "Error downloading stories for test_user" in result.metadata["errors"]
|
||||
@@ -1,94 +1,108 @@
|
||||
import os
|
||||
from typing import Type
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.core.extractor import Extractor
|
||||
from auto_archiver.modules.instagram_tbot_extractor import InstagramTbotExtractor
|
||||
from tests.extractors.test_extractor_base import TestExtractorBase
|
||||
|
||||
TESTFILES = os.path.join(os.path.dirname(__file__), "testfiles")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def session_file(tmpdir):
|
||||
"""Fixture to create a test session file."""
|
||||
session_file = os.path.join(tmpdir, "test_session.session")
|
||||
with open(session_file, "w") as f:
|
||||
f.write("mock_session_data")
|
||||
return session_file.replace(".session", "")
|
||||
def patch_extractor_methods(request, setup_module, mocker):
|
||||
mocker.patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None)
|
||||
mocker.patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None)
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def patch_extractor_methods(request, setup_module):
|
||||
with patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None), \
|
||||
patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None):
|
||||
if hasattr(request, 'cls') and hasattr(request.cls, 'config'):
|
||||
request.cls.extractor = setup_module("instagram_tbot_extractor", request.cls.config)
|
||||
|
||||
yield
|
||||
|
||||
@pytest.fixture
|
||||
def metadata_sample():
|
||||
m = Metadata()
|
||||
m.set_title("Test Title")
|
||||
m.set_timestamp("2021-01-01T00:00:00Z")
|
||||
m.set_timestamp("2021-01-01T00:00:00")
|
||||
m.set_url("https://www.instagram.com/p/1234567890")
|
||||
return m
|
||||
|
||||
|
||||
class TestInstagramTbotExtractor:
|
||||
@pytest.fixture
|
||||
def mock_telegram_client(mocker):
|
||||
"""Fixture to mock TelegramClient interactions."""
|
||||
mock_client = mocker.patch("auto_archiver.modules.instagram_tbot_extractor.client")
|
||||
instance = mocker.MagicMock()
|
||||
mock_client.return_value = instance
|
||||
return instance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def extractor(setup_module, patch_extractor_methods, mocker):
|
||||
extractor_module = "instagram_tbot_extractor"
|
||||
extractor: InstagramTbotExtractor
|
||||
config = {
|
||||
"api_id": 12345,
|
||||
"api_hash": "test_api_hash",
|
||||
"session_file": "test_session",
|
||||
"timeout": 4
|
||||
}
|
||||
extractor = setup_module(extractor_module, config)
|
||||
extractor.client = mocker.MagicMock()
|
||||
extractor.session_file = "test_session"
|
||||
return extractor
|
||||
|
||||
|
||||
def test_non_instagram_url(extractor, metadata_sample):
|
||||
metadata_sample.set_url("https://www.youtube.com")
|
||||
assert extractor.download(metadata_sample) is False
|
||||
|
||||
|
||||
def test_download_success(extractor, metadata_sample, mocker):
|
||||
mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
|
||||
mocker.patch.object(extractor, "_process_messages", return_value="Sample Instagram post caption")
|
||||
result = extractor.download(metadata_sample)
|
||||
assert result.is_success()
|
||||
assert result.status == "insta-via-bot: success"
|
||||
assert result.metadata.get("title") == "Sample Instagram post caption"
|
||||
|
||||
|
||||
def test_download_invalid(extractor, metadata_sample, mocker):
|
||||
mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
|
||||
mocker.patch.object(extractor, "_process_messages", return_value="You must enter a URL to a post")
|
||||
assert extractor.download(metadata_sample) is False
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Requires authentication.")
|
||||
class TestInstagramTbotExtractorReal(TestExtractorBase):
|
||||
# To run these tests set the TELEGRAM_API_ID and TELEGRAM_API_HASH environment variables, and ensure the session file exists.
|
||||
# Note these are true at this point in time, but changes to source media could be reason for failure.
|
||||
extractor_module = "instagram_tbot_extractor"
|
||||
extractor: InstagramTbotExtractor
|
||||
config = {
|
||||
"api_id": os.environ.get("TELEGRAM_API_ID"),
|
||||
"api_hash": os.environ.get("TELEGRAM_API_HASH"),
|
||||
"session_file": "secrets/anon-insta",
|
||||
}
|
||||
|
||||
@pytest.fixture
|
||||
def mock_telegram_client(self):
|
||||
"""Fixture to mock TelegramClient interactions."""
|
||||
with patch("auto_archiver.modules.instagram_tbot_extractor._initialize_telegram_client") as mock_client:
|
||||
instance = MagicMock()
|
||||
mock_client.return_value = instance
|
||||
yield instance
|
||||
|
||||
def test_extractor_is_initialized(self):
|
||||
assert self.extractor is not None
|
||||
|
||||
|
||||
@patch("time.sleep")
|
||||
@pytest.mark.parametrize("url, expected_status, bot_responses", [
|
||||
("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou")]),
|
||||
("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol")]),
|
||||
# todo tbot not working for stories :(
|
||||
("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, [MagicMock(id=101, media=None, message="Media not found or unavailable")]),
|
||||
("https://www.youtube.com/watch?v=ymCMy8OffHM", False, []),
|
||||
("https://www.instagram.com/p/INVALID", False, [MagicMock(id=101, media=None, message="You must enter a URL to a post")]),
|
||||
@pytest.mark.parametrize("url, expected_status, message, len_media", [
|
||||
("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success",
|
||||
"Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou",
|
||||
6),
|
||||
("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success",
|
||||
"Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol",
|
||||
3),
|
||||
# instagram tbot not working (potentially intermittently?) for stories - replace with a live story to retest
|
||||
# ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, "Media not found or unavailable"),
|
||||
# Seems to be working intermittently for highlights
|
||||
# ("https://www.instagram.com/stories/highlights/17868810693068139/", "insta-via-bot: success", None, 50),
|
||||
# Marking invalid url as success
|
||||
("https://www.instagram.com/p/INVALID", "insta-via-bot: success", "Media not found or unavailable", 0),
|
||||
("https://www.youtube.com/watch?v=ymCMy8OffHM", False, None, 0),
|
||||
])
|
||||
def test_download(self, mock_sleep, url, expected_status, bot_responses, metadata_sample):
|
||||
def test_download(self, url, expected_status, message, len_media, metadata_sample):
|
||||
"""Test the `download()` method with various Instagram URLs."""
|
||||
metadata_sample.set_url(url)
|
||||
self.extractor.client = MagicMock()
|
||||
|
||||
result = self.extractor.download(metadata_sample)
|
||||
pass
|
||||
# TODO fully mock or use as authenticated test
|
||||
# if expected_status:
|
||||
# assert result.is_success()
|
||||
# assert result.status == expected_status
|
||||
# assert result.metadata.get("title") in [msg.message[:128] for msg in bot_responses if msg.message]
|
||||
# else:
|
||||
# assert result is False
|
||||
|
||||
|
||||
|
||||
|
||||
# Test story
|
||||
# Test expired story
|
||||
# Test requires login/ access (?)
|
||||
# Test post
|
||||
# Test multiple images?
|
||||
if expected_status:
|
||||
assert result.is_success()
|
||||
assert result.status == expected_status
|
||||
assert result.metadata.get("title") == message
|
||||
assert len(result.media) == len_media
|
||||
else:
|
||||
assert result is False
|
||||
|
||||
108
tests/feeders/test_atlos_feeder.py
Normal file
108
tests/feeders/test_atlos_feeder.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import pytest
|
||||
from auto_archiver.modules.atlos_feeder import AtlosFeeder
|
||||
|
||||
|
||||
class FakeAPIResponse:
    """Minimal stand-in for an HTTP response: canned JSON plus an optional error."""

    def __init__(self, data: dict, raise_error: bool = False) -> None:
        # ``raise_error`` is kept public; ``raise_for_status`` consults it on every call.
        self.raise_error = raise_error
        self._payload = data

    def json(self) -> dict:
        """Return the payload supplied at construction, unchanged."""
        return self._payload

    def raise_for_status(self) -> None:
        """Raise ``Exception("HTTP error")`` if the response was flagged as failing."""
        if not self.raise_error:
            return
        raise Exception("HTTP error")
|
||||
|
||||
|
||||
@pytest.fixture
def atlos_feeder(setup_module) -> AtlosFeeder:
    """Build the ``atlos_feeder`` module via ``setup_module`` with placeholder settings."""
    return setup_module(
        "atlos_feeder",
        {
            "api_token": "abc123",
            "atlos_url": "https://platform.atlos.org",
        },
    )
|
||||
|
||||
|
||||
@pytest.fixture
def mock_atlos_api(mocker):
    """Return a helper that patches ``requests.get`` to serve canned JSON payloads."""

    def _mock_responses(responses):
        # One FakeAPIResponse per payload, handed out in order on successive calls.
        canned = []
        for payload in responses:
            canned.append(FakeAPIResponse(payload))
        mocker.patch("requests.get", side_effect=canned)

    return _mock_responses
|
||||
|
||||
|
||||
def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
    """Of three returned items, only the unprocessed one with a source URL is yielded."""

    def make_item(source_url, item_id, processed):
        # Fresh dicts per item so nothing is shared between the fake results.
        return {
            "source_url": source_url,
            "id": item_id,
            "metadata": {"auto_archiver": {"processed": processed}},
            "visibility": "visible",
            "status": "complete",
        }

    single_page = {
        "next": None,
        "results": [
            make_item("http://example.com", 1, False),  # expected to be yielded
            make_item("", 2, False),  # empty source_url
            make_item("http://example.org", 3, True),  # already processed
        ],
    }
    mock_atlos_api([single_page])

    yielded = list(atlos_feeder)

    assert len(yielded) == 1
    only = yielded[0]
    assert only.get_url() == "http://example.com"
    assert only.get("atlos_id") == 1
|
||||
|
||||
|
||||
def test_atlos_feeder_multiple_pages(atlos_feeder, mock_atlos_api):
    """Items from two consecutive pages are yielded in page order."""

    def make_page(next_cursor, source_url, item_id):
        # A page holding exactly one unprocessed, visible, complete item.
        return {
            "next": next_cursor,
            "results": [
                {
                    "source_url": source_url,
                    "id": item_id,
                    "metadata": {"auto_archiver": {"processed": False}},
                    "visibility": "visible",
                    "status": "complete",
                },
            ],
        }

    mock_atlos_api([
        make_page("cursor2", "http://example1.com", 10),
        make_page(None, "http://example2.com", 20),
    ])

    yielded = list(atlos_feeder)

    assert len(yielded) == 2
    first, second = yielded
    assert first.get_url() == "http://example1.com"
    assert first.get("atlos_id") == 10
    assert second.get_url() == "http://example2.com"
    assert second.get("atlos_id") == 20
|
||||
|
||||
|
||||
def test_atlos_feeder_no_results(atlos_feeder, mock_atlos_api):
    """A single page with an empty ``results`` list yields nothing."""
    empty_page = {"next": None, "results": []}
    mock_atlos_api([empty_page])

    yielded = list(atlos_feeder)

    assert yielded == []
|
||||
|
||||
|
||||
def test_atlos_feeder_http_error(atlos_feeder, mocker):
    """Iterating the feeder propagates the error raised by ``raise_for_status``."""
    failing = FakeAPIResponse({"next": None, "results": []}, raise_error=True)
    mocker.patch("requests.get", return_value=failing)

    with pytest.raises(Exception, match="HTTP error"):
        # Exhaust the iterator so the (mocked) request is actually made.
        list(atlos_feeder)
|
||||
@@ -2,27 +2,23 @@ from typing import Type
|
||||
|
||||
import gspread
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from auto_archiver.modules.gsheet_feeder import GsheetsFeeder
|
||||
from auto_archiver.core import Metadata, Feeder
|
||||
|
||||
|
||||
def test_setup_without_sheet_and_sheet_id(setup_module):
|
||||
def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
|
||||
# Ensure setup() raises AssertionError if neither sheet nor sheet_id is set.
|
||||
with patch("gspread.service_account"):
|
||||
with pytest.raises(AssertionError):
|
||||
setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": None},
|
||||
)
|
||||
mocker.patch("gspread.service_account")
|
||||
with pytest.raises(AssertionError):
|
||||
setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": None},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def gsheet_feeder(setup_module) -> GsheetsFeeder:
|
||||
with patch("gspread.service_account"):
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
{
|
||||
def gsheet_feeder(setup_module, mocker) -> GsheetsFeeder:
|
||||
config: dict = {
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "test-auto-archiver",
|
||||
"sheet_id": None,
|
||||
@@ -46,9 +42,13 @@ def gsheet_feeder(setup_module) -> GsheetsFeeder:
|
||||
"allow_worksheets": set(),
|
||||
"block_worksheets": set(),
|
||||
"use_sheet_names_in_stored_paths": True,
|
||||
},
|
||||
)
|
||||
feeder.gsheets_client = MagicMock()
|
||||
}
|
||||
mocker.patch("gspread.service_account")
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
config
|
||||
)
|
||||
feeder.gsheets_client = mocker.MagicMock()
|
||||
return feeder
|
||||
|
||||
|
||||
@@ -129,56 +129,56 @@ def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeeder):
|
||||
],
|
||||
)
|
||||
def test_open_sheet_with_name_or_id(
|
||||
setup_module, sheet, sheet_id, expected_method, expected_arg, description
|
||||
setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker
|
||||
):
|
||||
"""Ensure open_sheet() correctly opens by name or ID based on configuration."""
|
||||
with patch("gspread.service_account") as mock_service_account:
|
||||
mock_client = MagicMock()
|
||||
mock_service_account.return_value = mock_client
|
||||
mock_client.open.return_value = "MockSheet"
|
||||
mock_client.open_by_key.return_value = "MockSheet"
|
||||
mock_service_account = mocker.patch("gspread.service_account")
|
||||
mock_client = mocker.MagicMock()
|
||||
mock_service_account.return_value = mock_client
|
||||
mock_client.open.return_value = "MockSheet"
|
||||
mock_client.open_by_key.return_value = "MockSheet"
|
||||
|
||||
# Setup module with parameterized values
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": sheet, "sheet_id": sheet_id},
|
||||
)
|
||||
sheet_result = feeder.open_sheet()
|
||||
# Validate the correct method was called
|
||||
getattr(mock_client, expected_method).assert_called_once_with(
|
||||
expected_arg
|
||||
), f"Failed: {description}"
|
||||
assert sheet_result == "MockSheet", f"Failed: {description}"
|
||||
# Setup module with parameterized values
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": sheet, "sheet_id": sheet_id},
|
||||
)
|
||||
sheet_result = feeder.open_sheet()
|
||||
# Validate the correct method was called
|
||||
getattr(mock_client, expected_method).assert_called_once_with(
|
||||
expected_arg
|
||||
), f"Failed: {description}"
|
||||
assert sheet_result == "MockSheet", f"Failed: {description}"
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("setup_module")
|
||||
def test_open_sheet_with_sheet_id(setup_module):
|
||||
def test_open_sheet_with_sheet_id(setup_module, mocker):
|
||||
"""Ensure open_sheet() correctly opens a sheet by ID."""
|
||||
with patch("gspread.service_account") as mock_service_account:
|
||||
mock_client = MagicMock()
|
||||
mock_service_account.return_value = mock_client
|
||||
mock_client.open_by_key.return_value = "MockSheet"
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": "ABC123"},
|
||||
)
|
||||
sheet = feeder.open_sheet()
|
||||
mock_client.open_by_key.assert_called_once_with("ABC123")
|
||||
assert sheet == "MockSheet"
|
||||
mock_service_account = mocker.patch("gspread.service_account")
|
||||
mock_client = mocker.MagicMock()
|
||||
mock_service_account.return_value = mock_client
|
||||
mock_client.open_by_key.return_value = "MockSheet"
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": "ABC123"},
|
||||
)
|
||||
sheet = feeder.open_sheet()
|
||||
mock_client.open_by_key.assert_called_once_with("ABC123")
|
||||
assert sheet == "MockSheet"
|
||||
|
||||
|
||||
def test_should_process_sheet(setup_module):
|
||||
with patch("gspread.service_account"):
|
||||
gdb = setup_module(
|
||||
"gsheet_feeder",
|
||||
{
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "TestSheet",
|
||||
"sheet_id": None,
|
||||
"allow_worksheets": {"TestSheet", "Sheet2"},
|
||||
"block_worksheets": {"Sheet3"},
|
||||
},
|
||||
)
|
||||
def test_should_process_sheet(setup_module, mocker):
|
||||
mocker.patch("gspread.service_account")
|
||||
gdb = setup_module(
|
||||
"gsheet_feeder",
|
||||
{
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "TestSheet",
|
||||
"sheet_id": None,
|
||||
"allow_worksheets": {"TestSheet", "Sheet2"},
|
||||
"block_worksheets": {"Sheet3"},
|
||||
},
|
||||
)
|
||||
assert gdb.should_process_sheet("TestSheet") == True
|
||||
assert gdb.should_process_sheet("Sheet3") == False
|
||||
# False if allow_worksheets is set
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
# Note this isn't a feeder, but contained as utility of the gsheet feeder module
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from auto_archiver.modules.gsheet_feeder import GWorksheet
|
||||
|
||||
|
||||
class TestGWorksheet:
|
||||
@pytest.fixture
|
||||
def mock_worksheet(self):
|
||||
mock_ws = MagicMock()
|
||||
def mock_worksheet(self, mocker):
|
||||
mock_ws = mocker.MagicMock()
|
||||
mock_ws.get_values.return_value = [
|
||||
["Link", "Archive Status", "Archive Location", "Archive Date"],
|
||||
["url1", "archived", "filepath1", "2023-01-01"],
|
||||
@@ -136,8 +136,8 @@ class TestGWorksheet:
|
||||
assert gworksheet.to_a1(row, col) == expected
|
||||
|
||||
# Test empty worksheet
|
||||
def test_empty_worksheet_initialization(self):
|
||||
mock_ws = MagicMock()
|
||||
def test_empty_worksheet_initialization(self, mocker):
|
||||
mock_ws = mocker.MagicMock()
|
||||
mock_ws.get_values.return_value = []
|
||||
g = GWorksheet(mock_ws)
|
||||
assert g.headers == []
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from typing import Type
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from auto_archiver.core import Media
|
||||
from auto_archiver.modules.s3_storage import S3Storage
|
||||
|
||||
@@ -11,7 +10,6 @@ class TestS3Storage:
|
||||
"""
|
||||
module_name: str = "s3_storage"
|
||||
storage: Type[S3Storage]
|
||||
s3: MagicMock
|
||||
config: dict = {
|
||||
"path_generator": "flat",
|
||||
"filename_generator": "static",
|
||||
@@ -25,13 +23,14 @@ class TestS3Storage:
|
||||
"private": False,
|
||||
}
|
||||
|
||||
@patch('boto3.client')
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_storage(self, setup_module):
|
||||
def setup_storage(self, setup_module, mocker):
|
||||
self.s3 = S3Storage()
|
||||
self.storage = setup_module(self.module_name, self.config)
|
||||
|
||||
def test_client_initialization(self):
|
||||
"""Test that S3 client is initialized with correct parameters"""
|
||||
|
||||
assert self.storage.s3 is not None
|
||||
assert self.storage.s3.meta.region_name == 'test-region'
|
||||
|
||||
@@ -44,81 +43,63 @@ class TestS3Storage:
|
||||
media.key = "another/path.jpg"
|
||||
assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
|
||||
|
||||
def test_uploadf_sets_acl_public(self):
|
||||
def test_uploadf_sets_acl_public(self, mocker):
|
||||
media = Media("test.txt")
|
||||
mock_file = MagicMock()
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_s3_upload, \
|
||||
patch.object(self.storage, 'is_upload_needed', return_value=True):
|
||||
self.storage.uploadf(mock_file, media)
|
||||
mock_s3_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
Key=media.key,
|
||||
ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
|
||||
)
|
||||
mock_file = mocker.MagicMock()
|
||||
mock_s3_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
|
||||
mocker.patch.object(self.storage, 'is_upload_needed', return_value=True)
|
||||
self.storage.uploadf(mock_file, media)
|
||||
mock_s3_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
Key=media.key,
|
||||
ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
|
||||
)
|
||||
|
||||
def test_upload_decision_logic(self):
|
||||
def test_upload_decision_logic(self, mocker):
|
||||
"""Test is_upload_needed under different conditions"""
|
||||
media = Media("test.txt")
|
||||
# Test default state (random_no_duplicate=False)
|
||||
assert self.storage.is_upload_needed(media) is True
|
||||
# Set duplicate checking config to true:
|
||||
|
||||
self.storage.random_no_duplicate = True
|
||||
with patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash') as mock_calc_hash, \
|
||||
patch.object(self.storage, 'file_in_folder') as mock_file_in_folder:
|
||||
mock_calc_hash.return_value = 'beepboop123beepboop123beepboop123'
|
||||
mock_file_in_folder.return_value = 'existing_key.txt'
|
||||
# Test duplicate result
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == 'existing_key.txt'
|
||||
mock_file_in_folder.assert_called_with(
|
||||
# (first 24 chars of hash)
|
||||
'no-dups/beepboop123beepboop123be'
|
||||
)
|
||||
mock_calc_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value='beepboop123beepboop123beepboop123')
|
||||
mock_file_in_folder = mocker.patch.object(self.storage, 'file_in_folder', return_value='existing_key.txt')
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == 'existing_key.txt'
|
||||
mock_file_in_folder.assert_called_with('no-dups/beepboop123beepboop123be')
|
||||
|
||||
|
||||
@patch.object(S3Storage, 'file_in_folder')
|
||||
def test_skips_upload_when_duplicate_exists(self, mock_file_in_folder):
|
||||
def test_skips_upload_when_duplicate_exists(self, mocker):
|
||||
"""Test that upload skips when file_in_folder finds existing object"""
|
||||
self.storage.random_no_duplicate = True
|
||||
mock_file_in_folder.return_value = "existing_folder/existing_file.txt"
|
||||
# Create test media with calculated hash
|
||||
mock_file_in_folder = mocker.patch.object(S3Storage, 'file_in_folder', return_value="existing_folder/existing_file.txt")
|
||||
media = Media("test.txt")
|
||||
media.key = "original_path.txt"
|
||||
with patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash') as mock_calculate_hash:
|
||||
mock_calculate_hash.return_value = "beepboop123beepboop123beepboop123"
|
||||
# Verify upload
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == "existing_folder/existing_file.txt"
|
||||
assert media.get("previously archived") is True
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
|
||||
result = self.storage.uploadf(None, media)
|
||||
mock_upload.assert_not_called()
|
||||
assert result is True
|
||||
mock_calculate_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value="beepboop123beepboop123beepboop123")
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == "existing_folder/existing_file.txt"
|
||||
assert media.get("previously archived") is True
|
||||
mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
|
||||
result = self.storage.uploadf(None, media)
|
||||
mock_upload.assert_not_called()
|
||||
assert result is True
|
||||
|
||||
@patch.object(S3Storage, 'is_upload_needed')
|
||||
def test_uploads_with_correct_parameters(self, mock_upload_needed):
|
||||
def test_uploads_with_correct_parameters(self, mocker):
|
||||
media = Media("test.txt")
|
||||
media.key = "original_key.txt"
|
||||
mock_upload_needed.return_value = True
|
||||
mocker.patch.object(S3Storage, 'is_upload_needed', return_value=True)
|
||||
media.mimetype = 'image/png'
|
||||
mock_file = MagicMock()
|
||||
mock_file = mocker.MagicMock()
|
||||
mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
|
||||
self.storage.uploadf(mock_file, media)
|
||||
mock_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
Key='original_key.txt',
|
||||
ExtraArgs={
|
||||
'ACL': 'public-read',
|
||||
'ContentType': 'image/png'
|
||||
}
|
||||
)
|
||||
|
||||
with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
|
||||
self.storage.uploadf(mock_file, media)
|
||||
# verify call occurred with these params
|
||||
mock_upload.assert_called_once_with(
|
||||
mock_file,
|
||||
Bucket='test-bucket',
|
||||
Key='original_key.txt',
|
||||
ExtraArgs={
|
||||
'ACL': 'public-read',
|
||||
'ContentType': 'image/png'
|
||||
}
|
||||
)
|
||||
|
||||
def test_file_in_folder_exists(self):
|
||||
with patch.object(self.storage.s3, 'list_objects') as mock_list_objects:
|
||||
mock_list_objects.return_value = {'Contents': [{'Key': 'path/to/file.txt'}]}
|
||||
assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
|
||||
def test_file_in_folder_exists(self, mocker):
|
||||
mock_list_objects = mocker.patch.object(self.storage.s3, 'list_objects', return_value={'Contents': [{'Key': 'path/to/file.txt'}]})
|
||||
assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
|
||||
|
||||
142
tests/storages/test_atlos_storage.py
Normal file
142
tests/storages/test_atlos_storage.py
Normal file
@@ -0,0 +1,142 @@
|
||||
import os
|
||||
import hashlib
|
||||
import pytest
|
||||
from auto_archiver.core import Media, Metadata
|
||||
from auto_archiver.modules.atlos_storage import AtlosStorage
|
||||
|
||||
|
||||
class FakeAPIResponse:
    """Minimal stand-in for an HTTP response: canned JSON plus an optional error."""

    def __init__(self, data: dict, raise_error: bool = False) -> None:
        # ``raise_error`` is kept public; ``raise_for_status`` consults it on every call.
        self.raise_error = raise_error
        self._payload = data

    def json(self) -> dict:
        """Return the payload supplied at construction, unchanged."""
        return self._payload

    def raise_for_status(self) -> None:
        """Raise ``Exception("HTTP error")`` if the response was flagged as failing."""
        if not self.raise_error:
            return
        raise Exception("HTTP error")
|
||||
|
||||
|
||||
@pytest.fixture
def atlos_storage(setup_module) -> AtlosStorage:
    """Build the ``atlos_storage`` module via ``setup_module`` with placeholder settings."""
    return setup_module(
        "atlos_storage",
        {
            "api_token": "abc123",
            "atlos_url": "https://platform.atlos.org",
        },
    )
|
||||
|
||||
|
||||
@pytest.fixture
def media(tmp_path) -> Media:
    """Media backed by a real temp file containing ``b"media content"``."""
    source = tmp_path / "media.txt"
    source.write_bytes(b"media content")

    item = Media(filename=str(source))
    item.properties = {"something": "Title"}
    item.key = "key"
    return item
|
||||
|
||||
|
||||
def test_get_cdn_url(atlos_storage: AtlosStorage) -> None:
    """get_cdn_url() returns the storage's ``atlos_url`` for a given media."""
    placeholder = Media(filename="dummy.mp4")
    assert atlos_storage.get_cdn_url(placeholder) == atlos_storage.atlos_url
|
||||
|
||||
|
||||
def test_hash(tmp_path, atlos_storage: AtlosStorage) -> None:
    """_hash() equals the SHA-256 hex digest of the media file's bytes."""
    payload = b"hello world"
    on_disk = tmp_path / "test.txt"
    on_disk.write_bytes(payload)

    # Created with a placeholder name, then pointed at the real file.
    item = Media(filename="dummy.mp4")
    item.filename = str(on_disk)

    assert atlos_storage._hash(item) == hashlib.sha256(payload).hexdigest()
|
||||
|
||||
|
||||
def test_upload_no_atlos_id(tmp_path, atlos_storage: AtlosStorage, media: Media, mocker) -> None:
    """upload() returns False and never POSTs when the metadata has no atlos_id."""
    post_mock = mocker.patch("requests.post")
    without_id = Metadata()  # deliberately left without an atlos_id

    outcome = atlos_storage.upload(media, without_id)

    assert outcome is False
    post_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_upload_already_uploaded(atlos_storage: AtlosStorage,
                                 metadata: Metadata,
                                 media: Media,
                                 tmp_path,
                                 mocker) -> None:
    """upload() returns True without POSTing when an artifact already has the file's hash."""
    metadata.set("atlos_id", 101)
    # Same bytes the ``media`` fixture writes to disk, so the hashes match.
    known_hash = hashlib.sha256(b"media content").hexdigest()
    listing = FakeAPIResponse(
        {"result": {"artifacts": [{"file_hash_sha256": known_hash}]}}
    )
    get_mock = mocker.patch("requests.get", return_value=listing)
    post_mock = mocker.patch("requests.post")

    outcome = atlos_storage.upload(media, metadata)

    assert outcome is True
    get_mock.assert_called_once()
    post_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_upload_not_uploaded(tmp_path, atlos_storage: AtlosStorage,
                             metadata: Metadata,
                             media: Media,
                             mocker) -> None:
    """upload() POSTs the file when no listed artifact shares its hash."""
    metadata.set("atlos_id", 202)
    listing = FakeAPIResponse(
        {"result": {"artifacts": [{"file_hash_sha256": "different_hash"}]}}
    )
    get_mock = mocker.patch("requests.get", return_value=listing)
    post_mock = mocker.patch(
        "requests.post",
        return_value=FakeAPIResponse({}, raise_error=False),
    )

    outcome = atlos_storage.upload(media, metadata)

    assert outcome is True
    get_mock.assert_called_once()
    post_mock.assert_called_once()

    sent = post_mock.call_args
    sent_kwargs = sent.kwargs
    assert sent_kwargs["headers"] == {"Authorization": f"Bearer {atlos_storage.api_token}"}
    assert sent_kwargs["params"] == {"title": media.properties}
    # The URL may arrive as ``url=`` or as the first positional argument.
    target_url = sent_kwargs.get("url") or sent.args[0]
    assert target_url == f"{atlos_storage.atlos_url}/api/v2/source_material/upload/202"
    # The first element of the ``file`` tuple is the uploaded file's base name.
    uploaded_name = sent_kwargs["files"]["file"][0]
    assert uploaded_name == os.path.basename(media.filename)
|
||||
|
||||
|
||||
def test_upload_post_http_error(tmp_path,
                                atlos_storage: AtlosStorage,
                                metadata: Metadata,
                                media: Media,
                                mocker) -> None:
    """upload() lets the error from a failing POST response propagate."""
    metadata.set("atlos_id", 303)
    # No existing artifacts, so upload() proceeds to the POST.
    mocker.patch(
        "requests.get",
        return_value=FakeAPIResponse({"result": {"artifacts": []}}),
    )
    mocker.patch(
        "requests.post",
        return_value=FakeAPIResponse({}, raise_error=True),
    )

    with pytest.raises(Exception, match="HTTP error"):
        atlos_storage.upload(media, metadata)
|
||||
|
||||
|
||||
def test_uploadf_not_implemented(atlos_storage: AtlosStorage) -> None:
    """uploadf() returns None for arbitrary arguments (not implemented)."""
    assert atlos_storage.uploadf(None, "dummy") is None
|
||||
@@ -1,44 +1,57 @@
|
||||
from typing import Type
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from oauth2client import service_account
|
||||
|
||||
from auto_archiver.core import Media
|
||||
from auto_archiver.modules.gdrive_storage import GDriveStorage
|
||||
from auto_archiver.core.metadata import Metadata
|
||||
from tests.storages.test_storage_base import TestStorageBase
|
||||
|
||||
|
||||
class TestGDriveStorage:
|
||||
"""
|
||||
Test suite for GDriveStorage.
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def gdrive_storage(setup_module, mocker):
|
||||
module_name: str = "gdrive_storage"
|
||||
storage: Type[GDriveStorage]
|
||||
storage: GDriveStorage
|
||||
config: dict = {'path_generator': 'url',
|
||||
'filename_generator': 'static',
|
||||
'root_folder_id': "fake_root_folder_id",
|
||||
'oauth_token': None,
|
||||
'service_account': 'fake_service_account.json'
|
||||
}
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def gdrive(self, setup_module):
|
||||
with patch('google.oauth2.service_account.Credentials.from_service_account_file') as mock_creds:
|
||||
self.storage = setup_module(self.module_name, self.config)
|
||||
|
||||
def test_initialize_fails_with_non_existent_creds(self):
|
||||
"""
|
||||
Test that the Google Drive service raises a FileNotFoundError when the service account file does not exist.
|
||||
"""
|
||||
# Act and Assert
|
||||
with pytest.raises(FileNotFoundError) as exc_info:
|
||||
self.storage.setup()
|
||||
assert "No such file or directory" in str(exc_info.value)
|
||||
mocker.patch('google.oauth2.service_account.Credentials.from_service_account_file')
|
||||
return setup_module(module_name, config)
|
||||
|
||||
|
||||
def test_path_parts(self):
|
||||
media = Media(filename="test.jpg")
|
||||
media.key = "folder1/folder2/test.jpg"
|
||||
def test_initialize_fails_with_non_existent_creds(setup_module):
|
||||
"""Test that the Google Drive service raises a FileNotFoundError when the service account file does not exist.
|
||||
(and isn't mocked)
|
||||
"""
|
||||
config: dict = {'path_generator': 'url',
|
||||
'filename_generator': 'static',
|
||||
'root_folder_id': "fake_root_folder_id",
|
||||
'oauth_token': None,
|
||||
'service_account': 'fake_service_account.json'
|
||||
}
|
||||
with pytest.raises(FileNotFoundError) as exc_info:
|
||||
setup_module("gdrive_storage", config)
|
||||
assert "No such file or directory" in str(exc_info.value)
|
||||
|
||||
|
||||
def test_get_id_from_parent_and_name(gdrive_storage, mocker):
|
||||
"""Test _get_id_from_parent_and_name returns correct id from an API result."""
|
||||
fake_list = mocker.MagicMock()
|
||||
fake_list.execute.return_value = {"files": [{"id": "123", "name": "testname"}]}
|
||||
fake_service = mocker.MagicMock()
|
||||
# mock the files.list return value
|
||||
fake_service.files.return_value.list.return_value = fake_list
|
||||
gdrive_storage.service = fake_service
|
||||
result = gdrive_storage._get_id_from_parent_and_name("parent", "mock", retries=1, use_mime_type=False)
|
||||
assert result == "123"
|
||||
|
||||
def test_path_parts():
|
||||
media = Media(filename="test.jpg")
|
||||
media.key = "folder1/folder2/test.jpg"
|
||||
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Requires real credentials")
|
||||
|
||||
54
tests/storages/test_local_storage.py
Normal file
54
tests/storages/test_local_storage.py
Normal file
@@ -0,0 +1,54 @@
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Media
|
||||
from auto_archiver.modules.local_storage import LocalStorage
|
||||
|
||||
|
||||
@pytest.fixture
def local_storage(setup_module, tmp_path, monkeypatch) -> LocalStorage:
    """Provide a LocalStorage whose relative ``save_to`` lands in a temp directory.

    ``save_to`` is the relative path ``./local_archive`` and some tests in this
    file really call ``upload()``. Without isolation those tests would create
    files under whatever directory pytest was started from and leave them
    behind. The working directory is therefore switched to ``tmp_path`` for
    the duration of the test; ``monkeypatch`` restores it afterwards.
    """
    configs: dict = {
        "path_generator": "flat",
        "filename_generator": "static",
        "save_to": "./local_archive",
        "save_absolute": False,
    }
    storage = setup_module("local_storage", configs)
    # Switch only after the module is built, so module setup still runs from the
    # original working directory.
    monkeypatch.chdir(tmp_path)
    return storage
|
||||
|
||||
|
||||
@pytest.fixture
def sample_media(tmp_path) -> Media:
    """Media keyed ``subdir/test.txt`` whose source is a temp file with known text."""
    source = tmp_path / "source.txt"
    source.write_text("test content")
    item = Media(key="subdir/test.txt", filename=str(source))
    return item
|
||||
|
||||
|
||||
def test_get_cdn_url_relative(local_storage):
    """With ``save_absolute`` False, the CDN URL is ``save_to`` joined with the key."""
    item = Media(key="test.txt", filename="dummy.txt")
    wanted = os.path.join(local_storage.save_to, item.key)

    actual = local_storage.get_cdn_url(item)

    assert actual == wanted
|
||||
|
||||
|
||||
|
||||
def test_get_cdn_url_absolute(local_storage):
    """With ``save_absolute`` True, the CDN URL is the absolute form of the joined path."""
    item = Media(key="test.txt", filename="dummy.txt")
    local_storage.save_absolute = True
    joined = os.path.join(local_storage.save_to, item.key)
    wanted = os.path.abspath(joined)

    actual = local_storage.get_cdn_url(item)

    assert actual == wanted
|
||||
|
||||
def test_upload_file_contents_and_metadata(local_storage, sample_media):
    """upload() returns True and the destination file has the source's text."""
    destination = Path(os.path.join(local_storage.save_to, sample_media.key))

    uploaded = local_storage.upload(sample_media)

    assert uploaded is True
    source_text = Path(sample_media.filename).read_text()
    assert source_text == destination.read_text()
|
||||
|
||||
|
||||
def test_upload_nonexistent_source(local_storage):
    """Uploading a Media whose source file does not exist should raise FileNotFoundError."""
    missing_media = Media(key="missing.txt", filename="nonexistent.txt")
    with pytest.raises(FileNotFoundError):
        local_storage.upload(missing_media)
|
||||
|
||||
|
||||
@@ -162,4 +162,25 @@ def test_get_context():
|
||||
|
||||
|
||||
def test_choose_most_complete():
    """Check which of two similar Metadata objects choose_most_complete returns.

    Both objects get a title, content and the same URL; ``m_less`` also gets
    one context entry. The test asserts that the result carries the title of
    ``m_more``, the first item in the list.
    """
    m_more = Metadata()
    m_more.set_title("Title 1")
    m_more.set_content("Content 1")
    m_more.set_url("https://example.com")

    m_less = Metadata()
    m_less.set_title("Title 2")
    m_less.set_content("Content 2")
    m_less.set_url("https://example.com")
    # Extra context entry; the assertion below expects it not to make
    # m_less the chosen result.
    m_less.set_context("key", "value")

    res = Metadata.choose_most_complete([m_more, m_less])
    assert res.metadata.get("title") == "Title 1"
|
||||
|
||||
def test_choose_most_complete_from_pickles(unpickle):
    """Choose between pickled metadata captured before and after an enricher ran.

    Per the original author's note, the selection only compares the length of
    the media, not the media contents.
    """
    before_enriching = unpickle("metadata_enricher_ytshort_input.pickle")
    after_enriching = unpickle("metadata_enricher_ytshort_expected.pickle")
    # An empty Metadata goes first; the original note points at the
    # implementation's `for r in results[1:]:` loop, which then visits both
    # unpickled objects.
    candidates = [Metadata(), after_enriching, before_enriching]
    chosen = Metadata.choose_most_complete(candidates)
    assert chosen.media == after_enriching.media
|
||||
|
||||
@@ -1,24 +1,18 @@
|
||||
import sys
|
||||
import pytest
|
||||
from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
|
||||
from auto_archiver.core.module import ModuleFactory, LazyBaseModule
|
||||
from auto_archiver.core.base_module import BaseModule
|
||||
|
||||
@pytest.fixture
|
||||
def example_module():
|
||||
import auto_archiver
|
||||
|
||||
module_factory = ModuleFactory()
|
||||
|
||||
previous_path = auto_archiver.modules.__path__
|
||||
auto_archiver.modules.__path__.append("tests/data/test_modules/")
|
||||
|
||||
module = get_module_lazy("example_module")
|
||||
yield module
|
||||
# cleanup
|
||||
try:
|
||||
del module._manifest
|
||||
except AttributeError:
|
||||
pass
|
||||
del _LAZY_LOADED_MODULES["example_module"]
|
||||
sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
|
||||
auto_archiver.modules.__path__ = previous_path
|
||||
return module_factory.get_module_lazy("example_module")
|
||||
|
||||
def test_get_module_lazy(example_module):
|
||||
assert example_module.name == "example_module"
|
||||
@@ -46,12 +40,14 @@ def test_module_dependency_check_loads_module(example_module):
|
||||
# monkey patch the manifest to include a nonexistent dependency
|
||||
example_module.manifest["dependencies"]["python"] = ["hash_enricher"]
|
||||
|
||||
module_factory = example_module.module_factory
|
||||
|
||||
loaded_module = example_module.load({})
|
||||
assert loaded_module is not None
|
||||
|
||||
# check the dependency is loaded
|
||||
assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
|
||||
assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
|
||||
assert module_factory._lazy_modules["hash_enricher"] is not None
|
||||
assert module_factory._lazy_modules["hash_enricher"]._instance is not None
|
||||
|
||||
def test_load_module(example_module):
|
||||
|
||||
@@ -69,7 +65,7 @@ def test_load_module(example_module):
|
||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||
def test_load_modules(module_name):
|
||||
# test that specific modules can be loaded
|
||||
module = get_module_lazy(module_name)
|
||||
module = ModuleFactory().get_module_lazy(module_name)
|
||||
assert module is not None
|
||||
assert isinstance(module, LazyBaseModule)
|
||||
assert module.name == module_name
|
||||
@@ -86,7 +82,7 @@ def test_load_modules(module_name):
|
||||
|
||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||
def test_lazy_base_module(module_name):
|
||||
lazy_module = get_module_lazy(module_name)
|
||||
lazy_module = ModuleFactory().get_module_lazy(module_name)
|
||||
|
||||
assert lazy_module is not None
|
||||
assert isinstance(lazy_module, LazyBaseModule)
|
||||
|
||||
@@ -4,7 +4,7 @@ from argparse import ArgumentParser, ArgumentTypeError
|
||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||
from auto_archiver.version import __version__
|
||||
from auto_archiver.core.config import read_yaml, store_yaml
|
||||
from auto_archiver.core.module import _LAZY_LOADED_MODULES
|
||||
|
||||
|
||||
TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
|
||||
TEST_MODULES = "tests/data/test_modules/"
|
||||
@@ -17,22 +17,7 @@ def test_args():
|
||||
|
||||
@pytest.fixture
|
||||
def orchestrator():
|
||||
yield ArchivingOrchestrator()
|
||||
# hack - the loguru logger starts with one logger, but if orchestrator has run before
|
||||
# it'll remove the default logger, add it back in:
|
||||
|
||||
from loguru import logger
|
||||
|
||||
if not logger._core.handlers.get(0):
|
||||
logger._core.handlers_count = 0
|
||||
logger.add(sys.stderr)
|
||||
# and remove the custom logger
|
||||
if logger._core.handlers.get(1):
|
||||
logger.remove(1)
|
||||
|
||||
# delete out any loaded modules
|
||||
_LAZY_LOADED_MODULES.clear()
|
||||
|
||||
return ArchivingOrchestrator()
|
||||
|
||||
@pytest.fixture
|
||||
def basic_parser(orchestrator) -> ArgumentParser:
|
||||
|
||||
144
tests/utils/test_misc.py
Normal file
144
tests/utils/test_misc.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.utils.misc import (
|
||||
mkdir_if_not_exists,
|
||||
expand_url,
|
||||
getattr_or,
|
||||
DateTimeEncoder,
|
||||
dump_payload,
|
||||
get_datetime_from_str,
|
||||
update_nested_dict,
|
||||
calculate_file_hash,
|
||||
random_str,
|
||||
get_timestamp
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_file(tmp_path):
    """Write "test content" to test.txt in the temp directory and return its path."""
    target = tmp_path / "test.txt"
    target.write_text("test content")
    return target
|
||||
|
||||
|
||||
class TestDirectoryUtils:
    """Tests for mkdir_if_not_exists."""

    def test_mkdir_creates_new_directory(self, tmp_path):
        """A path that does not exist yet should end up as a directory."""
        target = tmp_path / "new_folder"
        mkdir_if_not_exists(target)
        assert target.exists()
        assert target.is_dir()

    def test_mkdir_exists_quietly(self, tmp_path):
        """Calling the helper on an existing directory should not raise."""
        already_there = tmp_path / "existing"
        already_there.mkdir()
        mkdir_if_not_exists(already_there)
        assert already_there.exists()
|
||||
|
||||
class TestURLExpansion:
    """Tests for expand_url with ``requests.get`` replaced by a mock."""

    @pytest.mark.parametrize(
        "input_url,expected",
        [
            ("https://example.com", "https://example.com"),
            ("https://t.co/test", "https://expanded.url"),
        ],
    )
    def test_expand_url(self, input_url, expected, mocker):
        """The example.com URL comes back as-is; the t.co URL yields the mocked response URL."""
        fake_response = mocker.Mock()
        fake_response.url = "https://expanded.url"
        mocker.patch("requests.get", return_value=fake_response)
        assert expand_url(input_url) == expected

    def test_expand_url_handles_errors(self, caplog, mocker):
        """If the request raises, the input URL is returned and the failure is logged."""
        url = "https://t.co/error"
        mocker.patch("requests.get", side_effect=Exception("Connection error"))
        expanded = expand_url(url)
        assert expanded == url
        assert f"Failed to expand url {url}" in caplog.text
|
||||
|
||||
class TestAttributeHandling:
    """Tests for getattr_or."""

    class Sample:
        # One attribute holding a value and one explicitly set to None.
        exists = "value"
        none = None

    @pytest.mark.parametrize(
        "obj,attr,default,expected",
        [
            (Sample(), "exists", "default", "value"),
            (Sample(), "none", "default", "default"),
            (Sample(), "missing", "default", "default"),
            (None, "anything", "fallback", "fallback"),
        ],
    )
    def test_getattr_or(self, obj, attr, default, expected):
        """The attribute value is returned when set; otherwise the default is."""
        looked_up = getattr_or(obj, attr, default)
        assert looked_up == expected
|
||||
|
||||
class TestDateTimeHandling:
    """Tests for DateTimeEncoder, dump_payload and get_datetime_from_str."""

    def test_datetime_encoder(self, sample_datetime):
        """A datetime encoded with DateTimeEncoder round-trips as its str() form."""
        encoded = json.dumps({"dt": sample_datetime}, cls=DateTimeEncoder)
        decoded = json.loads(encoded)
        assert decoded["dt"] == str(sample_datetime)

    def test_dump_payload(self, sample_datetime):
        """The dumped payload contains the str() form of the datetime."""
        dumped = dump_payload({"timestamp": sample_datetime})
        assert str(sample_datetime) in dumped

    @pytest.mark.parametrize(
        "dt_str,fmt,expected",
        [
            ("2023-01-01 12:00:00+00:00", None, datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)),
            ("20230101 120000", "%Y%m%d %H%M%S", datetime(2023, 1, 1, 12, 0)),
            ("invalid", None, None),
        ],
    )
    def test_datetime_from_string(self, dt_str, fmt, expected):
        """Valid strings parse to the expected datetime; an invalid one gives None."""
        parsed = get_datetime_from_str(dt_str, fmt)
        if expected is None:
            assert parsed is None
            return
        # Compare using whatever tzinfo the parser attached to its result.
        assert parsed == expected.replace(tzinfo=parsed.tzinfo)
|
||||
|
||||
class TestDictUtils:
    """Tests for update_nested_dict."""

    @pytest.mark.parametrize(
        "original,update,expected",
        [
            ({"a": 1}, {"b": 2}, {"a": 1, "b": 2}),
            ({"nested": {"a": 1}}, {"nested": {"b": 2}}, {"nested": {"a": 1, "b": 2}}),
            ({"a": {"b": {"c": 1}}}, {"a": {"b": {"c": 2}}}, {"a": {"b": {"c": 2}}}),
        ],
    )
    def test_update_nested_dict(self, original, update, expected):
        """The update is applied to ``original`` in place, merging nested dicts."""
        update_nested_dict(original, update)
        assert original == expected
|
||||
|
||||
class TestHashingUtils:
    """Tests for calculate_file_hash, checked against hashlib's SHA-256."""

    def test_file_hashing(self, sample_file):
        """A small file hashes to the SHA-256 hex digest of its content."""
        digest = calculate_file_hash(str(sample_file))
        assert digest == hashlib.sha256(b"test content").hexdigest()

    def test_large_file_hashing(self, tmp_path):
        """A 32,000,000-byte file hashes to the same digest hashlib gives for its bytes."""
        payload = b"0" * 16_000_000 * 2  # 32MB
        big_file = tmp_path / "large.bin"
        big_file.write_bytes(payload)

        reference_digest = hashlib.sha256(payload).hexdigest()
        assert calculate_file_hash(str(big_file)) == reference_digest
|
||||
|
||||
class TestMiscUtils:
    """Tests for random_str and get_timestamp."""

    def test_random_str_length(self):
        """The returned string has exactly the requested length."""
        for wanted in (8, 16, 32):
            generated = random_str(wanted)
            assert len(generated) == wanted

    def test_random_str_raises_too_long(self):
        """Requesting 64 characters raises AssertionError with the expected message."""
        with pytest.raises(AssertionError) as exc_info:
            random_str(64)
        # Checked after the block: nothing following the raising call inside
        # the ``with`` body would execute.
        assert "length must be less than 32 as UUID4 is used" == str(exc_info.value)

    def test_random_str_uniqueness(self):
        """Two consecutive calls return different strings."""
        first = random_str()
        second = random_str()
        assert first != second

    @pytest.mark.parametrize(
        "ts_input,utc,iso,expected_type",
        [
            (datetime.now(), True, True, str),
            ("2023-01-01T12:00:00+00:00", False, False, datetime),
            (1672574400, True, True, str),
        ],
    )
    def test_timestamp_parsing(self, ts_input, utc, iso, expected_type):
        """For the listed inputs, the result is a str with iso=True and a datetime with iso=False."""
        parsed = get_timestamp(ts_input, utc=utc, iso=iso)
        assert isinstance(parsed, expected_type)

    def test_invalid_timestamp_returns_none(self):
        """An unparseable date string yields None."""
        assert get_timestamp("invalid-date") is None
|
||||
Reference in New Issue
Block a user