From 0d1447117c004358a6111142e99a3adce09d4070 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 15:56:13 +0100 Subject: [PATCH 01/11] updates docs to reflect new general approach extractor --- docs/source/modules/extractor.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/modules/extractor.md b/docs/source/modules/extractor.md index e6375db..096124e 100644 --- a/docs/source/modules/extractor.md +++ b/docs/source/modules/extractor.md @@ -4,8 +4,9 @@ Extractor modules are used to extract the content of a given URL. Typically, one Extractors that are able to extract content from a wide range of websites include: 1. Generic Extractor: parses videos and images on sites using the powerful yt-dlp library. -2. Wayback Machine Extractor: sends pages to the Wayback machine for archiving, and stores the link. -3. WACZ Extractor: runs a web browser to 'browse' the URL and save a copy of the page in WACZ format. +2. Antibot Extractor: uses a headless browser to bypass bot detection and extract content. +3. WACZ Extractor: runs a web browser to 'browse' the URL and save a copy of the page in WACZ format. +4. Wayback Machine Extractor: sends pages to the Wayback machine for archiving, and stores the archived link. 
```{include} autogen/extractor.md ``` From eae0da08b3ca42d7fecbcf26a4d25ae12479558f Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 16:00:03 +0100 Subject: [PATCH 02/11] fix issue with two runs of antibot extractor --- .../antibot_extractor_enricher/antibot_extractor_enricher.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py index 8d5d019..5e61bad 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py @@ -81,6 +81,9 @@ class AntibotExtractorEnricher(Extractor, Enricher): os.makedirs(self.user_data_dir, exist_ok=True) def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool: + if to_enrich.get_media_by_id("html_source_code"): + logger.info("Antibot has already been executed, skipping.") + return True using_user_data_dir = self.user_data_dir if custom_data_dir else None url = to_enrich.get_url() From 21255db86a968da25d3004d63db09adb7b3611c7 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 16:00:46 +0100 Subject: [PATCH 03/11] stops using service that is not up for timestamping --- src/auto_archiver/modules/timestamping_enricher/__manifest__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_archiver/modules/timestamping_enricher/__manifest__.py b/src/auto_archiver/modules/timestamping_enricher/__manifest__.py index 403e0ef..7e1384d 100644 --- a/src/auto_archiver/modules/timestamping_enricher/__manifest__.py +++ b/src/auto_archiver/modules/timestamping_enricher/__manifest__.py @@ -20,7 +20,7 @@ # "http://tsa.sinpe.fi.cr/tsaHttp/", # self-signed # "http://tsa.cra.ge/signserver/tsa?workerName=qtsa", # 
self-signed "http://tss.cnbs.gob.hn/TSS/HttpTspServer", - "http://dss.nowina.lu/pki-factory/tsa/good-tsa", + # "http://dss.nowina.lu/pki-factory/tsa/good-tsa", # "https://freetsa.org/tsr", # self-signed ], "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.", From 2051e8e491ae823ac032696b5bc8bfb98778b340 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 16:02:07 +0100 Subject: [PATCH 04/11] adds further exponential backoff for Sheets API worksheet enumeration --- .../gsheet_feeder_db/gsheet_feeder_db.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py index 645bd45..cb2051c 100644 --- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py +++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py @@ -32,26 +32,33 @@ class GsheetsFeederDB(Feeder, Database): if not self.sheet and not self.sheet_id: raise ValueError("You need to define either a 'sheet' name or a 'sheet_id' in your manifest.") - def open_sheet(self): + def open_sheet(self) -> gspread.Spreadsheet: if self.sheet: return self.gsheets_client.open(self.sheet) else: return self.gsheets_client.open_by_key(self.sheet_id) + @retry( + wait_exponential_multiplier=1, + stop_max_attempt_number=5, + ) + def enumerate_sheets(self, sheet) -> Iterator[gspread.Worksheet]: + for worksheet in sheet.worksheets(): + yield worksheet + def __iter__(self) -> Iterator[Metadata]: - sh = self.open_sheet() - for ii, worksheet in enumerate(sh.worksheets()): - if not self.should_process_sheet(worksheet.title): - logger.debug(f"Skipped worksheet '{worksheet.title}' due to allow/block rules") - continue - logger.info(f"Opening worksheet {ii=}: {worksheet.title=} header={self.header}") - gw = GWorksheet(worksheet, header_row=self.header, 
columns=self.columns) - if len(missing_cols := self.missing_required_columns(gw)): - logger.debug( - f"Skipped worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}" - ) - continue - with logger.contextualize(worksheet=f"{sh.title}:{worksheet.title}"): + spreadsheet = self.open_sheet() + for worksheet in self.enumerate_sheets(spreadsheet): + with logger.contextualize(worksheet=f"{spreadsheet.title}:{worksheet.title}"): + if not self.should_process_sheet(worksheet.title): + logger.debug("Skipped worksheet due to allow/block rules") + continue + logger.info(f"Opening worksheet header={self.header}") + gw = GWorksheet(worksheet, header_row=self.header, columns=self.columns) + if len(missing_cols := self.missing_required_columns(gw)): + logger.debug(f"Skipped worksheet due to missing required column(s) for {missing_cols}") + continue + # process and yield metadata here: yield from self._process_rows(gw) logger.info(f"Finished worksheet {worksheet.title}") From 52ed8196a5503f892e67eeaa43f547fb3f9c0b67 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 16:03:47 +0100 Subject: [PATCH 05/11] updates dependencies --- poetry.lock | 277 ++++++++++++++++++++++++++++------------------------ 1 file changed, 151 insertions(+), 126 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0fbc219..40d4c0c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -193,18 +193,18 @@ files = [ [[package]] name = "boto3" -version = "1.38.46" +version = "1.39.3" description = "The AWS SDK for Python" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "boto3-1.38.46-py3-none-any.whl", hash = "sha256:9c8e88a32a6465e5905308708cff5b17547117f06982908bdfdb0108b4a65079"}, - {file = "boto3-1.38.46.tar.gz", hash = "sha256:d1ca2b53138afd0341e1962bd52be6071ab7a63c5b4f89228c5ef8942c40c852"}, + {file = "boto3-1.39.3-py3-none-any.whl", hash = 
"sha256:056cfa2440fe1a157a7c2be897c749c83e1a322144aa4dad889f2fca66571019"}, + {file = "boto3-1.39.3.tar.gz", hash = "sha256:0a367106497649ae3d8a7b571b8c3be01b7b935a0fe303d4cc2574ed03aecbb4"}, ] [package.dependencies] -botocore = ">=1.38.46,<1.39.0" +botocore = ">=1.39.3,<1.40.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.13.0,<0.14.0" @@ -213,14 +213,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.38.46" +version = "1.39.3" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "botocore-1.38.46-py3-none-any.whl", hash = "sha256:89ca782ffbf2e8769ca9c89234cfa5ca577f1987d07d913ee3c68c4776b1eb5b"}, - {file = "botocore-1.38.46.tar.gz", hash = "sha256:8798e5a418c27cf93195b077153644aea44cb171fcd56edc1ecebaa1e49e226e"}, + {file = "botocore-1.39.3-py3-none-any.whl", hash = "sha256:66a81cfac18ad5e9f47696c73fdf44cdbd8f8ca51ab3fca1effca0aabf61f02f"}, + {file = "botocore-1.39.3.tar.gz", hash = "sha256:da8f477e119f9f8a3aaa8b3c99d9c6856ed0a243680aa3a3fbbfc15a8d4093fb"}, ] [package.dependencies] @@ -966,14 +966,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"] [[package]] name = "google-api-python-client" -version = "2.174.0" +version = "2.175.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "google_api_python_client-2.174.0-py3-none-any.whl", hash = "sha256:f695205ceec97bfaa1590a14282559c4109326c473b07352233a3584cdbf4b89"}, - {file = "google_api_python_client-2.174.0.tar.gz", hash = "sha256:9eb7616a820b38a9c12c5486f9b9055385c7feb18b20cbafc5c5a688b14f3515"}, + {file = "google_api_python_client-2.175.0-py3-none-any.whl", hash = "sha256:5f4313a914d11d2b0840d1daa336caef3f53e28e8234077c139f7b236fba9622"}, + {file = "google_api_python_client-2.175.0.tar.gz", hash = "sha256:f6d5b5c0141194a72cebef33feb1377fa668b3f14dc90a2fae258dbbbdcdb30c"}, ] [package.dependencies] @@ 
-1748,101 +1748,126 @@ files = [ [[package]] name = "pillow" -version = "11.2.1" +version = "11.3.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pillow-11.2.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:d57a75d53922fc20c165016a20d9c44f73305e67c351bbc60d1adaf662e74047"}, - {file = "pillow-11.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:127bf6ac4a5b58b3d32fc8289656f77f80567d65660bc46f72c0d77e6600cc95"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4ba4be812c7a40280629e55ae0b14a0aafa150dd6451297562e1764808bbe61"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8bd62331e5032bc396a93609982a9ab6b411c05078a52f5fe3cc59234a3abd1"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:562d11134c97a62fe3af29581f083033179f7ff435f78392565a1ad2d1c2c45c"}, - {file = "pillow-11.2.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c97209e85b5be259994eb5b69ff50c5d20cca0f458ef9abd835e262d9d88b39d"}, - {file = "pillow-11.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0c3e6d0f59171dfa2e25d7116217543310908dfa2770aa64b8f87605f8cacc97"}, - {file = "pillow-11.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc1c3bc53befb6096b84165956e886b1729634a799e9d6329a0c512ab651e579"}, - {file = "pillow-11.2.1-cp310-cp310-win32.whl", hash = "sha256:312c77b7f07ab2139924d2639860e084ec2a13e72af54d4f08ac843a5fc9c79d"}, - {file = "pillow-11.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:9bc7ae48b8057a611e5fe9f853baa88093b9a76303937449397899385da06fad"}, - {file = "pillow-11.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:2728567e249cdd939f6cc3d1f049595c66e4187f3c34078cbc0a7d21c47482d2"}, - {file = "pillow-11.2.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35ca289f712ccfc699508c4658a1d14652e8033e9b69839edf83cbdd0ba39e70"}, - {file = 
"pillow-11.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0409af9f829f87a2dfb7e259f78f317a5351f2045158be321fd135973fff7bf"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4e5c5edee874dce4f653dbe59db7c73a600119fbea8d31f53423586ee2aafd7"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b93a07e76d13bff9444f1a029e0af2964e654bfc2e2c2d46bfd080df5ad5f3d8"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:e6def7eed9e7fa90fde255afaf08060dc4b343bbe524a8f69bdd2a2f0018f600"}, - {file = "pillow-11.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:8f4f3724c068be008c08257207210c138d5f3731af6c155a81c2b09a9eb3a788"}, - {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a0a6709b47019dff32e678bc12c63008311b82b9327613f534e496dacaefb71e"}, - {file = "pillow-11.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f6b0c664ccb879109ee3ca702a9272d877f4fcd21e5eb63c26422fd6e415365e"}, - {file = "pillow-11.2.1-cp311-cp311-win32.whl", hash = "sha256:cc5d875d56e49f112b6def6813c4e3d3036d269c008bf8aef72cd08d20ca6df6"}, - {file = "pillow-11.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:0f5c7eda47bf8e3c8a283762cab94e496ba977a420868cb819159980b6709193"}, - {file = "pillow-11.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:4d375eb838755f2528ac8cbc926c3e31cc49ca4ad0cf79cff48b20e30634a4a7"}, - {file = "pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f"}, - {file = "pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d"}, - {file = 
"pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d"}, - {file = "pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4"}, - {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443"}, - {file = "pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c"}, - {file = "pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3"}, - {file = "pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941"}, - {file = "pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb"}, - {file = "pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28"}, - {file = "pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f"}, - {file = "pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", 
hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155"}, - {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14"}, - {file = "pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b"}, - {file = "pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2"}, - {file = "pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691"}, - {file = "pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c"}, - {file = "pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22"}, - {file = "pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406"}, - {file = "pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91"}, - {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751"}, - {file = "pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9"}, - {file = "pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd"}, - {file = "pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e"}, - {file = "pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681"}, - {file = "pillow-11.2.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:7491cf8a79b8eb867d419648fff2f83cb0b3891c8b36da92cc7f1931d46108c8"}, - {file = "pillow-11.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b02d8f9cb83c52578a0b4beadba92e37d83a4ef11570a8688bbf43f4ca50909"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:014ca0050c85003620526b0ac1ac53f56fc93af128f7546623cc8e31875ab928"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3692b68c87096ac6308296d96354eddd25f98740c9d2ab54e1549d6c8aea9d79"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:f781dcb0bc9929adc77bad571b8621ecb1e4cdef86e940fe2e5b5ee24fd33b35"}, - {file = "pillow-11.2.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:2b490402c96f907a166615e9a5afacf2519e28295f157ec3a2bb9bd57de638cb"}, - {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dd6b20b93b3ccc9c1b597999209e4bc5cf2853f9ee66e3fc9a400a78733ffc9a"}, - {file = "pillow-11.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4b835d89c08a6c2ee7781b8dd0a30209a8012b5f09c0a665b65b0eb3560b6f36"}, - {file = "pillow-11.2.1-cp39-cp39-win32.whl", hash = "sha256:b10428b3416d4f9c61f94b494681280be7686bda15898a3a9e08eb66a6d92d67"}, - {file = "pillow-11.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:6ebce70c3f486acf7591a3d73431fa504a4e18a9b97ff27f5f47b7368e4b9dd1"}, - {file = 
"pillow-11.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:c27476257b2fdcd7872d54cfd119b3a9ce4610fb85c8e32b70b42e3680a29a1e"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9b7b0d4fd2635f54ad82785d56bc0d94f147096493a79985d0ab57aedd563156"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:aa442755e31c64037aa7c1cb186e0b369f8416c567381852c63444dd666fb772"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0d3348c95b766f54b76116d53d4cb171b52992a1027e7ca50c81b43b9d9e363"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85d27ea4c889342f7e35f6d56e7e1cb345632ad592e8c51b693d7b7556043ce0"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bf2c33d6791c598142f00c9c4c7d47f6476731c31081331664eb26d6ab583e01"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e616e7154c37669fc1dfc14584f11e284e05d1c650e1c0f972f281c4ccc53193"}, - {file = "pillow-11.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:39ad2e0f424394e3aebc40168845fee52df1394a4673a6ee512d840d14ab3013"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:80f1df8dbe9572b4b7abdfa17eb5d78dd620b1d55d9e25f834efdbee872d3aed"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ea926cfbc3957090becbcbbb65ad177161a2ff2ad578b5a6ec9bb1e1cd78753c"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:738db0e0941ca0376804d4de6a782c005245264edaa253ffce24e5a15cbdc7bd"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db98ab6565c69082ec9b0d4e40dd9f6181dab0dd236d26f7a50b8b9bfbd5076"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:036e53f4170e270ddb8797d4c590e6dd14d28e15c7da375c18978045f7e6c37b"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:14f73f7c291279bd65fda51ee87affd7c1e097709f7fdd0188957a16c264601f"}, - {file = "pillow-11.2.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:208653868d5c9ecc2b327f9b9ef34e0e42a4cdd172c2988fd81d62d2bc9bc044"}, - {file = "pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6"}, + {file = "pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860"}, + {file = "pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad"}, + {file = "pillow-11.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7107195ddc914f656c7fc8e4a5e1c25f32e9236ea3ea860f257b0436011fddd0"}, + {file = "pillow-11.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc3e831b563b3114baac7ec2ee86819eb03caa1a2cef0b481a5675b59c4fe23b"}, + {file = "pillow-11.3.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f182ebd2303acf8c380a54f615ec883322593320a9b00438eb842c1f37ae50"}, + {file = "pillow-11.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4445fa62e15936a028672fd48c4c11a66d641d2c05726c7ec1f8ba6a572036ae"}, + {file = "pillow-11.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71f511f6b3b91dd543282477be45a033e4845a40278fa8dcdbfdb07109bf18f9"}, + {file = "pillow-11.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:040a5b691b0713e1f6cbe222e0f4f74cd233421e105850ae3b3c0ceda520f42e"}, + {file = "pillow-11.3.0-cp310-cp310-win32.whl", hash = "sha256:89bd777bc6624fe4115e9fac3352c79ed60f3bb18651420635f26e643e3dd1f6"}, + {file = "pillow-11.3.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:19d2ff547c75b8e3ff46f4d9ef969a06c30ab2d4263a9e287733aa8b2429ce8f"}, + {file = "pillow-11.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:819931d25e57b513242859ce1876c58c59dc31587847bf74cfe06b2e0cb22d2f"}, + {file = "pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722"}, + {file = "pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58"}, + {file = "pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f"}, + {file = "pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e"}, + {file = "pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94"}, + {file = "pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0"}, + {file = "pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac"}, + {file = "pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd"}, + {file = "pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4"}, + {file = "pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d"}, + {file = "pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149"}, + {file = "pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d"}, + {file = "pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = 
"sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b"}, + {file = "pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3"}, + {file = "pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51"}, + {file = "pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59"}, + {file = "pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe"}, + {file = "pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c"}, + {file = "pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788"}, + {file = "pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31"}, + {file = "pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632"}, + {file = 
"pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a"}, + {file = "pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214"}, + {file = "pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635"}, + {file = "pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b"}, + {file = "pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12"}, + {file = "pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db"}, + {file = "pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa"}, + {file = "pillow-11.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:48d254f8a4c776de343051023eb61ffe818299eeac478da55227d96e241de53f"}, + {file = "pillow-11.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7aee118e30a4cf54fdd873bd3a29de51e29105ab11f9aad8c32123f58c8f8081"}, + {file = "pillow-11.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23cff760a9049c502721bdb743a7cb3e03365fafcdfc2ef9784610714166e5a4"}, + {file = "pillow-11.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6359a3bc43f57d5b375d1ad54a0074318a0844d11b76abccf478c37c986d3cfc"}, + {file = "pillow-11.3.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:092c80c76635f5ecb10f3f83d76716165c96f5229addbd1ec2bdbbda7d496e06"}, + {file = "pillow-11.3.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cadc9e0ea0a2431124cde7e1697106471fc4c1da01530e679b2391c37d3fbb3a"}, + {file = "pillow-11.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:6a418691000f2a418c9135a7cf0d797c1bb7d9a485e61fe8e7722845b95ef978"}, + {file = "pillow-11.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:97afb3a00b65cc0804d1c7abddbf090a81eaac02768af58cbdcaaa0a931e0b6d"}, + {file = "pillow-11.3.0-cp39-cp39-win32.whl", hash = "sha256:ea944117a7974ae78059fcc1800e5d3295172bb97035c0c1d9345fca1419da71"}, + {file = "pillow-11.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:e5c5858ad8ec655450a7c7df532e9842cf8df7cc349df7225c60d5d348c8aada"}, + {file = "pillow-11.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:6abdbfd3aea42be05702a8dd98832329c167ee84400a1d1f61ab11437f1717eb"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d000f46e2917c705e9fb93a3606ee4a819d1e3aa7a9b442f6444f07e77cf5e25"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527b37216b6ac3a12d7838dc3bd75208ec57c1c6d11ef01902266a5a0c14fc27"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be5463ac478b623b9dd3937afd7fb7ab3d79dd290a28e2b6df292dc75063eb8a"}, + {file = "pillow-11.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8dc70ca24c110503e16918a658b869019126ecfe03109b754c402daff12b3d9f"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", 
hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7"}, + {file = "pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8"}, + {file = "pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523"}, ] [package.extras] -docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +docs = ["furo", "olefile", "sphinx (>=8.2)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] test-arrow = ["pyarrow"] -tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "trove-classifiers (>=2024.10.12)"] typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] @@ -2341,14 +2366,14 @@ files = [ [[package]] name = "pytest" -version = "8.4.0" +version = "8.4.1" description = "pytest: simple powerful testing 
with Python" optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e"}, - {file = "pytest-8.4.0.tar.gz", hash = "sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6"}, + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, ] [package.dependencies] @@ -2472,14 +2497,14 @@ pytest = ">=7.4,<8.2.2 || >8.2.2" [[package]] name = "pytest-xdist" -version = "3.7.0" +version = "3.8.0" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pytest_xdist-3.7.0-py3-none-any.whl", hash = "sha256:7d3fbd255998265052435eb9daa4e99b62e6fb9cfb6efd1f858d4d8c0c7f0ca0"}, - {file = "pytest_xdist-3.7.0.tar.gz", hash = "sha256:f9248c99a7c15b7d2f90715df93610353a485827bc06eefb6566d23f6400f126"}, + {file = "pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88"}, + {file = "pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1"}, ] [package.dependencies] @@ -2898,7 +2923,7 @@ description = "Manipulate well-formed Roman numerals" optional = false python-versions = ">=3.9" groups = ["docs"] -markers = "python_version >= \"3.11\"" +markers = "python_version != \"3.10\"" files = [ {file = "roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c"}, {file = "roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d"}, @@ -3096,34 +3121,34 @@ jeepney = ">=0.6" [[package]] name = "selenium" 
-version = "4.33.0" +version = "4.34.0" description = "Official Python bindings for Selenium WebDriver" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "selenium-4.33.0-py3-none-any.whl", hash = "sha256:af9ea757813918bddfe05cc677bf63c8a0cd277ebf8474b3dd79caa5727fca85"}, - {file = "selenium-4.33.0.tar.gz", hash = "sha256:d90974db95d2cdeb34d2fb1b13f03dc904f53e6c5d228745b0635ada10cd625d"}, + {file = "selenium-4.34.0-py3-none-any.whl", hash = "sha256:fc3535cfd99a073c21bf9091519b48ed31b34bf2cbd132f62e8c732b2e815b2d"}, + {file = "selenium-4.34.0.tar.gz", hash = "sha256:8b7eb05a0ed22f9bb2187fd256c28630824ad01d8397b4e68bc0af7dabf26c80"}, ] [package.dependencies] certifi = ">=2025.4.26" trio = ">=0.30.0,<0.31.0" trio-websocket = ">=0.12.2,<0.13.0" -typing_extensions = ">=4.13.2,<4.14.0" +typing_extensions = ">=4.14.0,<4.15.0" urllib3 = {version = ">=2.4.0,<2.5.0", extras = ["socks"]} websocket-client = ">=1.8.0,<1.9.0" [[package]] name = "seleniumbase" -version = "4.39.5" +version = "4.40.0" description = "A complete web automation framework for end-to-end testing." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "seleniumbase-4.39.5-py3-none-any.whl", hash = "sha256:bda571f4864bba126442571bb0a3ae8a9bee9253461253ac84affd9a48efdb4d"}, - {file = "seleniumbase-4.39.5.tar.gz", hash = "sha256:a6d4930eb894c84d881f0fa596fb357b0fa2bb5a9e89ac3875d9e89eb27054c7"}, + {file = "seleniumbase-4.40.0-py3-none-any.whl", hash = "sha256:dcdd2ddf08e8901928a4a0bea0e9e1a1e512b9bd796e55ede24744300a1ac44e"}, + {file = "seleniumbase-4.40.0.tar.gz", hash = "sha256:fdfacf2204327f70444f3cc59b5f4bbc8d46675b91ba50c53d1f425e3accf3d6"}, ] [package.dependencies] @@ -3156,22 +3181,22 @@ pdbp = ">=1.7.0" pip = {version = ">=25.1.1", markers = "python_version >= \"3.9\""} platformdirs = {version = ">=4.3.8", markers = "python_version >= \"3.9\""} pluggy = {version = "1.6.0", markers = "python_version >= \"3.9\""} -pygments = ">=2.19.1" +pygments = ">=2.19.2" pynose = ">=1.5.4" pyotp = "2.9.0" pyreadline3 = {version = ">=3.5.3", markers = "platform_system == \"Windows\""} -pytest = {version = "8.4.0", markers = "python_version >= \"3.9\""} +pytest = {version = "8.4.1", markers = "python_version >= \"3.9\""} pytest-html = "4.0.2" pytest-metadata = "3.1.1" pytest-ordering = "0.6" pytest-rerunfailures = {version = "15.1", markers = "python_version >= \"3.9\""} -pytest-xdist = {version = "3.7.0", markers = "python_version >= \"3.9\""} +pytest-xdist = {version = "3.8.0", markers = "python_version >= \"3.9\""} python-xlib = {version = "0.33", markers = "platform_system == \"Linux\""} pyyaml = ">=6.0.2" requests = "2.32.4" rich = ">=14.0.0,<15" sbvirtualdisplay = ">=1.4.0" -selenium = {version = "4.33.0", markers = "python_version >= \"3.10\""} +selenium = {version = "4.34.0", markers = "python_version >= \"3.10\""} setuptools = {version = ">=80.9.0", markers = "python_version >= \"3.10\""} six = ">=1.17.0" sniffio = "1.3.1" @@ -3181,7 +3206,7 @@ tabcompleter = ">=1.4.0" trio = {version = "0.30.0", markers = "python_version >= \"3.9\""} 
trio-websocket = "0.12.2" typing-extensions = ">=4.13.2" -urllib3 = {version = ">=1.26.20,<2.5.0", markers = "python_version >= \"3.10\""} +urllib3 = {version = ">=1.26.20,<2.6.0", markers = "python_version >= \"3.10\""} websocket-client = "1.8.0" websockets = {version = ">=15.0.1", markers = "python_version >= \"3.9\""} wheel = ">=0.45.1" @@ -3189,12 +3214,12 @@ wsproto = "1.2.0" [package.extras] allure = ["allure-behave (>=2.13.5)", "allure-pytest (>=2.13.5)", "allure-python-commons (>=2.13.5)"] -coverage = ["coverage (>=7.6.1) ; python_version < \"3.9\"", "coverage (>=7.9.1) ; python_version >= \"3.9\"", "pytest-cov (>=5.0.0) ; python_version < \"3.9\"", "pytest-cov (>=6.2.1) ; python_version >= \"3.9\""] -flake8 = ["flake8 (==5.0.4) ; python_version < \"3.9\"", "flake8 (==7.2.0) ; python_version >= \"3.9\"", "mccabe (==0.7.0)", "pycodestyle (==2.13.0) ; python_version >= \"3.9\"", "pycodestyle (==2.9.1) ; python_version < \"3.9\"", "pyflakes (==2.5.0) ; python_version < \"3.9\"", "pyflakes (==3.3.2) ; python_version >= \"3.9\""] +coverage = ["coverage (>=7.6.1) ; python_version < \"3.9\"", "coverage (>=7.9.2) ; python_version >= \"3.9\"", "pytest-cov (>=5.0.0) ; python_version < \"3.9\"", "pytest-cov (>=6.2.1) ; python_version >= \"3.9\""] +flake8 = ["flake8 (==5.0.4) ; python_version < \"3.9\"", "flake8 (==7.3.0) ; python_version >= \"3.9\"", "mccabe (==0.7.0)", "pycodestyle (==2.14.0) ; python_version >= \"3.9\"", "pycodestyle (==2.9.1) ; python_version < \"3.9\"", "pyflakes (==2.5.0) ; python_version < \"3.9\"", "pyflakes (==3.4.0) ; python_version >= \"3.9\""] ipdb = ["ipdb (==0.13.13)", "ipython (==7.34.0)"] mss = ["mss (==10.0.0) ; python_version >= \"3.9\"", "mss (==9.0.2) ; python_version < \"3.9\""] -pdfminer = ["cffi (==1.17.1)", "cryptography (==39.0.2) ; python_version < \"3.9\"", "cryptography (==45.0.4) ; python_version >= \"3.9\"", "pdfminer.six (==20250324) ; python_version < \"3.9\"", "pdfminer.six (==20250506) ; python_version >= \"3.9\"", 
"pycparser (==2.22)"] -pillow = ["Pillow (>=10.4.0) ; python_version < \"3.9\"", "Pillow (>=11.2.1) ; python_version >= \"3.9\""] +pdfminer = ["cffi (==1.17.1)", "cryptography (==39.0.2) ; python_version < \"3.9\"", "cryptography (==45.0.5) ; python_version >= \"3.9\"", "pdfminer.six (==20250324) ; python_version < \"3.9\"", "pdfminer.six (==20250506) ; python_version >= \"3.9\"", "pycparser (==2.22)"] +pillow = ["Pillow (>=10.4.0) ; python_version < \"3.9\"", "Pillow (>=11.3.0) ; python_version >= \"3.9\""] pip-system-certs = ["pip-system-certs (==4.0) ; platform_system == \"Windows\""] proxy = ["proxy.py (==2.4.3)"] psutil = ["psutil (==7.0.0)"] @@ -3327,7 +3352,7 @@ description = "Python documentation generator" optional = false python-versions = ">=3.11" groups = ["docs"] -markers = "python_version >= \"3.11\"" +markers = "python_version != \"3.10\"" files = [ {file = "sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3"}, {file = "sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348"}, @@ -3731,14 +3756,14 @@ wsproto = ">=0.14" [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.14.1" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main", "dev", "docs"] files = [ - {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, + {file = "typing_extensions-4.14.1.tar.gz", hash = 
"sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, ] markers = {dev = "python_version == \"3.10\""} @@ -4160,14 +4185,14 @@ h11 = ">=0.9.0,<1" [[package]] name = "yt-dlp" -version = "2025.6.25" +version = "2025.6.30" description = "A feature-rich command-line audio/video downloader" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "yt_dlp-2025.6.25-py3-none-any.whl", hash = "sha256:1eb31c9a47d56c7433be23a6ae084c640bd4e14961ad43076927ef05280871ea"}, - {file = "yt_dlp-2025.6.25.tar.gz", hash = "sha256:242b648e1a18ab04bdd4cc175a317fe8ec3ad7d0175eee9f981912624b3d6c8b"}, + {file = "yt_dlp-2025.6.30-py3-none-any.whl", hash = "sha256:541becc29ed7b7b3a08751c0a66da4b7f8ee95cb81066221c78e83598bc3d1f3"}, + {file = "yt_dlp-2025.6.30.tar.gz", hash = "sha256:6d0ae855c0a55bfcc28dffba804ec8525b9b955d34a41191a1561a4cec03d8bd"}, ] [package.dependencies] From a8c1ef3912ff3728891b52d8565cf1e352b3a359 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 16:54:58 +0100 Subject: [PATCH 06/11] generic_extractor config to use proxy only when needed to avoid overzealousness --- .../modules/generic_extractor/__manifest__.py | 4 ++++ .../generic_extractor/generic_extractor.py | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 62bd4c8..06fc3ac 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -60,6 +60,10 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "", "help": "http/https/socks proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port", }, + "proxy_on_failure_only": { + "default": True, + "help": "Applies only if a proxy is set. 
In that case if this setting is True, the extractor will only use the proxy if the initial request fails; if it is False, the extractor will always use the proxy.", + }, "end_means_success": { "default": True, "help": "if True, any archived content will mean a 'success', if False this extractor will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent extractors can retrieve.", diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index e0d3f04..e536391 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -502,6 +502,9 @@ class GenericExtractor(Extractor): try: result = self.get_metadata_for_post(info_extractor, url, ydl) except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e: + if "NSFW tweet requires authentication." 
in str(post_e): + logger.warning(str(post_e)) + return False logger.error("Error downloading metadata for post: {error}", error=str(post_e)) return False except Exception as generic_e: @@ -525,13 +528,24 @@ class GenericExtractor(Extractor): return result - def download(self, item: Metadata) -> Metadata: + def download(self, item: Metadata, skip_proxy: bool = False) -> Metadata: url = item.get_url() # TODO: this is a temporary hack until this issue is closed: https://github.com/yt-dlp/yt-dlp/issues/11025 if url.startswith("https://ya.ru"): url = url.replace("https://ya.ru", "https://yandex.ru") item.set("replaced_url", url) + logger.debug(f"{skip_proxy=}, {self.proxy_on_failure_only=}, {self.proxy=}") + + # proxy_on_failure_only logic + if self.proxy and self.proxy_on_failure_only and not skip_proxy: + # when proxy_on_failure_only is True, we first try to download without a proxy and only continue with execution if that fails + try: + if without_proxy := self.download(item, skip_proxy=True): + logger.info("Downloaded successfully without proxy.") + return without_proxy + except Exception: + logger.debug("Download without proxy failed, trying with proxy...") ydl_options = [ "-o", @@ -546,7 +560,7 @@ class GenericExtractor(Extractor): ] # proxy handling - if self.proxy: + if self.proxy and not skip_proxy: ydl_options.extend(["--proxy", self.proxy]) # max_downloads handling From 7234eda85f352b50114b3c21073bca2b8ff2af03 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 18:29:33 +0100 Subject: [PATCH 07/11] expands Sheets API retries for really large spreadsheets --- .../modules/gsheet_feeder_db/gsheet_feeder_db.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py index cb2051c..83738ac 100644 --- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py +++ 
b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py @@ -32,6 +32,10 @@ class GsheetsFeederDB(Feeder, Database): if not self.sheet and not self.sheet_id: raise ValueError("You need to define either a 'sheet' name or a 'sheet_id' in your manifest.") + @retry( + wait_exponential_multiplier=1, + stop_max_attempt_number=6, + ) def open_sheet(self) -> gspread.Spreadsheet: if self.sheet: return self.gsheets_client.open(self.sheet) @@ -40,7 +44,7 @@ class GsheetsFeederDB(Feeder, Database): @retry( wait_exponential_multiplier=1, - stop_max_attempt_number=5, + stop_max_attempt_number=6, ) def enumerate_sheets(self, sheet) -> Iterator[gspread.Worksheet]: for worksheet in sheet.worksheets(): From 37c6d9727508b2acd35eec2e6bb0a4ea6a5d38bf Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 18:30:31 +0100 Subject: [PATCH 08/11] new auth wall check logic and escaped CSS selector in selenium --- .../antibot_extractor_enricher.py | 10 ++++++++-- .../antibot_extractor_enricher/dropin.py | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py index 5e61bad..d1a4ee5 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py @@ -99,7 +99,7 @@ class AntibotExtractorEnricher(Extractor, Enricher): dropin = self._get_suitable_dropin(url, sb) dropin.open_page(url) - if self.detect_auth_wall and self._hit_auth_wall(sb): + if self.detect_auth_wall and (dropin.hit_auth_wall() and self._hit_auth_wall(sb)): logger.warning("Skipping since auth wall or CAPTCHA was detected") return False @@ -277,8 +277,14 @@ class AntibotExtractorEnricher(Extractor, Enricher): return url = to_enrich.get_url() all_urls = set() + 
logger.debug(f"Extracting media for {js_css_selector=}") + + try: + sources = sb.execute_script(js_css_selector) + except selenium.common.exceptions.JavascriptException as e: + logger.error(f"Error executing JavaScript selector {js_css_selector}: {e}") + return - sources = sb.execute_script(js_css_selector) # js_for_css_selectors for src in sources: if len(all_urls) >= max_media: diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py b/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py index 47c958a..b2539b1 100644 --- a/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py +++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py @@ -1,3 +1,4 @@ +import json import os import traceback from typing import Mapping @@ -74,8 +75,11 @@ class Dropin: You can overwrite this instead of `images_selector` for more control over scraped images. """ + if not self.images_selectors(): + return "return [];" + safe_selector = json.dumps(self.images_selectors()) return f""" - return Array.from(document.querySelectorAll("{self.images_selectors()}")).map(el => el.src || el.href).filter(Boolean); + return Array.from(document.querySelectorAll({safe_selector})).map(el => el.src || el.href).filter(Boolean); """ def js_for_video_css_selectors(self) -> str: @@ -84,8 +88,11 @@ class Dropin: You can overwrite this instead of `video_selector` for more control over scraped videos. 
""" + if not self.video_selectors(): + return "return [];" + safe_selector = json.dumps(self.video_selectors()) return f""" - return Array.from(document.querySelectorAll("{self.video_selectors()}")).map(el => el.src || el.href).filter(Boolean); + return Array.from(document.querySelectorAll({safe_selector})).map(el => el.src || el.href).filter(Boolean); """ def open_page(self, url) -> bool: @@ -103,6 +110,12 @@ class Dropin: """ return 0, 0 + def hit_auth_wall(self) -> bool: + """ + Custom check to see if the current page is behind an authentication wall, if True is returned the default global auth wall detector is used instead. If false, no auth wall is detected and the page is considered open. + """ + return True + def _get_username_password(self, site) -> tuple[str, str]: """ Get the username and password for the site from the extractor's auth data. From 3a34a49822e924ba0384ba5ee833d0bbfde19ef9 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 18:31:12 +0100 Subject: [PATCH 09/11] adds antibot tiktok logic for photos closes #295 --- .../dropins/tiktok.py | 50 +++++++++++++++++++ .../test_antibot_extractor_enricher.py | 7 +++ 2 files changed, 57 insertions(+) create mode 100644 src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py diff --git a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py new file mode 100644 index 0000000..82b4f21 --- /dev/null +++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/tiktok.py @@ -0,0 +1,50 @@ +from contextlib import suppress +from typing import Mapping +from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin + + +class TikTokDropin(Dropin): + """ + A class to handle TikTok drop-in functionality for the antibot extractor enricher module. 
+ """ + + def documentation() -> Mapping[str, str]: + return { + "name": "TikTok Dropin", + "description": "Handles TikTok posts and works without authentication.", + "site": "tiktok.com", + } + + @staticmethod + def suitable(url: str) -> bool: + return "tiktok.com" in url + + @staticmethod + def images_selectors() -> str: + return '[data-e2e="detail-photo"] img' + + @staticmethod + def video_selectors() -> str: + return None # TikTok videos should be handled by the generic extractor + + def open_page(self, url) -> bool: + self.sb.wait_for_ready_state_complete() + self._close_cookies_banner() + # TODO: implement login logic + if url != self.sb.get_current_url(): + return False + return True + + def hit_auth_wall(self) -> bool: + return False # TikTok does not require authentication for public posts + + def _close_cookies_banner(self): + with suppress(Exception): # selenium.common.exceptions.JavascriptException + self.sb.execute_script(""" + document + .querySelector("tiktok-cookie-banner") + .shadowRoot.querySelector("faceplate-dialog") + .querySelector("button") + .click() + """) + self.sb.click_if_visible("Skip") diff --git a/tests/extractors/test_antibot_extractor_enricher.py b/tests/extractors/test_antibot_extractor_enricher.py index a8a51dd..3ec34f8 100644 --- a/tests/extractors/test_antibot_extractor_enricher.py +++ b/tests/extractors/test_antibot_extractor_enricher.py @@ -88,6 +88,13 @@ class TestAntibotExtractorEnricher(TestExtractorBase): 5, 0, ), + ( + "https://www.tiktok.com/@tracy_2424/photo/7418200173953830162", + "TikTok", + "Dito ko lang", + 1, + 0, + ), ], ) def test_download_pages_with_media(self, setup_module, make_item, url, in_title, in_text, image_count, video_count): From c1506ee1cff19f365c5e037cdd213a694ebce6b6 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 18:31:39 +0100 Subject: [PATCH 10/11] some wayback errors are expected and should be warnings --- 
.../wayback_extractor_enricher.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py b/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py index 581ca88..242f2f5 100644 --- a/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py +++ b/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py @@ -2,7 +2,7 @@ import json from auto_archiver.utils.custom_logger import logger import time import requests - +from urllib3.exceptions import MaxRetryError from auto_archiver.core import Extractor, Enricher from auto_archiver.utils import url as UrlUtil from auto_archiver.core import Metadata @@ -45,7 +45,14 @@ class WaybackExtractorEnricher(Enricher, Extractor): if self.if_not_archived_within: post_data["if_not_archived_within"] = self.if_not_archived_within # see https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA for more options - r = requests.post("https://web.archive.org/save/", headers=ia_headers, data=post_data, proxies=proxies) + try: + r = requests.post("https://web.archive.org/save/", headers=ia_headers, data=post_data, proxies=proxies) + except MaxRetryError as e: + logger.warning( + f"MaxRetryError during Wayback POST call to /save, this may be do to a high number of calls leading to rate limiting: {e}" + ) + to_enrich.set("wayback", "failed: possible rate limit") + return False if r.status_code != 200: logger.error(em := f"Internet archive failed with status of {r.status_code}: {r.json()}") @@ -76,6 +83,9 @@ class WaybackExtractorEnricher(Enricher, Extractor): if r_status.status_code == 200 and r_json["status"] == "success": wayback_url = f"https://web.archive.org/web/{r_json['timestamp']}/{r_json['original_url']}" elif r_status.status_code != 200 or r_json["status"] != "pending": + if r_json.get("status_ext") in ["error:blocked-url", 
"error:unauthorized"]: + logger.warning("Wayback cannot currently archive the URL, skipping.") + to_enrich.set("wayback", r_json.get("status_ext")) logger.error(f"Wayback failed with {r_json}") return False except requests.exceptions.RequestException as e: From d36cdbfa87a7ec41dac0a0ff5607449549d3634a Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Sat, 5 Jul 2025 19:07:23 +0100 Subject: [PATCH 11/11] fixing pypaperclip see issue #339 --- poetry.lock | 10 +++++----- pyproject.toml | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 40d4c0c..8257dd2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2286,13 +2286,13 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pyperclip" -version = "1.9.0" +version = "1.8.2" description = "A cross-platform clipboard module for Python. (Only handles plain text for now.)" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310"}, + {file = "pyperclip-1.8.2.tar.gz", hash = "sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57"}, ] [[package]] @@ -2923,7 +2923,7 @@ description = "Manipulate well-formed Roman numerals" optional = false python-versions = ">=3.9" groups = ["docs"] -markers = "python_version != \"3.10\"" +markers = "python_version >= \"3.11\"" files = [ {file = "roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c"}, {file = "roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d"}, @@ -3352,7 +3352,7 @@ description = "Python documentation generator" optional = false python-versions = ">=3.11" groups = ["docs"] -markers = "python_version != \"3.10\"" +markers = "python_version >= \"3.11\"" files = [ {file = "sphinx-8.2.3-py3-none-any.whl", hash 
= "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3"}, {file = "sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348"}, @@ -4219,4 +4219,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "8f0806dff086087dcf5bbec03902bdd05794dab3d16e7e4b379015db26211c92" +content-hash = "ed36205b31c70c885fce2f0260b430e88e804b99d155a99e7fc697f8a90436f2" diff --git a/pyproject.toml b/pyproject.toml index a529334..42dc7d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [project] name = "auto-archiver" -version = "1.1.1" +version = "1.1.2" description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)." requires-python = ">=3.10,<3.13" @@ -58,6 +58,7 @@ dependencies = [ "secretstorage (>=3.3.3,<4.0.0)", "seleniumbase (>=4.36.4,<5.0.0)", "pyautogui (>=0.9.54,<0.10.0)", + "pyperclip (==1.8.2)", ] [tool.poetry.group.dev.dependencies]