diff --git a/poetry.lock b/poetry.lock
index 088fc70..8fb48ec 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3077,14 +3077,14 @@ h11 = ">=0.9.0,<1"
 
 [[package]]
 name = "yt-dlp"
-version = "2025.1.12"
+version = "2025.1.26"
 description = "A feature-rich command-line audio/video downloader"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "yt_dlp-2025.1.12-py3-none-any.whl", hash = "sha256:f7ea19afb64f8e457a1b9598ddb67f8deaa313bf1d57abd5612db9272ab10795"},
-    {file = "yt_dlp-2025.1.12.tar.gz", hash = "sha256:8e7e246e2a5a2cff0a9c13db46844a37a547680702012058c94ec18fce0ca25a"},
+    {file = "yt_dlp-2025.1.26-py3-none-any.whl", hash = "sha256:3e76bd896b9f96601021ca192ca0fbdd195e3c3dcc28302a3a34c9bc4979da7b"},
+    {file = "yt_dlp-2025.1.26.tar.gz", hash = "sha256:1c9738266921ad43c568ad01ac3362fb7c7af549276fbec92bd72f140da16240"},
 ]
 
 [package.extras]
@@ -3100,4 +3100,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.13"
-content-hash = "5a54c84ba388db7b77d1c28973b710fc99aa3822a2860b30acaf5b02ba1927bd"
+content-hash = "9ca114395e73af8982abbccc25b385bbca62e50ba7cca8239e52e5c1227cb4b0"
diff --git a/pyproject.toml b/pyproject.toml
index 3cd47e7..f1be273 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     "cryptography (>=41.0.0,<42.0.0)",
     "boto3 (>=1.28.0,<2.0.0)",
     "dataclasses-json (>=0.0.0)",
-    "yt-dlp (==2025.1.12)",
+    "yt-dlp (>=2025.1.26,<2026.0.0)",
     "numpy (==2.1.3)",
     "vk-url-scraper (>=0.0.0)",
     "requests[socks] (>=0.0.0)",
diff --git a/src/auto_archiver/modules/generic_extractor/bluesky.py b/src/auto_archiver/modules/generic_extractor/bluesky.py
index 1f92fd8..f2086b0 100644
--- a/src/auto_archiver/modules/generic_extractor/bluesky.py
+++ b/src/auto_archiver/modules/generic_extractor/bluesky.py
@@ -23,19 +23,7 @@ class Bluesky(GenericDropin):
 
     def extract_post(self, url: str, ie_instance: InfoExtractor) -> dict:
-        # TODO: If/when this PR (https://github.com/yt-dlp/yt-dlp/pull/12098) is merged on ytdlp, remove the comments and delete the code below
-        # handle, video_id = ie_instance._match_valid_url(url).group('handle', 'id')
-        # return ie_instance._extract_post(handle=handle, post_id=video_id)
-
         handle, video_id = ie_instance._match_valid_url(url).group('handle', 'id')
-        return ie_instance._download_json(
-            'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
-            video_id, query={
-                'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
-                'depth': 0,
-                'parentHeight': 0,
-            })['thread']['post']
-
-
+        return ie_instance._extract_post(handle=handle, post_id=video_id)
 
     def _download_bsky_embeds(self, post: dict, archiver: Extractor) -> list[Media]:
         """
diff --git a/src/auto_archiver/modules/generic_extractor/facebook.py b/src/auto_archiver/modules/generic_extractor/facebook.py
new file mode 100644
index 0000000..352d44e
--- /dev/null
+++ b/src/auto_archiver/modules/generic_extractor/facebook.py
@@ -0,0 +1,32 @@
+from .dropin import GenericDropin
+
+
+class Facebook(GenericDropin):
+    """Dropin that extracts Facebook post data through a yt-dlp extractor instance."""
+
+    def extract_post(self, url: str, ie_instance):
+        """Download the post page for `url` and return its extracted metadata.
+
+        `ie_instance` is the yt-dlp info-extractor instance handling `url`;
+        only its `_match_valid_url`, `_download_webpage` and
+        `_extract_from_url` members are used here.
+        """
+        video_id = ie_instance._match_valid_url(url).group('id')
+        # Fetch the page once, from the www host rather than the mobile one.
+        webpage = ie_instance._download_webpage(
+            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
+
+        # NOTE(review): in yt-dlp, `extract_metadata` looks like a helper nested
+        # inside `_extract_from_url`, which would not be reachable as an attribute
+        # of the method - confirm this lookup works with the locked yt-dlp version.
+        return ie_instance._extract_from_url.extract_metadata(webpage)
+
+    def create_metadata(self, post: dict, ie_instance, archiver, url):
+        """Create metadata for `url` populated from the extracted `post` dict.
+
+        The title and content come from the post's 'title' and 'description'
+        keys (None when absent); the whole dict is stored as post data.
+        """
+        metadata = archiver.create_metadata(url)
+        metadata.set_title(post.get('title')).set_content(post.get('description')).set_post_data(post)
+        return metadata