fixes incomplete yt-dlp parts download

This commit is contained in:
msramalho
2026-04-27 12:34:47 +01:00
parent 20fddce3a3
commit bc06de8e5c
4 changed files with 68 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ Key Functionalities:
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
import os
from typing import Any, List, Union, Dict from typing import Any, List, Union, Dict
from dataclasses import dataclass, field from dataclasses import dataclass, field
from dataclasses_json import dataclass_json from dataclasses_json import dataclass_json
@@ -186,6 +187,9 @@ class Metadata:
continue continue
h = m.get("hash") h = m.get("hash")
if not h: if not h:
if not os.path.exists(m.filename):
logger.warning(f"Skipping missing media file: {m.filename}")
continue
h = calculate_hash_in_chunks(hashlib.sha256(), int(1.6e7), m.filename) h = calculate_hash_in_chunks(hashlib.sha256(), int(1.6e7), m.filename)
if len(h) and h in media_hashes: if len(h) and h in media_hashes:
continue continue

View File

@@ -120,6 +120,9 @@ def ydl_entry_to_filename(ydl, entry: dict) -> str:
directory = os.path.dirname(base_filename) # '/get/path/to' directory = os.path.dirname(base_filename) # '/get/path/to'
basename = os.path.basename(base_filename) # 'file' basename = os.path.basename(base_filename) # 'file'
for f in os.listdir(directory): for f in os.listdir(directory):
# skip incomplete downloads left behind by yt-dlp
if f.endswith(".part"):
continue
if ( if (
f.startswith(basename) f.startswith(basename)
or (entry_url and os.path.splitext(f)[0] in entry_url) or (entry_url and os.path.splitext(f)[0] in entry_url)

View File

@@ -86,6 +86,22 @@ def test_media_management(basic_metadata, media_file):
assert basic_metadata.get_media_by_id("m1") == media1 assert basic_metadata.get_media_by_id("m1") == media1
def test_remove_duplicate_skips_missing_files(basic_metadata, media_file, tmp_path):
    """Media pointing at a file that is not on disk is dropped during de-duplication.

    Two media entries are registered: one backed by a real file (with an
    explicit hash) and one whose path does not exist. After
    ``remove_duplicate_media_by_hash`` runs, only the entry backed by the real
    file must remain; the call is expected to complete instead of crashing
    with FileNotFoundError.
    """
    # Arrange: one file that really exists, and one path that never will.
    existing_path = tmp_path / "exists.txt"
    existing_path.write_text("content")
    present_media = media_file(filename=str(existing_path), hash_value="abc")
    absent_media = media_file(filename="/nonexistent/path/gone.mp4")

    for media, media_id in ((present_media, "valid"), (absent_media, "missing")):
        basic_metadata.add_media(media, media_id)
    assert len(basic_metadata.media) == 2

    # Act
    basic_metadata.remove_duplicate_media_by_hash()

    # Assert: only the media backed by a real file survives.
    assert len(basic_metadata.media) == 1
    assert basic_metadata.get_media_by_id("valid") == present_media
def test_success(): def test_success():
m = Metadata() m = Metadata()
assert not m.is_success() assert not m.is_success()

View File

@@ -14,6 +14,7 @@ from auto_archiver.utils.misc import (
calculate_file_hash, calculate_file_hash,
random_str, random_str,
get_timestamp, get_timestamp,
ydl_entry_to_filename,
) )
@@ -139,3 +140,47 @@ class TestMiscUtils:
def test_invalid_timestamp_returns_none(self): def test_invalid_timestamp_returns_none(self):
assert get_timestamp("invalid-date") is None assert get_timestamp("invalid-date") is None
class TestYdlEntryToFilename:
    """Checks for ydl_entry_to_filename, with emphasis on ignoring ``.part`` files."""

    def _make_mock_ydl(self, prepared_filename):
        """Return a stub whose ``prepare_filename(entry)`` always yields ``prepared_filename``."""

        def prepare_filename(self, entry):
            # The entry is ignored on purpose: each test fixes the answer up front.
            return prepared_filename

        stub_cls = type("MockYDL", (), {"prepare_filename": prepare_filename})
        return stub_cls()

    def test_returns_exact_file_if_exists(self, tmp_path):
        """When the prepared filename is present on disk, that exact path is returned."""
        target = tmp_path / "video.mp4"
        target.write_bytes(b"data")
        stub = self._make_mock_ydl(str(target))

        assert ydl_entry_to_filename(stub, {}) == str(target)

    def test_skips_part_file_returns_complete(self, tmp_path):
        """A leftover ``.part`` from a failed format must not shadow a complete ``.webm``."""
        leftover = tmp_path / "f5U3IKfoSYs.f399.mp4.part"
        leftover.write_bytes(b"incomplete")
        complete = tmp_path / "f5U3IKfoSYs.webm"
        complete.write_bytes(b"complete video")

        # The stub reports the expected .mp4 path, which is never created on disk.
        expected_mp4 = tmp_path / "f5U3IKfoSYs.mp4"
        stub = self._make_mock_ydl(str(expected_mp4))

        resolved = ydl_entry_to_filename(stub, {})

        assert resolved == str(complete)
        assert not resolved.endswith(".part")

    def test_skips_part_file_returns_false_if_no_other_match(self, tmp_path):
        """With nothing but a ``.part`` file in the directory, the result is False."""
        leftover = tmp_path / "video.f399.mp4.part"
        leftover.write_bytes(b"incomplete")
        stub = self._make_mock_ydl(str(tmp_path / "video.mp4"))

        assert ydl_entry_to_filename(stub, {}) is False

    def test_returns_false_when_no_files_match(self, tmp_path):
        """An unrelated file in the directory does not count as a match; the result is False."""
        unrelated = tmp_path / "unrelated.txt"
        unrelated.write_bytes(b"data")
        stub = self._make_mock_ydl(str(tmp_path / "video.mp4"))

        assert ydl_entry_to_filename(stub, {}) is False