Fix handling of incomplete yt-dlp `.part` downloads

This commit is contained in:
msramalho
2026-04-27 12:34:47 +01:00
parent 20fddce3a3
commit bc06de8e5c
4 changed files with 68 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ Key Functionalities:
from __future__ import annotations
import hashlib
import os
from typing import Any, List, Union, Dict
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json
@@ -186,6 +187,9 @@ class Metadata:
continue
h = m.get("hash")
if not h:
if not os.path.exists(m.filename):
logger.warning(f"Skipping missing media file: {m.filename}")
continue
h = calculate_hash_in_chunks(hashlib.sha256(), int(1.6e7), m.filename)
if len(h) and h in media_hashes:
continue

View File

@@ -120,6 +120,9 @@ def ydl_entry_to_filename(ydl, entry: dict) -> str:
directory = os.path.dirname(base_filename) # '/get/path/to'
basename = os.path.basename(base_filename) # 'file'
for f in os.listdir(directory):
# skip incomplete downloads left behind by yt-dlp
if f.endswith(".part"):
continue
if (
f.startswith(basename)
or (entry_url and os.path.splitext(f)[0] in entry_url)

View File

@@ -86,6 +86,22 @@ def test_media_management(basic_metadata, media_file):
assert basic_metadata.get_media_by_id("m1") == media1
def test_remove_duplicate_skips_missing_files(basic_metadata, media_file, tmp_path):
    """Deduplication should drop media whose file is absent rather than raise FileNotFoundError."""
    # Media created with hash_value="abc" and backed by a file that exists on disk.
    existing_path = tmp_path / "exists.txt"
    existing_path.write_text("content")
    valid = media_file(filename=str(existing_path), hash_value="abc")
    # Media created without a hash_value, pointing at a path that is not expected to exist.
    missing = media_file(filename="/nonexistent/path/gone.mp4")

    for media, media_id in ((valid, "valid"), (missing, "missing")):
        basic_metadata.add_media(media, media_id)
    assert len(basic_metadata.media) == 2

    basic_metadata.remove_duplicate_media_by_hash()

    # Only the entry with an existing file is expected to remain.
    assert len(basic_metadata.media) == 1
    assert basic_metadata.get_media_by_id("valid") == valid
def test_success():
m = Metadata()
assert not m.is_success()

View File

@@ -14,6 +14,7 @@ from auto_archiver.utils.misc import (
calculate_file_hash,
random_str,
get_timestamp,
ydl_entry_to_filename,
)
@@ -139,3 +140,47 @@ class TestMiscUtils:
def test_invalid_timestamp_returns_none(self):
    """get_timestamp should return None for the string "invalid-date"."""
    parsed = get_timestamp("invalid-date")
    assert parsed is None
class TestYdlEntryToFilename:
    """Exercises ydl_entry_to_filename, with emphasis on ignoring .part files."""

    def _make_mock_ydl(self, prepared_filename):
        """Return a stand-in ydl whose prepare_filename always yields *prepared_filename*."""

        class MockYDL:
            def prepare_filename(self, entry):
                # The entry is ignored; every call reports the same path.
                return prepared_filename

        return MockYDL()

    def test_returns_exact_file_if_exists(self, tmp_path):
        """When the prepared filename is present on disk, that exact path comes back."""
        target = tmp_path / "video.mp4"
        target.write_bytes(b"data")
        expected = str(target)
        stub = self._make_mock_ydl(expected)
        assert ydl_entry_to_filename(stub, {}) == expected

    def test_skips_part_file_returns_complete(self, tmp_path):
        """A leftover .part file from a failed format is passed over
        in favour of the complete .webm sitting next to it."""
        leftover = tmp_path / "f5U3IKfoSYs.f399.mp4.part"
        leftover.write_bytes(b"incomplete")
        complete = tmp_path / "f5U3IKfoSYs.webm"
        complete.write_bytes(b"complete video")

        # The prepared name points at an .mp4 that was never written to disk.
        stub = self._make_mock_ydl(str(tmp_path / "f5U3IKfoSYs.mp4"))
        resolved = ydl_entry_to_filename(stub, {})

        assert resolved == str(complete)
        assert not resolved.endswith(".part")

    def test_skips_part_file_returns_false_if_no_other_match(self, tmp_path):
        """With nothing but a .part file in the directory, the result is False."""
        leftover = tmp_path / "video.f399.mp4.part"
        leftover.write_bytes(b"incomplete")
        stub = self._make_mock_ydl(str(tmp_path / "video.mp4"))
        resolved = ydl_entry_to_filename(stub, {})
        assert resolved is False

    def test_returns_false_when_no_files_match(self, tmp_path):
        """A directory holding only an unrelated file yields False."""
        unrelated = tmp_path / "unrelated.txt"
        unrelated.write_bytes(b"data")
        stub = self._make_mock_ydl(str(tmp_path / "video.mp4"))
        resolved = ydl_entry_to_filename(stub, {})
        assert resolved is False