Improve handling of ignored content from WACZ files

This commit is contained in:
msramalho
2023-07-28 12:19:14 +01:00
parent 7a5c9c65bd
commit aa71c85a98
3 changed files with 32 additions and 2 deletions

View File

@@ -1,10 +1,15 @@
from __future__ import annotations
import os
import traceback
from typing import Any, List
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import mimetypes
import ffmpeg
from ffmpeg._run import Error
from .context import ArchivingContext
from loguru import logger
@@ -74,6 +79,23 @@ class Media:
def is_audio(self) -> bool:
    """Return True when this media's mimetype begins with ``"audio"``."""
    mime = self.mimetype
    return mime.startswith("audio")
def is_image(self) -> bool:
    """Return True when this media's mimetype begins with ``"image"``."""
    mime = self.mimetype
    return mime.startswith("image")
def is_valid_video(self) -> bool:
    """Check whether the file at ``self.filename`` looks like a usable video.

    This does not check the mimetype; ``self.is_video()`` should be used
    together with this method.

    The file is probed with ffmpeg for video streams:

    * if the probe succeeds, the result is True only when at least one
      video stream reports a positive ``duration_ts``;
    * if ffmpeg itself raises its ``Error`` (as it does when reading bad
      files), the result is False;
    * on any other exception the error is logged and the check falls back
      to a minimum file size (more than 20_000 bytes);
    * if even the file size cannot be read, the file is given the benefit
      of the doubt and True is returned.

    Returns:
        bool: the verdict described above.
    """
    try:
        streams = ffmpeg.probe(self.filename, select_streams='v')['streams']
        # diagnostic dump of the probe result; debug level keeps normal runs quiet
        logger.debug(f"STREAMS FOR {self.filename} {streams}")
        # A stream may lack "duration_ts" (or carry None); comparing None > 0
        # would raise TypeError and discard a successful probe, so treat a
        # missing value as a zero duration instead.
        return any((s.get("duration_ts") or 0) > 0 for s in streams)
    except Error:
        return False  # ffmpeg errors when reading bad files
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        # probing failed for a reason unrelated to the file's content:
        # fall back to a minimum file size for a video
        try:
            return os.path.getsize(self.filename) > 20_000
        except OSError as size_error:
            # best-effort: do not reject the media just because its size is unreadable
            logger.warning(f"could not read size of {self.filename}: {size_error}")
    return True