Merge branch 'dev' into fix/improve-deleted-post-detection

This commit is contained in:
Miguel Sozinho Ramalho
2026-01-08 14:36:17 +00:00
committed by GitHub
5 changed files with 694 additions and 635 deletions

1238
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
*.py

View File

@@ -3,6 +3,13 @@
"type": ["enricher"],
"requires_setup": True,
"dependencies": {"python": ["loguru"], "bin": ["exiftool"]},
"configs": {
"look_for_keys": {
"default": [],
"help": "list of lowercased metadata keys that will be included in the enriched metadata. Special keys: 'author', 'datetimes', 'location' to include related metadata fields. The default empty list `[]` means all metadata will be included.",
"type": "list",
},
},
"description": """
Extracts metadata information from files using ExifTool.

View File

@@ -16,6 +16,8 @@ class MetadataEnricher(Enricher):
for i, m in enumerate(to_enrich.media):
if len(md := self.get_metadata(m.filename)):
if self.look_for_keys != []:
md = self.select_metadata(md, self.look_for_keys)
to_enrich.media[i].set("metadata", md)
def get_metadata(self, filename: str) -> dict:
@@ -23,7 +25,6 @@ class MetadataEnricher(Enricher):
# Run ExifTool command to extract metadata from the file
cmd = ["exiftool", filename]
result = subprocess.run(cmd, capture_output=True, text=True)
# Process the output to extract individual metadata fields
metadata = {}
for line in result.stdout.splitlines():
@@ -35,3 +36,33 @@ class MetadataEnricher(Enricher):
except Exception as e:
logger.error(f"Error occurred: {e}: {traceback.format_exc()}")
return {}
def select_metadata(self, all_md, requested_metadata_keys):
    """
    Reduce the full ExifTool output to the fields the user asked for.

    A field is kept when its value is non-empty and either:
      * its key (as-is or lowercased) appears in ``requested_metadata_keys``, or
      * a special key was requested and the lowercased field key contains one
        of that special key's search terms.

    Special keys: ``author``, ``datetime`` (``datetimes`` is accepted as an
    alias, matching the wording of the config help text) and ``location``.

    :param all_md: mapping of metadata key -> value produced by ExifTool
    :param requested_metadata_keys: keys and/or special keys to keep
    :return: new dict with the selected fields, in the order of ``all_md``
    """
    # search terms pulled in by each special key
    special_key_terms = {
        "author": ("author", "producer", "creator"),
        "datetime": ("date", "time"),
        "datetimes": ("date", "time"),
        "location": ("gps", "latitude", "longitude"),
    }
    # only the terms of the special keys that were actually requested
    active_terms = [
        term
        for special_key, terms in special_key_terms.items()
        if special_key in requested_metadata_keys
        for term in terms
    ]

    specified_md = {}
    for md_key, md_value in all_md.items():
        # empty values are never reported, regardless of how the key matched;
        # previously a lowercase direct match skipped this check because
        # `and` binds tighter than `or`
        if not md_value:
            continue
        md_key_lower = md_key.lower()
        requested_directly = md_key in requested_metadata_keys or md_key_lower in requested_metadata_keys
        if requested_directly or any(term in md_key_lower for term in active_terms):
            specified_md[md_key] = md_value
    return specified_md

View File

@@ -56,6 +56,19 @@ def test_enrich_sets_metadata(enricher, mocker):
assert metadata.media == [media1, media2]
def test_enrich_no_metadata_selection(enricher, mocker):
    """
    When no extracted key matches `look_for_keys`, media that had metadata is
    given an empty dict, and media without metadata is never touched.
    """
    with_md = mocker.Mock(filename="img1.jpg")
    without_md = mocker.Mock(filename="img2.jpg")
    item = mocker.Mock()
    item.media = [with_md, without_md]

    def fake_get_metadata(f):
        # only the first file yields any metadata
        if f == "img1.jpg":
            return {"key": "value"}
        return {}

    enricher.get_metadata = fake_get_metadata
    enricher.look_for_keys = ["no-key"]

    enricher.enrich(item)

    with_md.set.assert_called_once_with("metadata", {})
    without_md.set.assert_not_called()
    assert item.media == [with_md, without_md]
def test_enrich_empty_media(enricher, mocker):
metadata = mocker.Mock()
metadata.media = []
@@ -71,7 +84,9 @@ def test_get_metadata_error_handling(enricher, mocker):
assert "Error occurred: " in mock_log.call_args[0][0]
def test_metadata_pickle(enricher, unpickle, mocker):
# TODO depends on the expected functionality
"""
def test_default_metadata_pickle(enricher, unpickle, mocker):
mock_run = mocker.patch("subprocess.run")
# Uses pickled values
mock_run.return_value = unpickle("metadata_enricher_exif.pickle")
@@ -79,6 +94,39 @@ def test_metadata_pickle(enricher, unpickle, mocker):
expected = unpickle("metadata_enricher_ytshort_expected.pickle")
enricher.enrich(metadata)
expected_media = expected.media
print(expected_media)
actual_media = metadata.media
assert len(expected_media) == len(actual_media)
assert actual_media[0].properties.get("metadata") == expected_media[0].properties.get("metadata")
"""
def test_metadata_pickle_megapixel(enricher, unpickle, mocker):
    """Requesting only `megapixels` keeps just the Megapixels field from the pickled ExifTool output."""
    patched_run = mocker.patch("subprocess.run")
    patched_run.return_value = unpickle("metadata_enricher_exif.pickle")
    item = unpickle("metadata_enricher_ytshort_input.pickle")
    enricher.look_for_keys = ["megapixels"]

    enricher.enrich(item)

    first_media = item.media[0]
    assert first_media.properties.get("metadata") == {"Megapixels": "0.922"}
def test_metadata_specify_datetime_and_metapixels(enricher, unpickle, mocker):
    """The `datetime` special key combined with two direct keys selects the date/time fields plus those keys."""
    patched_run = mocker.patch("subprocess.run")
    patched_run.return_value = unpickle("metadata_enricher_exif.pickle")
    item = unpickle("metadata_enricher_ytshort_input.pickle")
    enricher.look_for_keys = ["datetime", "megapixels", "image height"]

    enricher.enrich(item)

    timestamp = "2025:02:18 19:42:50+00:00"
    expected_metadata = {
        "File Modification Date/Time": timestamp,
        "File Access Date/Time": timestamp,
        "File Inode Change Date/Time": timestamp,
        "Megapixels": "0.922",
        "Image Height": "720",
    }
    first_media = item.media[0]
    assert first_media.properties.get("metadata") == expected_metadata