diff --git a/example.orchestration.yaml b/example.orchestration.yaml index 06084cd..a040f5c 100644 --- a/example.orchestration.yaml +++ b/example.orchestration.yaml @@ -50,6 +50,7 @@ configurations: text: textual content screenshot: screenshot hash: hash + pdq_hash: perceptual hashes wacz: wacz replaywebpage: replaywebpage instagram_tbot_archiver: diff --git a/src/auto_archiver/core/media.py b/src/auto_archiver/core/media.py index 55af33e..2c8ac28 100644 --- a/src/auto_archiver/core/media.py +++ b/src/auto_archiver/core/media.py @@ -35,7 +35,7 @@ class Media: def all_inner_media(self, include_self=False): """ Media can be inside media properties, examples include transformations on original media. - This function return a generator for all the inner media. + This function returns a generator for all the inner media. """ if include_self: yield self for prop in self.properties.values(): diff --git a/src/auto_archiver/enrichers/pdq_hash_enricher.py b/src/auto_archiver/enrichers/pdq_hash_enricher.py index a903159..79cd604 100644 --- a/src/auto_archiver/enrichers/pdq_hash_enricher.py +++ b/src/auto_archiver/enrichers/pdq_hash_enricher.py @@ -28,8 +28,7 @@ class PdqHashEnricher(Enricher): for m in to_enrich.media: for media in m.all_inner_media(True): - if media.is_image() and media.get("id") != "screenshot": - if len(hd := self.calculate_pdq_hash(media.filename)): + if media.is_image() and media.get("id") != "screenshot" and len(hd := self.calculate_pdq_hash(media.filename)): media.set("pdq_hash", hd) def calculate_pdq_hash(self, filename):