diff --git a/src/auto_archiver/archivers/twitter_archiver.py b/src/auto_archiver/archivers/twitter_archiver.py
index b99e0d8..c5d907d 100644
--- a/src/auto_archiver/archivers/twitter_archiver.py
+++ b/src/auto_archiver/archivers/twitter_archiver.py
@@ -6,7 +6,7 @@ from slugify import slugify
 
 from . import Archiver
 from ..core import Metadata, Media
-from ..utils.misc import remove_get_parameters
+from ..utils import UrlUtil
 
 
 class TwitterArchiver(Archiver):
@@ -78,7 +78,7 @@ class TwitterArchiver(Archiver):
                 media.set("src", variant.url)
                 mimetype = variant.contentType
             elif type(tweet_media) == Photo:
-                media.set("src", tweet_media.fullUrl.replace('name=large', 'name=orig'))
+                media.set("src", tweet_media.fullUrl.replace('name=large', 'name=orig').replace('name=small', 'name=orig'))
                 mimetype = "image/jpeg"
             else:
                 logger.warning(f"Could not get media URL of {tweet_media}")
@@ -96,21 +96,7 @@ class TwitterArchiver(Archiver):
         https://github.com/JustAnotherArchivist/snscrape/issues/996#issuecomment-1615937362
         next to test: https://cdn.embedly.com/widgets/media.html?&schema=twitter&url=https://twitter.com/bellingcat/status/1674700676612386816
         """
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
-            "Accept": "*/*",
-            "Accept-Language": "en-US,en;q=0.5",
-            "Accept-Encoding": "gzip, deflate, br",
-            "Origin": "https://platform.twitter.com",
-            "Connection": "keep-alive",
-            "Referer": "https://platform.twitter.com/",
-            "Sec-Fetch-Dest": "empty",
-            "Sec-Fetch-Mode": "cors",
-            "Sec-Fetch-Site": "cross-site",
-            "Pragma": "no-cache",
-            "Cache-Control": "no-cache",
-            "TE": "trailers"
-        }
+
         logger.debug(f"Trying twitter hack for {url=}")
         result = Metadata()
 
@@ -134,7 +120,7 @@ class TwitterArchiver(Archiver):
             media = Media(filename="")
             media.set("src", u)
             ext = ""
-            if (mtype := mimetypes.guess_type(remove_get_parameters(u))[0]):
+            if (mtype := mimetypes.guess_type(UrlUtil.remove_get_parameters(u))[0]):
                 ext = mimetypes.guess_extension(mtype)
 
             media.filename = self.download_from_url(u, f'{slugify(url)}_{i}{ext}', item)
diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py
index c60bf65..8ac2ddf 100644
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -109,6 +109,8 @@ class ArchivingOrchestrator:
         # looks for Media in result.media and also result.media[x].properties (as list or dict values)
         result.store()
 
+        # TODO: remove any duplicate media, if hash is available
+
         # 6 - format and store formatted if needed
         # enrichers typically need access to already stored URLs etc
         if (final_media := self.formatter.format(result)):
diff --git a/src/auto_archiver/enrichers/pdq_hash_enricher.py b/src/auto_archiver/enrichers/pdq_hash_enricher.py
index 79cd604..9b11053 100644
--- a/src/auto_archiver/enrichers/pdq_hash_enricher.py
+++ b/src/auto_archiver/enrichers/pdq_hash_enricher.py
@@ -1,6 +1,7 @@
+import traceback
 import pdqhash
 import numpy as np
-from PIL import Image
+from PIL import Image, UnidentifiedImageError
 from loguru import logger
 
 from . import Enricher
@@ -32,11 +33,15 @@ class PdqHashEnricher(Enricher):
                         media.set("pdq_hash", hd)    
 
     def calculate_pdq_hash(self, filename):
-        # returns a hexadecimal string with the perceptual hash for the given filename 
-        with Image.open(filename) as img:
-            # convert the image to RGB
-            image_rgb = np.array(img.convert("RGB"))
-            # compute the 256-bit PDQ hash (we do not store the quality score)
-            hash_array, _ = pdqhash.compute(image_rgb)
-            hash = "".join(str(b) for b in hash_array)
-            return hex(int(hash, 2))[2:]
+        # returns the PDQ perceptual hash of filename as a hex string, or "" if PIL cannot identify the image
+        try:
+            with Image.open(filename) as img:
+                image_rgb = np.array(img.convert("RGB"))
+                # compute the 256-bit PDQ hash (we do not store the quality score)
+                hash_array, _ = pdqhash.compute(image_rgb)
+                bits = "".join(str(b) for b in hash_array)
+                # zero-pad to one hex digit per 4 bits: hex(int(...))[2:] drops leading zeros and shortens the hash
+                return f"{int(bits, 2):0{len(bits) // 4}x}"
+        except UnidentifiedImageError as e:
+            logger.error(f"Image {filename=} is likely corrupted or in unsupported format {e}: {traceback.format_exc()}")
+        return ""
\ No newline at end of file