From a705a78632da1ca6d852dbe338c263a31943ee2a Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Mon, 3 Mar 2025 21:06:09 +0000
Subject: [PATCH 1/4] Fix instagram_extractor.py typo in config value.

---
 .../instagram_extractor.py                    | 28 +++++++++-----
 tests/extractors/test_instagram_extractor.py  | 37 ++++++++++++-------
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
index 0af2c32..7ae3b01 100644
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@@ -3,7 +3,9 @@
     highlights, and tagged posts. Authentication is required via username/password or a session file.
 
 """
-import re, os, shutil, traceback
+import re, os, shutil
+from sys import exc_info
+
 import instaloader
 from loguru import logger
 
@@ -28,19 +30,27 @@ class InstagramExtractor(Extractor):
     def setup(self) -> None:
 
         self.insta = instaloader.Instaloader(
-            download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"
+            download_geotags=True,
+            download_comments=True,
+            compress_json=False,
+            dirname_pattern=self.download_folder,
+            filename_pattern="{date_utc}_UTC_{target}__{typename}"
         )
         try:
             self.insta.load_session_from_file(self.username, self.session_file)
-        except Exception as e:
-            logger.error(f"Unable to login from session file: {e}\n{traceback.format_exc()}")
+        except FileNotFoundError:
+            logger.info("No existing session file found - Attempting login with use and password.")
             try:
-                self.insta.login(self.username, config.instagram_self.password)
-                # TODO: wait for this issue to be fixed https://github.com/instaloader/instaloader/issues/1758
+                self.insta.login(self.username, self.password)
                 self.insta.save_session_to_file(self.session_file)
-            except Exception as e2:
-                logger.error(f"Unable to finish login (retrying from file): {e2}\n{traceback.format_exc()}")
-
+            except Exception as e:
+                logger.error(f"Failed to log in with Instaloader: {e}")
+                # TODO raise exception?
+                # raise Exception(f"Failed to log in with Instaloader: {e}")
+        except Exception as e:
+            logger.error(f"Error loading session file: {e}")
+            # TODO raise exception?
+            # raise Exception(f"Error loading session file: {e}")
 
 
     def download(self, item: Metadata) -> Metadata:
diff --git a/tests/extractors/test_instagram_extractor.py b/tests/extractors/test_instagram_extractor.py
index 7efe1b1..97549b8 100644
--- a/tests/extractors/test_instagram_extractor.py
+++ b/tests/extractors/test_instagram_extractor.py
@@ -3,19 +3,30 @@ import pytest
 from auto_archiver.modules.instagram_extractor import InstagramExtractor
 from .test_extractor_base import TestExtractorBase
 
-class TestInstagramExtractor(TestExtractorBase):
+
+@pytest.fixture
+def intsagram_extractor(setup_module):
 
     extractor_module: str = 'instagram_extractor'
-    config: dict = {}
+    config: dict = {
+        "username": "user_name",
+        "password": "password123",
+        "download_folder": "instaloader",
+        "session_file": "secrets/instaloader.session",
+    }
+    return setup_module(extractor_module, config)
 
-    @pytest.mark.parametrize("url", [
-        "https://www.instagram.com/p/",
-        "https://www.instagram.com/p/1234567890/",
-        "https://www.instagram.com/reel/1234567890/",
-        "https://www.instagram.com/username/",
-        "https://www.instagram.com/username/stories/",
-        "https://www.instagram.com/username/highlights/",
-    ])
-    def test_regex_matches(self, url):
-        # post
-        assert InstagramExtractor.valid_url.match(url)
+
+
+
+@pytest.mark.parametrize("url", [
+    "https://www.instagram.com/p/",
+    "https://www.instagram.com/p/1234567890/",
+    "https://www.instagram.com/reel/1234567890/",
+    "https://www.instagram.com/username/",
+    "https://www.instagram.com/username/stories/",
+    "https://www.instagram.com/username/highlights/",
+])
+def test_regex_matches(url, instagram_extractor):
+    # post
+    assert  instagram_extractor.valid_url.match(url)

From fa1e65f54c5bf8ee09b2e6fcec0e7c997cc6b97a Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Thu, 6 Mar 2025 16:25:38 +0000
Subject: [PATCH 2/4] Fix instagram_extractor.py typo, add warning to docs, and
 add basic regex test.

---
 .../instagram_extractor/__manifest__.py        | 17 +++++++++++------
 .../instagram_extractor/instagram_extractor.py | 18 +++++-------------
 tests/extractors/test_instagram_extractor.py   | 18 +++++++++++-------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/src/auto_archiver/modules/instagram_extractor/__manifest__.py b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
index 05cae19..c9b479a 100644
--- a/src/auto_archiver/modules/instagram_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
@@ -10,25 +10,30 @@
     "requires_setup": True,
     "configs": {
         "username": {"required": True,
-                     "help": "a valid Instagram username"},
+                     "help": "A valid Instagram username."},
         "password": {
             "required": True,
-            "help": "the corresponding Instagram account password",
+            "help": "The corresponding Instagram account password.",
         },
         "download_folder": {
             "default": "instaloader",
-            "help": "name of a folder to temporarily download content to",
+            "help": "Name of a folder to temporarily download content to.",
         },
         "session_file": {
             "default": "secrets/instaloader.session",
-            "help": "path to the instagram session which saves session credentials",
+            "help": "Path to the Instagram session file, which stores session credentials. If the file doesn't exist, a new session is saved to this path.",
         },
         # TODO: fine-grain
         # "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"},
     },
     "description": """
-    Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. This class handles both individual posts
-    and user profiles, downloading as much information as possible, including images, videos, text, stories,
+    Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. 
+    
+      > ⚠️ **Warning**  
+      > This module is not actively maintained due to known issues with blocking.  
+      > Prioritise usage of the `instagram_tbot_extractor` and `instagram_api_extractor`.
+  
+    This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
     highlights, and tagged posts. 
     Authentication is required via username/password or a session file.
                     
diff --git a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
index 7ae3b01..7e195ad 100644
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@@ -4,8 +4,6 @@
 
 """
 import re, os, shutil
-from sys import exc_info
-
 import instaloader
 from loguru import logger
 
@@ -17,10 +15,9 @@ class InstagramExtractor(Extractor):
     """
     Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...)
     """
+
     # NB: post regex should be tested before profile
-
     valid_url = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/")
-
     # https://regex101.com/r/MGPquX/1
     post_pattern = re.compile(r"{valid_url}(?:p|reel)\/(\w+)".format(valid_url=valid_url))
     # https://regex101.com/r/6Wbsxa/1
@@ -38,19 +35,14 @@ class InstagramExtractor(Extractor):
         )
         try:
             self.insta.load_session_from_file(self.username, self.session_file)
-        except FileNotFoundError:
-            logger.info("No existing session file found - Attempting login with use and password.")
+        except Exception as e:
             try:
+                logger.debug(f"Session file failed", exc_info=True)
+                logger.info("No valid session file found - Attempting login with username and password.")
                 self.insta.login(self.username, self.password)
                 self.insta.save_session_to_file(self.session_file)
             except Exception as e:
-                logger.error(f"Failed to log in with Instaloader: {e}")
-                # TODO raise exception?
-                # raise Exception(f"Failed to log in with Instaloader: {e}")
-        except Exception as e:
-            logger.error(f"Error loading session file: {e}")
-            # TODO raise exception?
-            # raise Exception(f"Error loading session file: {e}")
+                logger.error(f"Failed to set up Instagram Extractor with Instaloader. {e}")
 
 
     def download(self, item: Metadata) -> Metadata:
diff --git a/tests/extractors/test_instagram_extractor.py b/tests/extractors/test_instagram_extractor.py
index 97549b8..647cab4 100644
--- a/tests/extractors/test_instagram_extractor.py
+++ b/tests/extractors/test_instagram_extractor.py
@@ -1,11 +1,10 @@
 import pytest
 
 from auto_archiver.modules.instagram_extractor import InstagramExtractor
-from .test_extractor_base import TestExtractorBase
 
 
 @pytest.fixture
-def intsagram_extractor(setup_module):
+def instagram_extractor(setup_module, mocker):
 
     extractor_module: str = 'instagram_extractor'
     config: dict = {
@@ -14,11 +13,14 @@ def intsagram_extractor(setup_module):
         "download_folder": "instaloader",
         "session_file": "secrets/instaloader.session",
     }
+    fake_loader = mocker.MagicMock()
+    fake_loader.load_session_from_file.return_value = None
+    fake_loader.login.return_value = None
+    fake_loader.save_session_to_file.return_value = None
+    mocker.patch("instaloader.Instaloader", return_value=fake_loader,)
     return setup_module(extractor_module, config)
 
 
-
-
 @pytest.mark.parametrize("url", [
     "https://www.instagram.com/p/",
     "https://www.instagram.com/p/1234567890/",
@@ -27,6 +29,8 @@ def intsagram_extractor(setup_module):
     "https://www.instagram.com/username/stories/",
     "https://www.instagram.com/username/highlights/",
 ])
-def test_regex_matches(url, instagram_extractor):
-    # post
-    assert  instagram_extractor.valid_url.match(url)
+def test_regex_matches(url: str, instagram_extractor: InstagramExtractor) -> None:
+    """
+    Ensure that the valid_url regex matches all provided Instagram URLs.
+    """
+    assert instagram_extractor.valid_url.match(url)
\ No newline at end of file

From 89d2a8bb5477cfa6db57cefe1a9a7705385fdb45 Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Fri, 7 Mar 2025 12:34:19 +0000
Subject: [PATCH 3/4] Update the __manifest__.py of the Instagram Extractor.

---
 src/auto_archiver/modules/instagram_extractor/__manifest__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/auto_archiver/modules/instagram_extractor/__manifest__.py b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
index c9b479a..c5d8a5a 100644
--- a/src/auto_archiver/modules/instagram_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
@@ -31,7 +31,7 @@
     
       > ⚠️ **Warning**  
       > This module is not actively maintained due to known issues with blocking.  
-      > Prioritise usage of the `instagram_tbot_extractor` and `instagram_api_extractor`.
+      > Prioritise usage of the [Instagram Tbot Extracto](./instagram_tbot_extractor.md) and [Instagram API Extractor](./instagram_api_extractor.md)
   
     This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
     highlights, and tagged posts. 

From 4df03255a4fe39c7a1e20a546f70c3e8f31630b8 Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Fri, 7 Mar 2025 14:56:35 +0000
Subject: [PATCH 4/4] Fix typo in __manifest__.py

---
 src/auto_archiver/modules/instagram_extractor/__manifest__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/auto_archiver/modules/instagram_extractor/__manifest__.py b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
index c5d8a5a..a66389f 100644
--- a/src/auto_archiver/modules/instagram_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/instagram_extractor/__manifest__.py
@@ -31,7 +31,7 @@
     
       > ⚠️ **Warning**  
       > This module is not actively maintained due to known issues with blocking.  
-      > Prioritise usage of the [Instagram Tbot Extracto](./instagram_tbot_extractor.md) and [Instagram API Extractor](./instagram_api_extractor.md)
+      > Prioritise usage of the [Instagram Tbot Extractor](./instagram_tbot_extractor.md) and [Instagram API Extractor](./instagram_api_extractor.md).
   
     This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
     highlights, and tagged posts.