mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 12:48:28 +03:00
Fix instagram_extractor.py typo, add warning to docs, and add basic regex test.
This commit is contained in:
@@ -10,25 +10,30 @@
|
||||
"requires_setup": True,
|
||||
"configs": {
|
||||
"username": {"required": True,
|
||||
"help": "a valid Instagram username"},
|
||||
"help": "A valid Instagram username."},
|
||||
"password": {
|
||||
"required": True,
|
||||
"help": "the corresponding Instagram account password",
|
||||
"help": "The corresponding Instagram account password.",
|
||||
},
|
||||
"download_folder": {
|
||||
"default": "instaloader",
|
||||
"help": "name of a folder to temporarily download content to",
|
||||
"help": "Name of a folder to temporarily download content to.",
|
||||
},
|
||||
"session_file": {
|
||||
"default": "secrets/instaloader.session",
|
||||
"help": "path to the instagram session which saves session credentials",
|
||||
"help": "Path to the instagram session file which saves session credentials. If one doesn't exist this gives the path to store a new one.",
|
||||
},
|
||||
# TODO: fine-grain
|
||||
# "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"},
|
||||
},
|
||||
"description": """
|
||||
Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram. This class handles both individual posts
|
||||
and user profiles, downloading as much information as possible, including images, videos, text, stories,
|
||||
Uses the [Instaloader library](https://instaloader.github.io/as-module.html) to download content from Instagram.
|
||||
|
||||
> ⚠️ **Warning**
|
||||
> This module is not actively maintained due to known issues with blocking.
|
||||
> Prioritise usage of the `instagram_tbot_extractor` and `instagram_api_extractor`.
|
||||
|
||||
This class handles both individual posts and user profiles, downloading as much information as possible, including images, videos, text, stories,
|
||||
highlights, and tagged posts.
|
||||
Authentication is required via username/password or a session file.
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
"""
|
||||
import re, os, shutil
|
||||
from sys import exc_info
|
||||
|
||||
import instaloader
|
||||
from loguru import logger
|
||||
|
||||
@@ -17,10 +15,9 @@ class InstagramExtractor(Extractor):
|
||||
"""
|
||||
Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...)
|
||||
"""
|
||||
|
||||
# NB: post regex should be tested before profile
|
||||
|
||||
valid_url = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/")
|
||||
|
||||
# https://regex101.com/r/MGPquX/1
|
||||
post_pattern = re.compile(r"{valid_url}(?:p|reel)\/(\w+)".format(valid_url=valid_url))
|
||||
# https://regex101.com/r/6Wbsxa/1
|
||||
@@ -38,19 +35,14 @@ class InstagramExtractor(Extractor):
|
||||
)
|
||||
try:
|
||||
self.insta.load_session_from_file(self.username, self.session_file)
|
||||
except FileNotFoundError:
|
||||
logger.info("No existing session file found - Attempting login with use and password.")
|
||||
except Exception as e:
|
||||
try:
|
||||
logger.debug(f"Session file failed", exc_info=True)
|
||||
logger.info("No valid session file found - Attempting login with use and password.")
|
||||
self.insta.login(self.username, self.password)
|
||||
self.insta.save_session_to_file(self.session_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to log in with Instaloader: {e}")
|
||||
# TODO raise exception?
|
||||
# raise Exception(f"Failed to log in with Instaloader: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading session file: {e}")
|
||||
# TODO raise exception?
|
||||
# raise Exception(f"Error loading session file: {e}")
|
||||
logger.error(f"Failed to setup Instagram Extractor with Instagrapi. {e}")
|
||||
|
||||
|
||||
def download(self, item: Metadata) -> Metadata:
|
||||
|
||||
Reference in New Issue
Block a user