mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-07-05 08:08:37 +03:00
Fix instagram_extractor.py typo in config value.
This commit is contained in:
@@ -3,7 +3,9 @@
|
|||||||
highlights, and tagged posts. Authentication is required via username/password or a session file.
|
highlights, and tagged posts. Authentication is required via username/password or a session file.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import re, os, shutil, traceback
|
import re, os, shutil
|
||||||
|
from sys import exc_info
|
||||||
|
|
||||||
import instaloader
|
import instaloader
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@@ -28,19 +30,27 @@ class InstagramExtractor(Extractor):
|
|||||||
def setup(self) -> None:
|
def setup(self) -> None:
|
||||||
|
|
||||||
self.insta = instaloader.Instaloader(
|
self.insta = instaloader.Instaloader(
|
||||||
download_geotags=True, download_comments=True, compress_json=False, dirname_pattern=self.download_folder, filename_pattern="{date_utc}_UTC_{target}__{typename}"
|
download_geotags=True,
|
||||||
|
download_comments=True,
|
||||||
|
compress_json=False,
|
||||||
|
dirname_pattern=self.download_folder,
|
||||||
|
filename_pattern="{date_utc}_UTC_{target}__{typename}"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
self.insta.load_session_from_file(self.username, self.session_file)
|
self.insta.load_session_from_file(self.username, self.session_file)
|
||||||
except Exception as e:
|
except FileNotFoundError:
|
||||||
logger.error(f"Unable to login from session file: {e}\n{traceback.format_exc()}")
|
logger.info("No existing session file found - Attempting login with use and password.")
|
||||||
try:
|
try:
|
||||||
self.insta.login(self.username, config.instagram_self.password)
|
self.insta.login(self.username, self.password)
|
||||||
# TODO: wait for this issue to be fixed https://github.com/instaloader/instaloader/issues/1758
|
|
||||||
self.insta.save_session_to_file(self.session_file)
|
self.insta.save_session_to_file(self.session_file)
|
||||||
except Exception as e2:
|
except Exception as e:
|
||||||
logger.error(f"Unable to finish login (retrying from file): {e2}\n{traceback.format_exc()}")
|
logger.error(f"Failed to log in with Instaloader: {e}")
|
||||||
|
# TODO raise exception?
|
||||||
|
# raise Exception(f"Failed to log in with Instaloader: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error loading session file: {e}")
|
||||||
|
# TODO raise exception?
|
||||||
|
# raise Exception(f"Error loading session file: {e}")
|
||||||
|
|
||||||
|
|
||||||
def download(self, item: Metadata) -> Metadata:
|
def download(self, item: Metadata) -> Metadata:
|
||||||
|
|||||||
@@ -3,19 +3,30 @@ import pytest
|
|||||||
from auto_archiver.modules.instagram_extractor import InstagramExtractor
|
from auto_archiver.modules.instagram_extractor import InstagramExtractor
|
||||||
from .test_extractor_base import TestExtractorBase
|
from .test_extractor_base import TestExtractorBase
|
||||||
|
|
||||||
class TestInstagramExtractor(TestExtractorBase):
|
|
||||||
|
@pytest.fixture
|
||||||
|
def intsagram_extractor(setup_module):
|
||||||
|
|
||||||
extractor_module: str = 'instagram_extractor'
|
extractor_module: str = 'instagram_extractor'
|
||||||
config: dict = {}
|
config: dict = {
|
||||||
|
"username": "user_name",
|
||||||
|
"password": "password123",
|
||||||
|
"download_folder": "instaloader",
|
||||||
|
"session_file": "secrets/instaloader.session",
|
||||||
|
}
|
||||||
|
return setup_module(extractor_module, config)
|
||||||
|
|
||||||
@pytest.mark.parametrize("url", [
|
|
||||||
"https://www.instagram.com/p/",
|
|
||||||
"https://www.instagram.com/p/1234567890/",
|
|
||||||
"https://www.instagram.com/reel/1234567890/",
|
@pytest.mark.parametrize("url", [
|
||||||
"https://www.instagram.com/username/",
|
"https://www.instagram.com/p/",
|
||||||
"https://www.instagram.com/username/stories/",
|
"https://www.instagram.com/p/1234567890/",
|
||||||
"https://www.instagram.com/username/highlights/",
|
"https://www.instagram.com/reel/1234567890/",
|
||||||
])
|
"https://www.instagram.com/username/",
|
||||||
def test_regex_matches(self, url):
|
"https://www.instagram.com/username/stories/",
|
||||||
# post
|
"https://www.instagram.com/username/highlights/",
|
||||||
assert InstagramExtractor.valid_url.match(url)
|
])
|
||||||
|
def test_regex_matches(url, instagram_extractor):
|
||||||
|
# post
|
||||||
|
assert instagram_extractor.valid_url.match(url)
|
||||||
|
|||||||
Reference in New Issue
Block a user