diff --git a/setup.py b/setup.py index 29b34c9..c0f53c2 100644 --- a/setup.py +++ b/setup.py @@ -2,14 +2,14 @@ import setuptools setuptools.setup( - name = 'socialmediascraper', + name = 'snscrape', version = '0.0-dev', - description = 'A social media scraper', - packages = ['socialmediascraper'], + description = 'A social network service scraper', + packages = ['snscrape'], install_requires = ['requests', 'lxml', 'beautifulsoup4'], entry_points = { 'console_scripts': [ - 'smscrape = socialmediascraper.cli:main', + 'snscrape = snscrape.cli:main', ], }, ) diff --git a/socialmediascraper/__init__.py b/snscrape/__init__.py similarity index 100% rename from socialmediascraper/__init__.py rename to snscrape/__init__.py diff --git a/socialmediascraper/base.py b/snscrape/base.py similarity index 100% rename from socialmediascraper/base.py rename to snscrape/base.py diff --git a/socialmediascraper/cli.py b/snscrape/cli.py similarity index 94% rename from socialmediascraper/cli.py rename to snscrape/cli.py index c11e13c..daa3130 100644 --- a/socialmediascraper/cli.py +++ b/snscrape/cli.py @@ -1,7 +1,7 @@ import argparse import logging -import socialmediascraper.base -import socialmediascraper.modules +import snscrape.base +import snscrape.modules logger = logging.getLogger(__name__) @@ -15,7 +15,7 @@ def parse_args(): parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results') subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use') - classes = socialmediascraper.base.Scraper.__subclasses__() + classes = snscrape.base.Scraper.__subclasses__() for cls in classes: subparser = subparsers.add_parser(cls.name, formatter_class = argparse.ArgumentDefaultsHelpFormatter) cls.setup_parser(subparser) diff --git a/socialmediascraper/modules/__init__.py b/snscrape/modules/__init__.py similarity index 75% rename from socialmediascraper/modules/__init__.py rename to snscrape/modules/__init__.py index 70659e6..0e84c06 100644 --- a/socialmediascraper/modules/__init__.py +++ b/snscrape/modules/__init__.py @@ -1,6 +1,6 @@ import importlib import os -import socialmediascraper.base +import snscrape.base def _import_modules(): @@ -8,7 +8,7 @@ def _import_modules(): for fn in files: if fn.endswith('.py') and fn != '__init__.py': # Import module if not already imported - moduleName = f'socialmediascraper.modules.{fn[:-3]}' + moduleName = f'snscrape.modules.{fn[:-3]}' module = importlib.import_module(moduleName) diff --git a/socialmediascraper/modules/facebook.py b/snscrape/modules/facebook.py similarity index 97% rename from socialmediascraper/modules/facebook.py rename to snscrape/modules/facebook.py index bf4eb22..d139dbd 100644 --- a/socialmediascraper/modules/facebook.py +++ b/snscrape/modules/facebook.py @@ -2,14 +2,14 @@ import bs4 import json import logging import re -import socialmediascraper.base +import snscrape.base import urllib.parse logger = logging.getLogger(__name__) -class FacebookUserScraper(socialmediascraper.base.Scraper): +class FacebookUserScraper(snscrape.base.Scraper): name = 'facebook-user' def __init__(self, username, **kwargs): diff --git a/socialmediascraper/modules/googleplus.py b/snscrape/modules/googleplus.py similarity index 93% rename from socialmediascraper/modules/googleplus.py rename to snscrape/modules/googleplus.py index 350a78b..b27b5e2 100644 --- a/socialmediascraper/modules/googleplus.py +++ b/snscrape/modules/googleplus.py @@ -3,13 +3,13 @@ import itertools import json import logging import re -import socialmediascraper.base +import snscrape.base logger = logging.getLogger(__name__) -class GooglePlusUserScraper(socialmediascraper.base.Scraper): +class GooglePlusUserScraper(snscrape.base.Scraper): name = 'googleplus-user' def __init__(self, user, **kwargs): @@ -48,7 +48,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper): logger.info('User has no posts') return for postObj in response[0][7]: - yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}') + yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}') cursor = response[0][1] # 'ADSJ_x' if cursor is None: # No further pages @@ -86,7 +86,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper): response = json.JSONDecoder().raw_decode(''.join(garbage[pos:]))[0] # Parses only the first structure in the data stream without throwing an error about the extra data at the end for postObj in response[0][2]['74333095'][0][7]: - yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}') + yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}') cursor = response[0][2]['74333095'][0][1] diff --git a/socialmediascraper/modules/instagram.py b/snscrape/modules/instagram.py similarity index 92% rename from socialmediascraper/modules/instagram.py rename to snscrape/modules/instagram.py index 8266929..3287b6c 100644 --- a/socialmediascraper/modules/instagram.py +++ b/snscrape/modules/instagram.py @@ -1,13 +1,13 @@ import hashlib import json import logging -import socialmediascraper.base +import snscrape.base logger = logging.getLogger(__name__) -class InstagramUserScraper(socialmediascraper.base.Scraper): +class InstagramUserScraper(snscrape.base.Scraper): name = 'instagram-user' def __init__(self, username, **kwargs): @@ -17,7 +17,7 @@ class InstagramUserScraper(socialmediascraper.base.Scraper): def _response_to_items(self, response, username): for node in response['user']['edge_owner_to_timeline_media']['edges']: code = node['node']['shortcode'] - yield socialmediascraper.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here? + yield snscrape.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here? def get_items(self): headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} diff --git a/socialmediascraper/modules/twitter.py b/snscrape/modules/twitter.py similarity index 94% rename from socialmediascraper/modules/twitter.py rename to snscrape/modules/twitter.py index bbb34bb..c690162 100644 --- a/socialmediascraper/modules/twitter.py +++ b/snscrape/modules/twitter.py @@ -1,13 +1,13 @@ import bs4 import json import logging -import socialmediascraper.base +import snscrape.base logger = logging.getLogger(__name__) -class TwitterSearchScraper(socialmediascraper.base.Scraper): +class TwitterSearchScraper(snscrape.base.Scraper): name = 'twitter-search' def __init__(self, query, **kwargs): @@ -23,7 +23,7 @@ class TwitterSearchScraper(socialmediascraper.base.Scraper): for tweet in feed: username = tweet.find('span', 'username').find('b').text tweetID = tweet['data-item-id'] - yield socialmediascraper.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}') + yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}') def _check_json_callback(self, r): if r.headers['content-type'] != 'application/json;charset=utf-8':