Rename from socialmediascraper to snscrape

This commit is contained in:
JustAnotherArchivist
2018-08-21 22:54:14 +02:00
parent 9fb3ac6013
commit 6b6ae3d33b
9 changed files with 21 additions and 21 deletions

View File

@@ -2,14 +2,14 @@ import setuptools
setuptools.setup(
name = 'socialmediascraper',
name = 'snscrape',
version = '0.0-dev',
description = 'A social media scraper',
packages = ['socialmediascraper'],
description = 'A social network service scraper',
packages = ['snscrape'],
install_requires = ['requests', 'lxml', 'beautifulsoup4'],
entry_points = {
'console_scripts': [
'smscrape = socialmediascraper.cli:main',
'snscrape = snscrape.cli:main',
],
},
)

View File

@@ -1,7 +1,7 @@
import argparse
import logging
import socialmediascraper.base
import socialmediascraper.modules
import snscrape.base
import snscrape.modules
logger = logging.getLogger(__name__)
@@ -15,7 +15,7 @@ def parse_args():
parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
classes = socialmediascraper.base.Scraper.__subclasses__()
classes = snscrape.base.Scraper.__subclasses__()
for cls in classes:
subparser = subparsers.add_parser(cls.name, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
cls.setup_parser(subparser)

View File

@@ -1,6 +1,6 @@
import importlib
import os
import socialmediascraper.base
import snscrape.base
def _import_modules():
@@ -8,7 +8,7 @@ def _import_modules():
for fn in files:
if fn.endswith('.py') and fn != '__init__.py':
# Import module if not already imported
moduleName = f'socialmediascraper.modules.{fn[:-3]}'
moduleName = f'snscrape.modules.{fn[:-3]}'
module = importlib.import_module(moduleName)

View File

@@ -2,14 +2,14 @@ import bs4
import json
import logging
import re
import socialmediascraper.base
import snscrape.base
import urllib.parse
logger = logging.getLogger(__name__)
class FacebookUserScraper(socialmediascraper.base.Scraper):
class FacebookUserScraper(snscrape.base.Scraper):
name = 'facebook-user'
def __init__(self, username, **kwargs):

View File

@@ -3,13 +3,13 @@ import itertools
import json
import logging
import re
import socialmediascraper.base
import snscrape.base
logger = logging.getLogger(__name__)
class GooglePlusUserScraper(socialmediascraper.base.Scraper):
class GooglePlusUserScraper(snscrape.base.Scraper):
name = 'googleplus-user'
def __init__(self, user, **kwargs):
@@ -48,7 +48,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper):
logger.info('User has no posts')
return
for postObj in response[0][7]:
yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
cursor = response[0][1] # 'ADSJ_x'
if cursor is None:
# No further pages
@@ -86,7 +86,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper):
response = json.JSONDecoder().raw_decode(''.join(garbage[pos:]))[0] # Parses only the first structure in the data stream without throwing an error about the extra data at the end
for postObj in response[0][2]['74333095'][0][7]:
yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
cursor = response[0][2]['74333095'][0][1]

View File

@@ -1,13 +1,13 @@
import hashlib
import json
import logging
import socialmediascraper.base
import snscrape.base
logger = logging.getLogger(__name__)
class InstagramUserScraper(socialmediascraper.base.Scraper):
class InstagramUserScraper(snscrape.base.Scraper):
name = 'instagram-user'
def __init__(self, username, **kwargs):
@@ -17,7 +17,7 @@ class InstagramUserScraper(socialmediascraper.base.Scraper):
def _response_to_items(self, response, username):
for node in response['user']['edge_owner_to_timeline_media']['edges']:
code = node['node']['shortcode']
yield socialmediascraper.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
yield snscrape.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
def get_items(self):
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

View File

@@ -1,13 +1,13 @@
import bs4
import json
import logging
import socialmediascraper.base
import snscrape.base
logger = logging.getLogger(__name__)
class TwitterSearchScraper(socialmediascraper.base.Scraper):
class TwitterSearchScraper(snscrape.base.Scraper):
name = 'twitter-search'
def __init__(self, query, **kwargs):
@@ -23,7 +23,7 @@ class TwitterSearchScraper(socialmediascraper.base.Scraper):
for tweet in feed:
username = tweet.find('span', 'username').find('b').text
tweetID = tweet['data-item-id']
yield socialmediascraper.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
def _check_json_callback(self, r):
if r.headers['content-type'] != 'application/json;charset=utf-8':