mirror of
https://github.com/bellingcat/snscrape.git
synced 2026-06-11 03:48:29 +03:00
Rename from socialmediascraper to snscrape
This commit is contained in:
8
setup.py
8
setup.py
@@ -2,14 +2,14 @@ import setuptools
|
||||
|
||||
|
||||
setuptools.setup(
|
||||
name = 'socialmediascraper',
|
||||
name = 'snscrape',
|
||||
version = '0.0-dev',
|
||||
description = 'A social media scraper',
|
||||
packages = ['socialmediascraper'],
|
||||
description = 'A social network service scraper',
|
||||
packages = ['snscrape'],
|
||||
install_requires = ['requests', 'lxml', 'beautifulsoup4'],
|
||||
entry_points = {
|
||||
'console_scripts': [
|
||||
'smscrape = socialmediascraper.cli:main',
|
||||
'snscrape = snscrape.cli:main',
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import argparse
|
||||
import logging
|
||||
import socialmediascraper.base
|
||||
import socialmediascraper.modules
|
||||
import snscrape.base
|
||||
import snscrape.modules
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -15,7 +15,7 @@ def parse_args():
|
||||
parser.add_argument('-n', '--max-results', dest = 'maxResults', type = int, metavar = 'N', help = 'Only return the first N results')
|
||||
|
||||
subparsers = parser.add_subparsers(dest = 'scraper', help = 'The scraper you want to use')
|
||||
classes = socialmediascraper.base.Scraper.__subclasses__()
|
||||
classes = snscrape.base.Scraper.__subclasses__()
|
||||
for cls in classes:
|
||||
subparser = subparsers.add_parser(cls.name, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
|
||||
cls.setup_parser(subparser)
|
||||
@@ -1,6 +1,6 @@
|
||||
import importlib
|
||||
import os
|
||||
import socialmediascraper.base
|
||||
import snscrape.base
|
||||
|
||||
|
||||
def _import_modules():
|
||||
@@ -8,7 +8,7 @@ def _import_modules():
|
||||
for fn in files:
|
||||
if fn.endswith('.py') and fn != '__init__.py':
|
||||
# Import module if not already imported
|
||||
moduleName = f'socialmediascraper.modules.{fn[:-3]}'
|
||||
moduleName = f'snscrape.modules.{fn[:-3]}'
|
||||
module = importlib.import_module(moduleName)
|
||||
|
||||
|
||||
@@ -2,14 +2,14 @@ import bs4
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socialmediascraper.base
|
||||
import snscrape.base
|
||||
import urllib.parse
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FacebookUserScraper(socialmediascraper.base.Scraper):
|
||||
class FacebookUserScraper(snscrape.base.Scraper):
|
||||
name = 'facebook-user'
|
||||
|
||||
def __init__(self, username, **kwargs):
|
||||
@@ -3,13 +3,13 @@ import itertools
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socialmediascraper.base
|
||||
import snscrape.base
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GooglePlusUserScraper(socialmediascraper.base.Scraper):
|
||||
class GooglePlusUserScraper(snscrape.base.Scraper):
|
||||
name = 'googleplus-user'
|
||||
|
||||
def __init__(self, user, **kwargs):
|
||||
@@ -48,7 +48,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper):
|
||||
logger.info('User has no posts')
|
||||
return
|
||||
for postObj in response[0][7]:
|
||||
yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
|
||||
yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
|
||||
cursor = response[0][1] # 'ADSJ_x'
|
||||
if cursor is None:
|
||||
# No further pages
|
||||
@@ -86,7 +86,7 @@ class GooglePlusUserScraper(socialmediascraper.base.Scraper):
|
||||
response = json.JSONDecoder().raw_decode(''.join(garbage[pos:]))[0] # Parses only the first structure in the data stream without throwing an error about the extra data at the end
|
||||
|
||||
for postObj in response[0][2]['74333095'][0][7]:
|
||||
yield socialmediascraper.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
|
||||
yield snscrape.base.URLItem(f'https://plus.google.com/{postObj[6]["33558957"][21]}')
|
||||
|
||||
cursor = response[0][2]['74333095'][0][1]
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import socialmediascraper.base
|
||||
import snscrape.base
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InstagramUserScraper(socialmediascraper.base.Scraper):
|
||||
class InstagramUserScraper(snscrape.base.Scraper):
|
||||
name = 'instagram-user'
|
||||
|
||||
def __init__(self, username, **kwargs):
|
||||
@@ -17,7 +17,7 @@ class InstagramUserScraper(socialmediascraper.base.Scraper):
|
||||
def _response_to_items(self, response, username):
|
||||
for node in response['user']['edge_owner_to_timeline_media']['edges']:
|
||||
code = node['node']['shortcode']
|
||||
yield socialmediascraper.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
|
||||
yield snscrape.base.URLItem(f'https://www.instagram.com/p/{code}/?taken-by={username}') #TODO: Do we want the taken-by parameter in here?
|
||||
|
||||
def get_items(self):
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||
@@ -1,13 +1,13 @@
|
||||
import bs4
|
||||
import json
|
||||
import logging
|
||||
import socialmediascraper.base
|
||||
import snscrape.base
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TwitterSearchScraper(socialmediascraper.base.Scraper):
|
||||
class TwitterSearchScraper(snscrape.base.Scraper):
|
||||
name = 'twitter-search'
|
||||
|
||||
def __init__(self, query, **kwargs):
|
||||
@@ -23,7 +23,7 @@ class TwitterSearchScraper(socialmediascraper.base.Scraper):
|
||||
for tweet in feed:
|
||||
username = tweet.find('span', 'username').find('b').text
|
||||
tweetID = tweet['data-item-id']
|
||||
yield socialmediascraper.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
|
||||
yield snscrape.base.URLItem(f'https://twitter.com/{username}/status/{tweetID}')
|
||||
|
||||
def _check_json_callback(self, r):
|
||||
if r.headers['content-type'] != 'application/json;charset=utf-8':
|
||||
Reference in New Issue
Block a user