"""Collect Instagram locations around a latitude/longitude and export them.

The locations can be written as raw JSON, GeoJSON, an HTML map, a CSV file
and/or a plain list of location ids, depending on the command-line flags.
"""

import argparse
import json
import sys
from datetime import datetime, timezone
from string import Template

import numpy as np
import pandas as pd
import requests

# Seconds to wait for Instagram before giving up on a request.
_REQUEST_TIMEOUT = 30

# Offset (in milliseconds) subtracted from a Unix timestamp before it is
# shifted into an id; presumably Instagram's custom epoch -- TODO confirm.
_INSTAGRAM_EPOCH_MS = 1314220021300


def get_instagram_locations(lat, lng, cookie):
    """Return the Instagram "locations" around a particular lat/lng.

    Uses Instagram's internal location-search API, which requires a session
    cookie for authentication.

    Args:
        lat: Latitude of the search point.
        lng: Longitude of the search point.
        cookie: Value sent verbatim as the ``Cookie`` request header.

    Returns:
        The ``venues`` list from the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        "https://www.instagram.com/location_search/",
        # Let requests build (and escape) the query string.
        params={"latitude": lat, "longitude": lng, "__a": 1},
        headers={"Cookie": cookie},
        timeout=_REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.json()["venues"]


def get_instagram_locations_by_query(query):
    """Return the locations matching a free-text place search.

    Args:
        query: Search text; it is URL-encoded before being sent, so spaces
            and characters such as ``&`` are safe.

    Returns:
        A list with the ``place.location`` entry of every place in the
        response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        "https://www.instagram.com/web/search/topsearch/",
        params={"context": "place", "query": query},
        timeout=_REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return [v["place"]["location"] for v in response.json()["places"]]


def get_fuzzy_locations(lat, lng, cookie, sigma=2):
    """Query several points around a central lat/lng to get extra results.

    The central point is queried first. The standard deviation of the
    returned latitudes and longitudes is then used as the step of a
    ``(2 * sigma + 1)`` x ``(2 * sigma + 1)`` grid of additional queries,
    and every location whose ``external_id`` has not been seen yet is
    appended to the result.

    Args:
        lat: Latitude of the central point.
        lng: Longitude of the central point.
        cookie: Session cookie forwarded to ``get_instagram_locations``.
        sigma: Number of grid steps taken in each direction.

    Returns:
        The list of locations, starting with the results for the centre.
    """
    locs = get_instagram_locations(lat, lng, cookie)

    lats = [v["lat"] for v in locs if "lat" in v]
    lngs = [v["lng"] for v in locs if "lng" in v]
    if not lats or not lngs:
        # No coordinates to derive a step from (np.std([]) would be nan).
        return locs

    std_lat = np.std(lats)
    std_lng = np.std(lngs)
    if std_lat == 0 and std_lng == 0:
        # Every grid point would coincide with the centre already queried.
        return locs

    seen_ids = {v["external_id"] for v in locs}
    for delta_lat in range(-sigma, sigma + 1):
        for delta_lng in range(-sigma, sigma + 1):
            if delta_lat == 0 and delta_lng == 0:
                continue  # the centre was fetched above
            new_locs = get_instagram_locations(
                lat + delta_lat * std_lat, lng + delta_lng * std_lng, cookie
            )
            for loc in new_locs:
                if loc["external_id"] not in seen_ids:
                    seen_ids.add(loc["external_id"])
                    locs.append(loc)
    return locs


def make_geojson(locations):
    """Convert a list of Instagram locations into valid GeoJSON.

    Locations without both a ``lng`` and a ``lat`` key are skipped, since a
    point feature cannot be built for them.

    Args:
        locations: Iterable of location dicts.

    Returns:
        A ``FeatureCollection`` dict with one ``Point`` feature per location;
        the location dict itself becomes the feature's ``properties``.
    """
    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [location["lng"], location["lat"]],
            },
            "properties": location,
        }
        for location in locations
        if "lng" in location and "lat" in location
    ]
    return {"type": "FeatureCollection", "features": features}


def encode_date(date_str: str):
    '''Convert date into Instagram "snowflake" ID.

    Args:
        date_str: Date in ``yyyy-mm-dd`` format.

    Returns:
        The id, as a string, for 23:59:59 UTC on the given date.

    Exits the process with status 1 (after printing a message to stderr)
    when the date cannot be parsed.
    '''
    try:
        date = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
        sys.exit(1)
    # Use the last second of the day so the whole day is included.
    date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc)
    date_ts = int(date.timestamp()) * 1000  # milliseconds
    return str((date_ts - _INSTAGRAM_EPOCH_MS) << 23)


# NOTE(review): the template text visible here contains none of the
# placeholders main() passes to it (lat, lng, locs, date_var) -- confirm the
# HTML markup was not lost from this file.
html_template = ''' Instagram location visualizations
'''


def main():
    """Parse the command line, fetch the locations and write the outputs."""
    parser = argparse.ArgumentParser(description="Get a list of Instagram locations near a lat/lng")
    parser.add_argument("--cookie", action="store", dest="cookie")
    parser.add_argument("--json", action="store", dest="output")
    parser.add_argument("--geojson", action="store", dest="geojson")
    parser.add_argument("--map", action="store", dest="map")
    parser.add_argument("--csv", action="store", dest="csv")
    # The coordinates are mandatory: without them there is nothing to query.
    parser.add_argument("--lat", action="store", dest="lat", required=True)
    parser.add_argument("--lng", action="store", dest="lng", required=True)
    parser.add_argument("--date", action="store", dest="date")
    parser.add_argument("--ids", action="store", dest="dump_ids")
    args = parser.parse_args()

    try:
        lat = float(args.lat)
        lng = float(args.lng)
    except ValueError:
        parser.error("--lat and --lng must be numbers")

    date_var = ''
    if args.date is not None:
        date_var = '?max_id=' + encode_date(args.date)

    locations = get_fuzzy_locations(lat, lng, args.cookie)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(locations, f)

    if args.geojson:
        with open(args.geojson, 'w', encoding='utf-8') as f:
            json.dump(make_geojson(locations), f)

    if args.map:
        # The raw argument strings are substituted, exactly as typed.
        viz = Template(html_template).substitute(
            lat=args.lat,
            lng=args.lng,
            locs=json.dumps(make_geojson(locations)),
            date_var=date_var,
        )
        with open(args.map, 'w', encoding='utf-8') as f:
            f.write(viz)

    if args.csv:
        df = pd.DataFrame(locations)
        # An empty result has no columns at all; skip the derived column then.
        if 'external_id' in df.columns:
            df['url'] = df['external_id'].apply(
                lambda v: 'https://www.instagram.com/explore/locations/' + str(v) + date_var
            )
        df.to_csv(args.csv)

    if args.dump_ids:
        with open(args.dump_ids, 'w', encoding='utf-8') as f:
            f.write('\n'.join(str(loc['external_id']) for loc in locations))


if __name__ == "__main__":
    main()