mirror of
https://github.com/bellingcat/instagram-location-search.git
synced 2026-06-08 02:28:29 +03:00
Blackify
This commit is contained in:
@@ -1,13 +1,14 @@
|
||||
import requests
|
||||
import argparse
|
||||
import json
|
||||
from string import Template
|
||||
from datetime import datetime, timezone
|
||||
import sys
|
||||
from statistics import pstdev
|
||||
from itertools import product
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, timezone
|
||||
from itertools import product
|
||||
from statistics import pstdev
|
||||
from string import Template
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# gets instagram "locations" around a particular lat/lng using internal API
|
||||
@@ -17,7 +18,7 @@ def get_instagram_locations(lat, lng, cookie):
|
||||
lat_long = f"lat: {lat:.6f} | lng: {lng:.6f}"
|
||||
url = "https://www.instagram.com/location_search/"
|
||||
params = {"latitude": lat, "longitude": lng, "__a": 1}
|
||||
headers = {'Cookie': cookie}
|
||||
headers = {"Cookie": cookie}
|
||||
try:
|
||||
response = requests.get(url, params=params, headers=headers, timeout=timeout)
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
@@ -44,25 +45,23 @@ def get_instagram_locations(lat, lng, cookie):
|
||||
def get_instagram_locations_by_query(query):
    """Return the Instagram locations matching a free-text place query.

    Calls Instagram's ``topsearch`` web endpoint with ``context=place`` and
    extracts the ``location`` dict of every entry under ``places``.

    Args:
        query: Free-text search string (e.g. a place name).

    Returns:
        A list with one ``location`` object per place in the response.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails.
        ValueError: if the response body is not valid JSON.
        KeyError: if the response lacks the expected ``places`` /
            ``place`` / ``location`` keys.
    """
    # Pass the query through ``params`` so it is URL-encoded; concatenating it
    # into the URL breaks for text containing ``&``, ``#``, ``+`` and similar.
    response = requests.get(
        "https://www.instagram.com/web/search/topsearch/",
        params={"context": "place", "query": query},
    )
    locs = response.json()

    return [v["place"]["location"] for v in locs["places"]]
|
||||
|
||||
|
||||
# queries the instagram location API for several points around a central lat/lng
|
||||
# in order to return additional results
|
||||
def get_fuzzy_locations(lat, lng, cookie, sigma=2):
|
||||
locs = get_instagram_locations(lat, lng, cookie)
|
||||
loc_ids = {v['external_id'] for v in locs if "external_id" in v}
|
||||
loc_ids = {v["external_id"] for v in locs if "external_id" in v}
|
||||
|
||||
std_lat = pstdev([v['lat'] for v in locs if 'lat' in v])
|
||||
std_lng = pstdev([v['lng'] for v in locs if 'lng' in v])
|
||||
std_lat = pstdev([v["lat"] for v in locs if "lat" in v])
|
||||
std_lng = pstdev([v["lng"] for v in locs if "lng" in v])
|
||||
|
||||
# filter to avoid calling with both lat and lng deltas equal zero (which would duplicate the call
|
||||
# to obtain the initial loc)
|
||||
deltas = (
|
||||
(
|
||||
lat + delta_lat * std_lat,
|
||||
lng + delta_lng * std_lng
|
||||
) for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2))
|
||||
(lat + delta_lat * std_lat, lng + delta_lng * std_lng)
|
||||
for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2))
|
||||
)
|
||||
|
||||
# to change args order for convenient unpacking
|
||||
@@ -73,7 +72,7 @@ def get_fuzzy_locations(lat, lng, cookie, sigma=2):
|
||||
|
||||
for new_locs in results:
|
||||
for loc in new_locs:
|
||||
if 'external_id' in loc and loc['external_id'] not in loc_ids:
|
||||
if "external_id" in loc and loc["external_id"] not in loc_ids:
|
||||
locs.append(loc)
|
||||
loc_ids.add(loc["external_id"])
|
||||
|
||||
@@ -84,26 +83,24 @@ def get_fuzzy_locations(lat, lng, cookie, sigma=2):
|
||||
def make_geojson(locations):
    """Convert location dicts into a GeoJSON ``FeatureCollection``.

    Args:
        locations: Iterable of dicts. Entries that carry both ``"lat"`` and
            ``"lng"`` become ``Point`` features; all others are skipped.

    Returns:
        A dict of the form ``{"type": "FeatureCollection", "features": [...]}``.
        Each feature's ``"properties"`` is the original location dict itself
        (the same object, not a copy).
    """
    features = [
        {
            "type": "Feature",
            # GeoJSON positions are ordered [longitude, latitude].
            "geometry": {"type": "Point", "coordinates": [location["lng"], location["lat"]]},
            "properties": location,
        }
        for location in locations
        # Require both coordinates: checking only "lng" would raise KeyError
        # on an entry that has a longitude but no latitude.
        if "lng" in location and "lat" in location
    ]

    return {"type": "FeatureCollection", "features": features}
|
||||
|
||||
|
||||
def encode_date(date_str: str):
|
||||
'''Convert date into Instagram "snowflake" ID'''
|
||||
"""Convert date into Instagram "snowflake" ID"""
|
||||
try:
|
||||
date = datetime.strptime(date_str, '%Y-%m-%d')
|
||||
date = datetime.strptime(date_str, "%Y-%m-%d")
|
||||
except ValueError:
|
||||
try:
|
||||
date = datetime.strptime(date_str, '%Y-%m-%d')
|
||||
date = datetime.strptime(date_str, "%Y-%m-%d")
|
||||
except ValueError:
|
||||
print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
@@ -115,7 +112,7 @@ def encode_date(date_str: str):
|
||||
return str(max_id_num)
|
||||
|
||||
|
||||
html_template = '''<html>
|
||||
html_template = """<html>
|
||||
<head>
|
||||
<title>Instagram location visualizations</title>
|
||||
|
||||
@@ -177,7 +174,7 @@ html_template = '''<html>
|
||||
centerMarker._icon.classList.add('selected-location');
|
||||
</script>
|
||||
</body>
|
||||
</html>'''
|
||||
</html>"""
|
||||
|
||||
|
||||
def main():
|
||||
@@ -194,25 +191,25 @@ def main():
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
cookie = 'sessionid=' + args.session
|
||||
cookie = "sessionid=" + args.session
|
||||
|
||||
date_var = ''
|
||||
date_var = ""
|
||||
if args.date is not None:
|
||||
date_var = '?max_id=' + encode_date(args.date)
|
||||
date_var = "?max_id=" + encode_date(args.date)
|
||||
|
||||
locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie)
|
||||
|
||||
if args.output:
|
||||
json.dump(locations, open(args.output, 'w'))
|
||||
json.dump(locations, open(args.output, "w"))
|
||||
|
||||
if args.geojson:
|
||||
json.dump(make_geojson(locations), open(args.geojson, 'w'))
|
||||
json.dump(make_geojson(locations), open(args.geojson, "w"))
|
||||
|
||||
if args.map:
|
||||
s = Template(html_template)
|
||||
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var)
|
||||
|
||||
f = open(args.map, 'w')
|
||||
f = open(args.map, "w")
|
||||
f.write(viz)
|
||||
f.close()
|
||||
|
||||
@@ -221,19 +218,19 @@ def main():
|
||||
i["url"] = f"https://www.instagram.com/explore/locations/{i['external_id']}{date_var}"
|
||||
|
||||
# leading empty string for 'id' column is for backward compatibility since that's the pandas behavior.
|
||||
fieldnames = ['', 'name', 'external_id', 'external_id_source', 'lat', 'lng', 'address', 'minimum_age', 'url']
|
||||
fieldnames = ["", "name", "external_id", "external_id_source", "lat", "lng", "address", "minimum_age", "url"]
|
||||
|
||||
with open(args.csv, "w") as f:
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for idx, row in enumerate(locations):
|
||||
row[''] = idx
|
||||
row[""] = idx
|
||||
writer.writerow(row)
|
||||
|
||||
if args.dump_ids:
|
||||
ids = map(lambda loc: str(loc['external_id']), locations)
|
||||
with open(args.dump_ids, 'w') as f:
|
||||
f.write('\n'.join(ids))
|
||||
ids = map(lambda loc: str(loc["external_id"]), locations)
|
||||
with open(args.dump_ids, "w") as f:
|
||||
f.write("\n".join(ids))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user