This commit is contained in:
Ivan Kazakov
2022-03-26 19:45:52 +03:00
parent b3e8de3b5a
commit 04d92faf97

View File

@@ -1,13 +1,14 @@
import requests
import argparse
import json
from string import Template
from datetime import datetime, timezone
import sys
from statistics import pstdev
from itertools import product
import csv
import json
import sys
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from itertools import product
from statistics import pstdev
from string import Template
import requests
# gets instagram "locations" around a particular lat/lng using internal API
@@ -17,7 +18,7 @@ def get_instagram_locations(lat, lng, cookie):
lat_long = f"lat: {lat:.6f} | lng: {lng:.6f}"
url = "https://www.instagram.com/location_search/"
params = {"latitude": lat, "longitude": lng, "__a": 1}
headers = {'Cookie': cookie}
headers = {"Cookie": cookie}
try:
response = requests.get(url, params=params, headers=headers, timeout=timeout)
except requests.exceptions.ConnectionError as e:
@@ -44,25 +45,23 @@ def get_instagram_locations(lat, lng, cookie):
def get_instagram_locations_by_query(query):
    """Search Instagram places by free-text query via the web "topsearch" endpoint.

    Args:
        query: Free-text search string.

    Returns:
        A list holding the ``location`` entry of every place in the response.
    """
    # Hand the query to requests via ``params`` so it gets percent-encoded;
    # concatenating it into the URL breaks on characters such as "&" or "#".
    url = "https://www.instagram.com/web/search/topsearch/"
    params = {"context": "place", "query": query}
    locs = requests.get(url, params=params).json()
    return [v["place"]["location"] for v in locs["places"]]
# queries the instagram location API for several points around a central lat/lng
# in order to return additional results
def get_fuzzy_locations(lat, lng, cookie, sigma=2):
locs = get_instagram_locations(lat, lng, cookie)
loc_ids = {v['external_id'] for v in locs if "external_id" in v}
loc_ids = {v["external_id"] for v in locs if "external_id" in v}
std_lat = pstdev([v['lat'] for v in locs if 'lat' in v])
std_lng = pstdev([v['lng'] for v in locs if 'lng' in v])
std_lat = pstdev([v["lat"] for v in locs if "lat" in v])
std_lng = pstdev([v["lng"] for v in locs if "lng" in v])
# Skip the (0, 0) offset: it would only repeat the initial lookup already made
# at the unshifted lat/lng.
deltas = (
(
lat + delta_lat * std_lat,
lng + delta_lng * std_lng
) for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2))
(lat + delta_lat * std_lat, lng + delta_lng * std_lng)
for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2))
)
# to change args order for convenient unpacking
@@ -73,7 +72,7 @@ def get_fuzzy_locations(lat, lng, cookie, sigma=2):
for new_locs in results:
for loc in new_locs:
if 'external_id' in loc and loc['external_id'] not in loc_ids:
if "external_id" in loc and loc["external_id"] not in loc_ids:
locs.append(loc)
loc_ids.add(loc["external_id"])
@@ -84,26 +83,24 @@ def get_fuzzy_locations(lat, lng, cookie, sigma=2):
def make_geojson(locations):
    """Convert location dicts into a GeoJSON FeatureCollection of points.

    Args:
        locations: Iterable of location dicts. Entries lacking either "lng"
            or "lat" are skipped, since no point can be built for them.

    Returns:
        A dict with "type": "FeatureCollection" and one "Feature" per usable
        location; each feature's "properties" is the location dict itself.
    """
    features = [
        {
            "type": "Feature",
            # GeoJSON positions are ordered longitude first, then latitude.
            "geometry": {"type": "Point", "coordinates": [location["lng"], location["lat"]]},
            "properties": location,
        }
        for location in locations
        # Require both keys: checking only "lng" raised KeyError on "lat".
        if "lng" in location and "lat" in location
    ]
    return {"type": "FeatureCollection", "features": features}
def encode_date(date_str: str):
'''Convert date into Instagram "snowflake" ID'''
"""Convert date into Instagram "snowflake" ID"""
try:
date = datetime.strptime(date_str, '%Y-%m-%d')
date = datetime.strptime(date_str, "%Y-%m-%d")
except ValueError:
try:
date = datetime.strptime(date_str, '%Y-%m-%d')
date = datetime.strptime(date_str, "%Y-%m-%d")
except ValueError:
print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
sys.exit(1)
@@ -115,7 +112,7 @@ def encode_date(date_str: str):
return str(max_id_num)
html_template = '''<html>
html_template = """<html>
<head>
<title>Instagram location visualizations</title>
@@ -177,7 +174,7 @@ html_template = '''<html>
centerMarker._icon.classList.add('selected-location');
</script>
</body>
</html>'''
</html>"""
def main():
@@ -194,25 +191,25 @@ def main():
args = parser.parse_args()
cookie = 'sessionid=' + args.session
cookie = "sessionid=" + args.session
date_var = ''
date_var = ""
if args.date is not None:
date_var = '?max_id=' + encode_date(args.date)
date_var = "?max_id=" + encode_date(args.date)
locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie)
if args.output:
json.dump(locations, open(args.output, 'w'))
json.dump(locations, open(args.output, "w"))
if args.geojson:
json.dump(make_geojson(locations), open(args.geojson, 'w'))
json.dump(make_geojson(locations), open(args.geojson, "w"))
if args.map:
s = Template(html_template)
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var)
f = open(args.map, 'w')
f = open(args.map, "w")
f.write(viz)
f.close()
@@ -221,19 +218,19 @@ def main():
i["url"] = f"https://www.instagram.com/explore/locations/{i['external_id']}{date_var}"
# The leading empty-string column holds the row index. It is kept for backward compatibility, since that is how pandas writes the index column.
fieldnames = ['', 'name', 'external_id', 'external_id_source', 'lat', 'lng', 'address', 'minimum_age', 'url']
fieldnames = ["", "name", "external_id", "external_id_source", "lat", "lng", "address", "minimum_age", "url"]
with open(args.csv, "w") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for idx, row in enumerate(locations):
row[''] = idx
row[""] = idx
writer.writerow(row)
if args.dump_ids:
ids = map(lambda loc: str(loc['external_id']), locations)
with open(args.dump_ids, 'w') as f:
f.write('\n'.join(ids))
ids = map(lambda loc: str(loc["external_id"]), locations)
with open(args.dump_ids, "w") as f:
f.write("\n".join(ids))
if __name__ == "__main__":