From bc7f07418c8260d072dfe50566242c7eb99209b7 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:34:28 +0200 Subject: [PATCH 1/8] Refactored for PyPI and added Dockerfile --- Dockerfile | 11 + instagram_locations/__init__.py | 0 instagram_locations/instagram_locations.py | 234 +++++++++++++++++++++ instagram_locations/main.py | 10 + setup.py | 29 +++ 5 files changed, 284 insertions(+) create mode 100644 Dockerfile create mode 100644 instagram_locations/__init__.py create mode 100644 instagram_locations/instagram_locations.py create mode 100644 instagram_locations/main.py create mode 100644 setup.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a4a85a8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 + +FROM python:latest + +WORKDIR /app + +COPY . . + +RUN pip install --upgrade pip && pip install build && python -m build && pip install dist/*.whl + +ENTRYPOINT ["instagram_locations"] \ No newline at end of file diff --git a/instagram_locations/__init__.py b/instagram_locations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/instagram_locations/instagram_locations.py b/instagram_locations/instagram_locations.py new file mode 100644 index 0000000..3029251 --- /dev/null +++ b/instagram_locations/instagram_locations.py @@ -0,0 +1,234 @@ +import argparse +import csv +import json +import sys +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime, timezone +from itertools import product +from statistics import pstdev +from string import Template + +import requests + + +# gets instagram "locations" around a particular lat/lng using internal API +# (requires session cookie for authentication) +def get_instagram_locations(lat, lng, cookie): + timeout = 5.0 + lat_long = f"lat: {lat:.6f} | lng: {lng:.6f}" + url = "https://www.instagram.com/location_search/" + params = {"latitude": lat, "longitude": lng, "__a": 1} + headers = {"Cookie": cookie} + try: + response = requests.get(url, params=params, headers=headers, timeout=timeout) + except requests.exceptions.ConnectionError as e: + print(f"Connection failed for {lat_long}: {e}") + return [] + except requests.exceptions.Timeout: + print(f"Connections timed out after {timeout} seconds") + return [] + + try: + locations = response.json() + except json.JSONDecodeError: + print(f"Failed to get location data for {lat_long}") + return [] + + if not isinstance(locations, dict): + print(f"Got invalid response for {lat_long}") + return [] + + locations = locations.get("venues", []) + return locations + + +def get_instagram_locations_by_query(query): + locs = requests.get("https://www.instagram.com/web/search/topsearch/?context=place&query=" + query).json() + + return [v["place"]["location"] for v in locs["places"]] + + +# queries the instagram location API for several points around a central lat/lng +# in order to return additional results +def get_fuzzy_locations(lat, lng, cookie, sigma=2): + locs = get_instagram_locations(lat, lng, cookie) + loc_ids = {v["external_id"] for v in locs if "external_id" in v} + + std_lat = pstdev([v["lat"] for v in locs if "lat" in v]) + std_lng = pstdev([v["lng"] for v in locs if "lng" in v]) + + # filter to avoid calling with both lat and lng deltas equal zero (which would duplicate the call + # to obtain the initial loc) + deltas = ( + (lat + delta_lat * std_lat, lng + delta_lng * std_lng) + for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2)) + ) + + # to change args order for convenient unpacking + insta_loc_func = lambda ckie, lt, ln: get_instagram_locations(lt, ln, ckie) + + with ThreadPoolExecutor() as ex: + results = ex.map(lambda x: insta_loc_func(cookie, *x), deltas) + + for new_locs in results: + for loc in new_locs: + if "external_id" in loc and loc["external_id"] not in loc_ids: + locs.append(loc) + loc_ids.add(loc["external_id"]) + + return locs + + +# converts list of instagram locations into valid geojson +def make_geojson(locations): + features = [] + + for location in [location for location in locations if "lng" in location]: + feature = { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [location["lng"], location["lat"]]}, + "properties": location, + } + features.append(feature) + + return {"type": "FeatureCollection", "features": features} + + +def encode_date(date_str: str): + """Convert date into Instagram "snowflake" ID""" + try: + date = datetime.strptime(date_str, "%Y-%m-%d") + except ValueError: + try: + date = datetime.strptime(date_str, "%Y-%m-%d") + except ValueError: + print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr) + sys.exit(1) + date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc) + date_ts = int(date.timestamp()) * 1000 # milliseconds + insta_epoch = date_ts - 1314220021300 + max_id_num = insta_epoch << 23 + + return str(max_id_num) + + +html_template = """ + + Instagram location visualizations + + + + + + + + + + +
+ + + +""" + + +def main(): + parser = argparse.ArgumentParser(description="Get a list of Instagram locations near a lat/lng") + parser.add_argument("--cookie", action="store", dest="cookie") + parser.add_argument("--json", action="store", dest="output") + parser.add_argument("--geojson", action="store", dest="geojson") + parser.add_argument("--map", action="store", dest="map") + parser.add_argument("--csv", action="store", dest="csv") + parser.add_argument("--lat", action="store", dest="lat") + parser.add_argument("--lng", action="store", dest="lng") + parser.add_argument("--date", action="store", dest="date") + parser.add_argument("--ids", action="store", dest="dump_ids") + + args = parser.parse_args() + + cookie = args.cookie + + date_var = "" + if args.date is not None: + date_var = "?max_id=" + encode_date(args.date) + + locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie) + + if args.output: + json.dump(locations, open(args.output, "w")) + + if args.geojson: + json.dump(make_geojson(locations), open(args.geojson, "w")) + + if args.map: + s = Template(html_template) + viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var) + + f = open(args.map, "w") + f.write(viz) + f.close() + + if args.csv: + for i in locations: + i["url"] = f"https://www.instagram.com/explore/locations/{i['external_id']}{date_var}" + + # leading empty string for 'id' column is for backward compatibility since that's the pandas behavior. + fieldnames = ["", "name", "external_id", "external_id_source", "lat", "lng", "address", "minimum_age", "url"] + + with open(args.csv, "w") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for idx, row in enumerate(locations): + row[""] = idx + writer.writerow(row) + + if args.dump_ids: + ids = map(lambda loc: str(loc["external_id"]), locations) + with open(args.dump_ids, "w") as f: + f.write("\n".join(ids)) + \ No newline at end of file diff --git a/instagram_locations/main.py b/instagram_locations/main.py new file mode 100644 index 0000000..d571463 --- /dev/null +++ b/instagram_locations/main.py @@ -0,0 +1,10 @@ +from instagram_locations.instagram_locations import main + + +def start(): + try: + main() + except KeyboardInterrupt as ctrlc: + raise KeyboardInterrupt(ctrlc) from ctrlc + except Exception as err: + raise Exception(err) from err diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b78bf31 --- /dev/null +++ b/setup.py @@ -0,0 +1,29 @@ +import setuptools + +with open("README.md", "r", encoding="utf-8") as file: + long_description = file.read() + +setuptools.setup( + name="instagram-location-search", + version="1.0.0", + author="Bellingcat", + packages=["instagram_locations"], + description="Finds Instagram location IDs near a specified latitude and longitude.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://www.bellingcat.com", + license="MIT License", + install_requires=["requests"], + classifiers=[ + 'Intended Audience :: Information Technology', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Natural Language :: English', + 'Programming Language :: Python :: 3' + ], + entry_points={ + "console_scripts": [ + "instagram_locations=instagram_locations.main:start", + ] + }, +) \ No newline at end of file From 38ee854273f52513247fe9c1d82ec256166076c7 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:45:30 +0200 Subject: [PATCH 2/8] Update README.md --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index abffe3b..ff800cd 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,17 @@ # Instagram Location Search -## Prerequisites - -This Python application requires `requests` to be properly installed. This can be done with `pip3 install requests`. +## Installation +This Python application can be installed from PyPI using pip, and can be built into a Docker image +### Install from PyPI +`pip3 install instagram-location-search` +### Build Docker image +`docker build instagram-location-search .` ## Example usage The following command will search for Instagram locations nearby the coordinates 32.22 N, 110.97 W (downtown Tucson, Arizona.) The list of locations is saved as a CSV file at "locs.csv". -```python3 instagram-locations.py --cookie "" --lat 32.22 --lng -110.97 --csv locs.csv``` +```instagram_locations --cookie "" --lat 32.22 --lng -110.97 --csv locs.csv``` Note that this requires Instagram cookies in order to work! See below for how to obtain one from your account. @@ -26,7 +29,7 @@ Using the `--map ` command line argument, a simple Leaflet map Multiple types of output can be generated. For example, the following command will search for Instagram locations, save the JSON list, a CSV file, and a map for viewing the locations visually. -```python3 instagram-locations.py --cookie "" --lat 32.22 --lng -110.97 --json locs.json --csv locs.csv --map map.html``` +```instagram_locations --cookie "" --lat 32.22 --lng -110.97 --json locs.json --csv locs.csv --map map.html``` ## Sample Usage with `instagram-scraper` The ID list generated with the `--ids` flag can be passed into `instagram-scraper` to pull down image metadata. @@ -36,7 +39,7 @@ The ID list generated with the `--ids` flag can be passed into `instagram-scrape First, get the proximal location IDs of your target location: ```sh -python3 instagram-locations.py --cookies "" --lat --lng --ids location_ids.txt +instagram_locations --cookies "" --lat --lng --ids location_ids.txt ``` Be sure to install `instagram-scraper`: From 9ac7ff6d20c305f5276aa090aa41c4d4a42722b3 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:46:19 +0200 Subject: [PATCH 3/8] Update setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b78bf31..b56d46d 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setuptools.setup( long_description_content_type="text/markdown", url="https://www.bellingcat.com", license="MIT License", - install_requires=["requests"], + install_requires=["requests", "instagram-scraper"], classifiers=[ 'Intended Audience :: Information Technology', 'License :: OSI Approved :: MIT License', @@ -26,4 +26,4 @@ setuptools.setup( "instagram_locations=instagram_locations.main:start", ] }, -) \ No newline at end of file +) From fbc1c25a15f42fef9727144aa17c2dbdd6361e2c Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Tue, 10 Jan 2023 21:46:51 +0200 Subject: [PATCH 4/8] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff800cd..19d9721 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Instagram Location Search ## Installation -This Python application can be installed from PyPI using pip, and can be built into a Docker image +This Python application can be installed from PyPI using pip, and can also be built into a Docker image ### Install from PyPI `pip3 install instagram-location-search` From 709c85d74bc209f5fe40393e26ebebf40fb3483a Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 13 Jan 2023 07:18:04 +0200 Subject: [PATCH 5/8] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b56d46d..7feb055 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setuptools.setup( ], entry_points={ "console_scripts": [ - "instagram_locations=instagram_locations.main:start", + "instagram_locations=instagram_locations.instagram_locations:main", ] }, ) From 0c675e14c983ada0a310ad2548d00a6714cc2e95 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 13 Jan 2023 07:18:29 +0200 Subject: [PATCH 6/8] Delete instagram-locations.py --- instagram-locations.py | 237 ----------------------------------------- 1 file changed, 237 deletions(-) delete mode 100644 instagram-locations.py diff --git a/instagram-locations.py b/instagram-locations.py deleted file mode 100644 index 4323bac..0000000 --- a/instagram-locations.py +++ /dev/null @@ -1,237 +0,0 @@ -import argparse -import csv -import json -import sys -from concurrent.futures import ThreadPoolExecutor -from datetime import datetime, timezone -from itertools import product -from statistics import pstdev -from string import Template - -import requests - - -# gets instagram "locations" around a particular lat/lng using internal API -# (requires session cookie for authentication) -def get_instagram_locations(lat, lng, cookie): - timeout = 5.0 - lat_long = f"lat: {lat:.6f} | lng: {lng:.6f}" - url = "https://www.instagram.com/location_search/" - params = {"latitude": lat, "longitude": lng, "__a": 1} - headers = {"Cookie": cookie} - try: - response = requests.get(url, params=params, headers=headers, timeout=timeout) - except requests.exceptions.ConnectionError as e: - print(f"Connection failed for {lat_long}: {e}") - return [] - except requests.exceptions.Timeout: - print(f"Connections timed out after {timeout} seconds") - return [] - - try: - locations = response.json() - except json.JSONDecodeError: - print(f"Failed to get location data for {lat_long}") - return [] - - if not isinstance(locations, dict): - print(f"Got invalid response for {lat_long}") - return [] - - locations = locations.get("venues", []) - return locations - - -def get_instagram_locations_by_query(query): - locs = requests.get("https://www.instagram.com/web/search/topsearch/?context=place&query=" + query).json() - - return [v["place"]["location"] for v in locs["places"]] - - -# queries the instagram location API for several points around a central lat/lng -# in order to return additional results -def get_fuzzy_locations(lat, lng, cookie, sigma=2): - locs = get_instagram_locations(lat, lng, cookie) - loc_ids = {v["external_id"] for v in locs if "external_id" in v} - - std_lat = pstdev([v["lat"] for v in locs if "lat" in v]) - std_lng = pstdev([v["lng"] for v in locs if "lng" in v]) - - # filter to avoid calling with both lat and lng deltas equal zero (which would duplicate the call - # to obtain the initial loc) - deltas = ( - (lat + delta_lat * std_lat, lng + delta_lng * std_lng) - for delta_lat, delta_lng in filter(lambda x: any(x), product(range(-sigma, sigma + 1), repeat=2)) - ) - - # to change args order for convenient unpacking - insta_loc_func = lambda ckie, lt, ln: get_instagram_locations(lt, ln, ckie) - - with ThreadPoolExecutor() as ex: - results = ex.map(lambda x: insta_loc_func(cookie, *x), deltas) - - for new_locs in results: - for loc in new_locs: - if "external_id" in loc and loc["external_id"] not in loc_ids: - locs.append(loc) - loc_ids.add(loc["external_id"]) - - return locs - - -# converts list of instagram locations into valid geojson -def make_geojson(locations): - features = [] - - for location in [location for location in locations if "lng" in location]: - feature = { - "type": "Feature", - "geometry": {"type": "Point", "coordinates": [location["lng"], location["lat"]]}, - "properties": location, - } - features.append(feature) - - return {"type": "FeatureCollection", "features": features} - - -def encode_date(date_str: str): - """Convert date into Instagram "snowflake" ID""" - try: - date = datetime.strptime(date_str, "%Y-%m-%d") - except ValueError: - try: - date = datetime.strptime(date_str, "%Y-%m-%d") - except ValueError: - print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr) - sys.exit(1) - date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc) - date_ts = int(date.timestamp()) * 1000 # milliseconds - insta_epoch = date_ts - 1314220021300 - max_id_num = insta_epoch << 23 - - return str(max_id_num) - - -html_template = """ - - Instagram location visualizations - - - - - - - - - - -
- - - -""" - - -def main(): - parser = argparse.ArgumentParser(description="Get a list of Instagram locations near a lat/lng") - parser.add_argument("--cookie", action="store", dest="cookie") - parser.add_argument("--json", action="store", dest="output") - parser.add_argument("--geojson", action="store", dest="geojson") - parser.add_argument("--map", action="store", dest="map") - parser.add_argument("--csv", action="store", dest="csv") - parser.add_argument("--lat", action="store", dest="lat") - parser.add_argument("--lng", action="store", dest="lng") - parser.add_argument("--date", action="store", dest="date") - parser.add_argument("--ids", action="store", dest="dump_ids") - - args = parser.parse_args() - - cookie = args.cookie - - date_var = "" - if args.date is not None: - date_var = "?max_id=" + encode_date(args.date) - - locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie) - - if args.output: - json.dump(locations, open(args.output, "w")) - - if args.geojson: - json.dump(make_geojson(locations), open(args.geojson, "w")) - - if args.map: - s = Template(html_template) - viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var) - - f = open(args.map, "w") - f.write(viz) - f.close() - - if args.csv: - for i in locations: - i["url"] = f"https://www.instagram.com/explore/locations/{i['external_id']}{date_var}" - - # leading empty string for 'id' column is for backward compatibility since that's the pandas behavior. - fieldnames = ["", "name", "external_id", "external_id_source", "lat", "lng", "address", "minimum_age", "url"] - - with open(args.csv, "w") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - for idx, row in enumerate(locations): - row[""] = idx - writer.writerow(row) - - if args.dump_ids: - ids = map(lambda loc: str(loc["external_id"]), locations) - with open(args.dump_ids, "w") as f: - f.write("\n".join(ids)) - - -if __name__ == "__main__": - main() From 92c8abacba60ecad3572467362f80e553b7aaf2d Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Fri, 13 Jan 2023 07:23:03 +0200 Subject: [PATCH 7/8] Delete main.py --- instagram_locations/main.py | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 instagram_locations/main.py diff --git a/instagram_locations/main.py b/instagram_locations/main.py deleted file mode 100644 index d571463..0000000 --- a/instagram_locations/main.py +++ /dev/null @@ -1,10 +0,0 @@ -from instagram_locations.instagram_locations import main - - -def start(): - try: - main() - except KeyboardInterrupt as ctrlc: - raise KeyboardInterrupt(ctrlc) from ctrlc - except Exception as err: - raise Exception(err) from err From f07ca63b91b689b4a70fa58bf9ea05c2b1808b40 Mon Sep 17 00:00:00 2001 From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com> Date: Thu, 2 Feb 2023 14:15:21 +0200 Subject: [PATCH 8/8] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 19d9721..151e9fe 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ ## Installation This Python application can be installed from PyPI using pip, and can also be built into a Docker image -### Install from PyPI -`pip3 install instagram-location-search` +### Install with Pip +`pip3 install git+https://github.com/bellingcat/instagram-location-search` ### Build Docker image `docker build instagram-location-search .`