From bc7f07418c8260d072dfe50566242c7eb99209b7 Mon Sep 17 00:00:00 2001
From: Richard Mwewa <74001397+rly0nheart@users.noreply.github.com>
Date: Tue, 10 Jan 2023 21:34:28 +0200
Subject: [PATCH] Refactored for PyPI and added Dockerfile

---
Reviewer note: the added lines of instagram_locations/instagram_locations.py
were revised after this patch was generated. The line counts below reflect the
revision; the blob id on that file's "index" line predates it.

 Dockerfile                                 |  11 +
 instagram_locations/__init__.py            |   0
 instagram_locations/instagram_locations.py | 276 +++++++++++++++++++++
 instagram_locations/main.py                |  10 +
 setup.py                                   |  29 +++
 5 files changed, 326 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 instagram_locations/__init__.py
 create mode 100644 instagram_locations/instagram_locations.py
 create mode 100644 instagram_locations/main.py
 create mode 100644 setup.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a4a85a8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,11 @@
+# syntax=docker/dockerfile:1
+
+FROM python:latest
+
+WORKDIR /app
+
+COPY . .
+
+RUN pip install --upgrade pip && pip install build && python -m build && pip install dist/*.whl
+
+ENTRYPOINT ["instagram_locations"]
\ No newline at end of file
diff --git a/instagram_locations/__init__.py b/instagram_locations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/instagram_locations/instagram_locations.py b/instagram_locations/instagram_locations.py
new file mode 100644
index 0000000..3029251
--- /dev/null
+++ b/instagram_locations/instagram_locations.py
@@ -0,0 +1,276 @@
+import argparse
+import csv
+import json
+import sys
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime, timezone
+from itertools import product
+from statistics import pstdev
+from string import Template
+
+import requests
+
+
+# gets instagram "locations" around a particular lat/lng using internal API
+# (requires session cookie for authentication)
+def get_instagram_locations(lat, lng, cookie):
+    """Return the venues Instagram reports near ``lat``/``lng``.
+
+    Any request failure or unusable reply is reported on stdout and yields an
+    empty list, so callers can always iterate over the result.
+    """
+    timeout = 5.0
+    lat_long = f"lat: {lat:.6f} | lng: {lng:.6f}"
+    url = "https://www.instagram.com/location_search/"
+    params = {"latitude": lat, "longitude": lng, "__a": 1}
+    headers = {"Cookie": cookie}
+    try:
+        response = requests.get(url, params=params, headers=headers, timeout=timeout)
+    except requests.exceptions.ConnectionError as e:
+        print(f"Connection failed for {lat_long}: {e}")
+        return []
+    except requests.exceptions.Timeout:
+        print(f"Connections timed out after {timeout} seconds")
+        return []
+    except requests.exceptions.RequestException as e:
+        # any other failure raised by requests, e.g. too many redirects
+        print(f"Request failed for {lat_long}: {e}")
+        return []
+
+    try:
+        locations = response.json()
+    except ValueError:
+        # every JSON decode error response.json() can raise subclasses ValueError,
+        # whichever JSON backend requests is using
+        print(f"Failed to get location data for {lat_long}")
+        return []
+
+    if not isinstance(locations, dict):
+        print(f"Got invalid response for {lat_long}")
+        return []
+
+    # "venues" may be missing or null; both become an empty list
+    return locations.get("venues") or []
+
+
+def get_instagram_locations_by_query(query):
+    """Return the location records of Instagram's place search for ``query``.
+
+    Errors raised by ``requests`` or by an unexpected reply shape propagate.
+    """
+    # params= lets requests URL-encode the query (spaces, "&", "#", non-ASCII);
+    # the timeout keeps a stalled connection from blocking forever
+    response = requests.get(
+        "https://www.instagram.com/web/search/topsearch/",
+        params={"context": "place", "query": query},
+        timeout=5.0,
+    )
+    locs = response.json()
+
+    return [v["place"]["location"] for v in locs["places"]]
+
+
+# queries the instagram location API for several points around a central lat/lng
+# in order to return additional results
+def get_fuzzy_locations(lat, lng, cookie, sigma=2):
+    """Return de-duplicated venues from a grid of lookups centred on ``lat``/``lng``.
+
+    The grid step is the population standard deviation of the coordinates
+    returned by the first lookup, and ``sigma`` is the number of steps taken in
+    each direction. Venues are de-duplicated by "external_id".
+    """
+    locs = get_instagram_locations(lat, lng, cookie)
+    loc_ids = {v["external_id"] for v in locs if "external_id" in v}
+
+    lats = [v["lat"] for v in locs if "lat" in v]
+    lngs = [v["lng"] for v in locs if "lng" in v]
+    # pstdev() raises StatisticsError on empty data, which is exactly what a
+    # failed or empty first lookup produces; there is nothing to fan out from
+    if not lats or not lngs:
+        return locs
+
+    std_lat = pstdev(lats)
+    std_lng = pstdev(lngs)
+
+    grid = (
+        (lat + delta_lat * std_lat, lng + delta_lng * std_lng)
+        for delta_lat, delta_lng in product(range(-sigma, sigma + 1), repeat=2)
+    )
+    # dict.fromkeys() drops grid points that coincide when a spread is zero while
+    # keeping their order; the centre is excluded because it was queried above
+    points = [point for point in dict.fromkeys(grid) if point != (lat, lng)]
+
+    with ThreadPoolExecutor() as ex:
+        results = list(ex.map(lambda point: get_instagram_locations(*point, cookie), points))
+
+    for new_locs in results:
+        for loc in new_locs:
+            if "external_id" in loc and loc["external_id"] not in loc_ids:
+                locs.append(loc)
+                loc_ids.add(loc["external_id"])
+
+    return locs
+
+
+# converts list of instagram locations into valid geojson
+def make_geojson(locations):
+    """Return a GeoJSON FeatureCollection with one Point feature per location.
+
+    Locations without both "lng" and "lat" are skipped; each feature carries
+    the whole location dict as its properties.
+    """
+    features = [
+        {
+            "type": "Feature",
+            "geometry": {"type": "Point", "coordinates": [location["lng"], location["lat"]]},
+            "properties": location,
+        }
+        for location in locations
+        # testing "lng" alone let a location without "lat" raise KeyError
+        if "lng" in location and "lat" in location
+    ]
+
+    return {"type": "FeatureCollection", "features": features}
+
+
+def encode_date(date_str: str):
+    """Convert date into Instagram "snowflake" ID
+
+    ``date_str`` must look like "yyyy-mm-dd"; otherwise an error is printed to
+    stderr and the process exits with status 1. The ID is derived from
+    23:59:59 UTC on that day and returned as a decimal string.
+    """
+    try:
+        date = datetime.strptime(date_str, "%Y-%m-%d")
+    except ValueError:
+        # one attempt is enough: retrying the identical format cannot succeed
+        print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
+        sys.exit(1)
+    date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc)
+    date_ts = int(date.timestamp()) * 1000  # milliseconds
+    insta_epoch = date_ts - 1314220021300
+    max_id_num = insta_epoch << 23
+
+    return str(max_id_num)
+
+
+html_template = """
+