mirror of
https://github.com/bellingcat/instagram-location-search.git
synced 2026-06-08 02:28:29 +03:00
Add --date argument filter
This commit is contained in:
17
README.md
17
README.md
@@ -12,6 +12,23 @@ The following command will search for Instagram locations nearby the coordinates
|
||||
|
||||
Note that this requires an Instagram session ID in order to work! See below for how to obtain one from your account.
|
||||
|
||||
## Example usage with date
|
||||
|
||||
The following command will search for Instagram locations near Seattle's "Capitol Hill Autonomous Zone" during the George Floyd
|
||||
protests in early June, 2020. Not all location pages in the area will have posts relevant to the Zone, but some do. Open the
|
||||
resulting `map.html` file in your browser to view locations.
|
||||
|
||||
```python3 instagram-locations.py --session "<session-id-token>" --lat 47.6164311 --lng -122.3203952 --map map.html --date 2020-06-09```
|
||||
|
||||
When using the `--date` argument, links to Instagram location pages will be filtered to show posts created on this date or earlier.
|
||||
Instagram will usually first show a 3x3 grid of "Top Images and Videos" that are more recent; however, once you scroll past that,
|
||||
there is a section labeled "Most recent" which will show the posts sorted by date (if any).
|
||||
These links are only used together with the `--csv` and `--map` arguments; they aren't included in `--json` or `--geojson` output.
|
||||
Note: Instagram treats these dates as UTC (Coordinated Universal Time), the time standard that matches winter time in Great Britain. If your target location is far from this zone,
|
||||
it's worth adding a couple of days to your filter to make sure you capture all relevant posts. Also, this only specifies the
|
||||
*maximum* post date that can be displayed. If nothing was posted that day at that location, it will show older posts (sometimes
|
||||
even multiple years older).
|
||||
|
||||
### Other output formats
|
||||
|
||||
Using the `--json <output-location>` command line argument, the list can be saved as a JSON file, almost identical to the raw API response.
|
||||
|
||||
@@ -4,6 +4,8 @@ import pandas as pd
|
||||
import argparse
|
||||
import json
|
||||
from string import Template
|
||||
from datetime import datetime, timezone
|
||||
import sys
|
||||
|
||||
# gets instagram "locations" around a particular lat/lng using internal API
|
||||
# (requires session cookie for authentication)
|
||||
@@ -11,7 +13,6 @@ def get_instagram_locations(lat, lng, cookie):
|
||||
locs = requests.get("https://www.instagram.com/location_search/?latitude=" + str(lat) + "&longitude=" + str(lng) + "&__a=1", headers={
|
||||
'Cookie': cookie
|
||||
}).json()
|
||||
|
||||
return locs['venues']
|
||||
|
||||
|
||||
@@ -55,6 +56,23 @@ def make_geojson(locs):
|
||||
|
||||
return {"type": "FeatureCollection", "features": features}
|
||||
|
||||
def encode_date(date_str: str) -> str:
    '''Convert a "yyyy-mm-dd" date into an Instagram "snowflake" ID string.

    The date is pinned to 23:59:59 UTC of the given day, converted to a
    millisecond timestamp, offset by a fixed constant and shifted left by
    23 bits. The result is returned as a decimal string so it can be
    appended directly to a URL query parameter.

    date_str -- date in the form "yyyy-mm-dd".

    Returns the computed ID as a string.

    If date_str cannot be parsed, an error message is written to stderr and
    the process exits with status 1 (SystemExit).
    '''
    try:
        date = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        # A single parse attempt is enough: retrying with the same format
        # string could never succeed where the first attempt failed.
        print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
        sys.exit(1)

    # Use the last second of the day (UTC) so the whole day is included.
    date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc)
    date_ts = int(date.timestamp()) * 1000  # milliseconds

    # NOTE(review): 1314220021300 is presumably Instagram's custom epoch in
    # milliseconds, and the 23-bit shift presumably leaves room for the
    # non-timestamp bits of the ID -- confirm against Instagram's ID scheme.
    insta_epoch = date_ts - 1314220021300
    max_id_num = insta_epoch << 23

    return str(max_id_num)
|
||||
|
||||
html_template = '''<html>
|
||||
<head>
|
||||
<title>Instagram location visualizations</title>
|
||||
@@ -102,7 +120,7 @@ html_template = '''<html>
|
||||
}).addTo(map);
|
||||
|
||||
function onEachFeature(feature, layer) {
|
||||
layer.bindPopup(`<a href="https://www.instagram.com/explore/locations/` + feature.properties.external_id + `">` + feature.properties.name + `</a><br />` + feature.properties.address );
|
||||
layer.bindPopup(`<a href="https://www.instagram.com/explore/locations/` + feature.properties.external_id + `$date_var">` + feature.properties.name + `</a><br />` + feature.properties.address );
|
||||
}
|
||||
|
||||
L.geoJSON(locs, {
|
||||
@@ -121,11 +139,16 @@ def main():
|
||||
parser.add_argument("--csv", action="store", dest="csv")
|
||||
parser.add_argument("--lat", action="store", dest="lat")
|
||||
parser.add_argument("--lng", action="store", dest="lng")
|
||||
parser.add_argument("--date", action="store", dest="date")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
cookie = 'sessionid=' + args.session
|
||||
|
||||
date_var = ''
|
||||
if args.date is not None:
|
||||
date_var = '?max_id=' + encode_date(args.date)
|
||||
|
||||
locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie)
|
||||
|
||||
if (args.output):
|
||||
@@ -136,7 +159,7 @@ def main():
|
||||
|
||||
if (args.map):
|
||||
s = Template(html_template)
|
||||
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)))
|
||||
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var)
|
||||
|
||||
f = open(args.map, 'w')
|
||||
f.write(viz)
|
||||
@@ -144,7 +167,7 @@ def main():
|
||||
|
||||
if (args.csv):
|
||||
df = pd.DataFrame(locations)
|
||||
df['url'] = df['external_id'].apply(lambda v: 'https://www.instagram.com/explore/locations/' + str(v))
|
||||
df['url'] = df['external_id'].apply(lambda v: 'https://www.instagram.com/explore/locations/' + str(v) + date_var)
|
||||
df.to_csv(args.csv)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user