mirror of
https://github.com/bellingcat/instagram-location-search.git
synced 2026-06-08 02:28:29 +03:00
Merge pull request #1 from nemec/main
Add a --date filter and some other quality-of-life improvements to the generated data
This commit is contained in:
19
README.md
19
README.md
@@ -2,7 +2,7 @@
|
||||
|
||||
## Prerequisites
|
||||
|
||||
This Python application requires `requests` and `numpy` to be properly installed. This can be done with `pip3 install requests numpy`.
|
||||
This Python application requires `requests`, `numpy`, and `pandas` to be properly installed. This can be done with `pip3 install requests numpy pandas`.
|
||||
|
||||
## Example usage
|
||||
|
||||
@@ -12,6 +12,23 @@ The following command will search for Instagram locations nearby the coordinates
|
||||
|
||||
Note that this requires an Instagram session ID in order to work! See below for how to obtain one from your account.
|
||||
|
||||
## Example usage with date
|
||||
|
||||
The following command will search for Instagram locations near Seattle's "Capitol Hill Autonomous Zone" during the George Floyd
|
||||
protests in early June, 2020. Not all location pages in the area will have posts relevant to the Zone, but some do. Open the
|
||||
resulting `map.html` file in your browser to view locations.
|
||||
|
||||
```python3 instagram-locations.py --session "<session-id-token>" --lat 47.6164311 --lng -122.3203952 --map map.html --date 2020-06-09```
|
||||
|
||||
When using the `--date` argument, links to Instagram location pages will be filtered to show posts created on this date or earlier.
|
||||
Instagram will usually first show a 3x3 grid of "Top Images and Videos" that are more recent, however once you scroll past that
|
||||
there is a section labeled "Most recent" which will show the posts sorted by date (if any).
|
||||
These links are only used together with the `--csv` and `--map` arguments; they aren't included in `--json` or `--geojson` output.
|
||||
Note: Instagram interprets these dates in UTC (Coordinated Universal Time, the time standard anchored at the Greenwich meridian in Great Britain). If your target location is far from this zone,
|
||||
it's worth adding a couple of days to your filter to make sure you capture all relevant posts. Also, this only specifies the
|
||||
*maximum* post date that can be displayed. If nothing was posted that day at that location, it will show older posts (sometimes
|
||||
even multiple years older).
|
||||
|
||||
### Other output formats
|
||||
|
||||
Using the `--json <output-location>` command line argument, the list can be saved as a JSON file, almost identical to the raw API response.
|
||||
|
||||
@@ -4,6 +4,8 @@ import pandas as pd
|
||||
import argparse
|
||||
import json
|
||||
from string import Template
|
||||
from datetime import datetime, timezone
|
||||
import sys
|
||||
|
||||
# gets instagram "locations" around a particular lat/lng using internal API
|
||||
# (requires session cookie for authentication)
|
||||
@@ -11,7 +13,6 @@ def get_instagram_locations(lat, lng, cookie):
|
||||
locs = requests.get("https://www.instagram.com/location_search/?latitude=" + str(lat) + "&longitude=" + str(lng) + "&__a=1", headers={
|
||||
'Cookie': cookie
|
||||
}).json()
|
||||
|
||||
return locs['venues']
|
||||
|
||||
|
||||
@@ -55,6 +56,23 @@ def make_geojson(locs):
|
||||
|
||||
return {"type": "FeatureCollection", "features": features}
|
||||
|
||||
def encode_date(date_str: str) -> str:
    '''Convert date into Instagram "snowflake" ID

    Parses `date_str` as "yyyy-mm-dd", moves it to 23:59:59 UTC on that
    day, and encodes the resulting millisecond timestamp as a decimal
    string: a fixed offset is subtracted and the result is shifted left
    by 23 bits.

    If `date_str` cannot be parsed, an error message is printed to stderr
    and the process exits with status 1.
    '''
    try:
        date = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        # Only one input format is supported, so a single failed parse is
        # final; retrying with the same format cannot succeed.
        print('Unable to parse date. Please use format "yyyy-mm-dd".', file=sys.stderr)
        sys.exit(1)

    # Use the last second of the day (UTC) so the whole day is covered.
    date = date.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc)
    date_ts = int(date.timestamp()) * 1000  # milliseconds

    # NOTE(review): 1314220021300 is presumably Instagram's custom epoch in
    # milliseconds, making this value "ms since that epoch" - confirm.
    insta_epoch = date_ts - 1314220021300

    # The timestamp sits above the low 23 bits of the ID, which stay zero.
    max_id_num = insta_epoch << 23

    return str(max_id_num)
|
||||
|
||||
html_template = '''<html>
|
||||
<head>
|
||||
<title>Instagram location visualizations</title>
|
||||
@@ -84,6 +102,10 @@ html_template = '''<html>
|
||||
width: 100%;
|
||||
height: 600px;
|
||||
}
|
||||
img.selected-location {
|
||||
filter: hue-rotate(120deg);
|
||||
z-index: 999 !important;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -102,12 +124,15 @@ html_template = '''<html>
|
||||
}).addTo(map);
|
||||
|
||||
function onEachFeature(feature, layer) {
|
||||
layer.bindPopup(`<a href="https://www.instagram.com/explore/locations/` + feature.properties.external_id + `">` + feature.properties.name + `</a><br />` + feature.properties.address );
|
||||
layer.bindPopup(`<a href="https://www.instagram.com/explore/locations/` + feature.properties.external_id + `$date_var" target="_blank">` + feature.properties.name + `</a><br />` + feature.properties.address );
|
||||
}
|
||||
|
||||
L.geoJSON(locs, {
|
||||
onEachFeature: onEachFeature
|
||||
}).addTo(map);
|
||||
|
||||
var centerMarker = L.marker([$lat, $lng]).addTo(map);
|
||||
centerMarker._icon.classList.add('selected-location');
|
||||
</script>
|
||||
</body>
|
||||
</html>'''
|
||||
@@ -121,11 +146,16 @@ def main():
|
||||
parser.add_argument("--csv", action="store", dest="csv")
|
||||
parser.add_argument("--lat", action="store", dest="lat")
|
||||
parser.add_argument("--lng", action="store", dest="lng")
|
||||
parser.add_argument("--date", action="store", dest="date")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
cookie = 'sessionid=' + args.session
|
||||
|
||||
date_var = ''
|
||||
if args.date is not None:
|
||||
date_var = '?max_id=' + encode_date(args.date)
|
||||
|
||||
locations = get_fuzzy_locations(float(args.lat), float(args.lng), cookie)
|
||||
|
||||
if (args.output):
|
||||
@@ -136,7 +166,7 @@ def main():
|
||||
|
||||
if (args.map):
|
||||
s = Template(html_template)
|
||||
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)))
|
||||
viz = s.substitute(lat=args.lat, lng=args.lng, locs=json.dumps(make_geojson(locations)), date_var=date_var)
|
||||
|
||||
f = open(args.map, 'w')
|
||||
f.write(viz)
|
||||
@@ -144,7 +174,7 @@ def main():
|
||||
|
||||
if (args.csv):
|
||||
df = pd.DataFrame(locations)
|
||||
df['url'] = df['external_id'].apply(lambda v: 'https://www.instagram.com/explore/locations/' + str(v))
|
||||
df['url'] = df['external_id'].apply(lambda v: 'https://www.instagram.com/explore/locations/' + str(v) + date_var)
|
||||
df.to_csv(args.csv)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user