mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-10 12:38:30 +03:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8657bd73ec | ||
|
|
e633665813 | ||
|
|
cff5256d06 | ||
|
|
4dfa08bbbc | ||
|
|
eaa4022b70 | ||
|
|
1cb5541baa | ||
|
|
b40074317c | ||
|
|
f1053953ba |
3
.github/actions/setup-venv/action.yml
vendored
3
.github/actions/setup-venv/action.yml
vendored
@@ -30,8 +30,7 @@ runs:
|
||||
id: virtualenv-cache
|
||||
with:
|
||||
path: .venv
|
||||
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
|
||||
|
||||
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
|
||||
- if: steps.virtualenv-cache.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022, Felix Spöttel
|
||||
Copyright (c) 2022, Stichting Bellingcat
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -39,6 +39,12 @@ pip install .
|
||||
```
|
||||
Usage: geoclustering [OPTIONS] FILENAME
|
||||
|
||||
Tool to cluster geolocations. A cluster is created when a certain number of
|
||||
points (--size) each are within a given distance (--distance) of at least
|
||||
one other point in the cluster. Input is supplied as a csv file. At a
|
||||
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
|
||||
reflected to the output.
|
||||
|
||||
Options:
|
||||
-d, --distance FLOAT (in km) Max. distance between two points in
|
||||
a cluster. [required]
|
||||
@@ -50,6 +56,9 @@ Options:
|
||||
Clustering algorithm to be used. `optics`
|
||||
produces tighter clusters but is slower.
|
||||
Default: dbscan
|
||||
--open Open the generated visualization in the
|
||||
default browser automatically.
|
||||
--debug Print debug output.
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from pathlib import Path
|
||||
import click
|
||||
import os
|
||||
import webbrowser
|
||||
|
||||
import geoclustering.clustering as clustering
|
||||
@@ -6,7 +8,13 @@ import geoclustering.encoding as encoding
|
||||
import geoclustering.io as io
|
||||
|
||||
|
||||
@click.command()
|
||||
def print_debug(s):
|
||||
click.secho(s, fg="bright_black")
|
||||
|
||||
|
||||
@click.command(
|
||||
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
|
||||
)
|
||||
@click.option(
|
||||
"--distance",
|
||||
"-d",
|
||||
@@ -38,9 +46,20 @@ import geoclustering.io as io
|
||||
default="dbscan",
|
||||
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
|
||||
)
|
||||
@click.option(
|
||||
"--open",
|
||||
is_flag=True,
|
||||
help="Open the generated visualization in the default browser automatically.",
|
||||
)
|
||||
@click.option("--debug", is_flag=True, help="Print debug output.")
|
||||
@click.argument("filename", type=click.Path(exists=True))
|
||||
def main(distance, size, output, filename, algorithm):
|
||||
def main(distance, size, output, filename, algorithm, open, debug):
|
||||
if debug:
|
||||
print_debug(f"Reading input from {Path(filename).absolute()}")
|
||||
|
||||
df = io.read_csv_file(filename)
|
||||
if debug:
|
||||
print_debug(f"Read {len(df)} valid coordinates")
|
||||
|
||||
clusters = clustering.cluster_locations(
|
||||
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
|
||||
@@ -50,14 +69,19 @@ def main(distance, size, output, filename, algorithm):
|
||||
click.echo("Did not find clusters matching input parameters.")
|
||||
return
|
||||
|
||||
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
|
||||
|
||||
encoded = encoding.encode_clusters(clusters)
|
||||
|
||||
io.write_output_file(output, "result.txt", encoded["string"])
|
||||
io.write_output_file(output, "result.json", encoded["json"])
|
||||
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
||||
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
||||
click.echo(f"Output files saved to {Path(output).absolute()}")
|
||||
|
||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||
if open:
|
||||
print_debug(f"Opening visualization in default browser")
|
||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -14,8 +14,6 @@ def to_cluster_dict(df, clustering):
|
||||
"""
|
||||
clusters_by_id = {}
|
||||
|
||||
print(clustering.labels_)
|
||||
|
||||
for idx, cluster_id in enumerate(clustering.labels_):
|
||||
# ignore "noise" locations that don't belong to any cluster.
|
||||
if cluster_id > -1:
|
||||
|
||||
@@ -2,9 +2,22 @@ from keplergl import KeplerGl
|
||||
from pathlib import Path
|
||||
from pkg_resources import resource_filename
|
||||
import json
|
||||
import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
class HiddenPrints:
|
||||
"""Disables stdout prints for a block of code."""
|
||||
|
||||
def __enter__(self):
|
||||
self._original_stdout = sys.stdout
|
||||
sys.stdout = open(os.devnull, "w")
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
sys.stdout.close()
|
||||
sys.stdout = self._original_stdout
|
||||
|
||||
|
||||
def is_valid_lat(val: str) -> bool:
|
||||
@@ -64,7 +77,10 @@ def write_output_file(dirname, filename, data):
|
||||
|
||||
def write_visualization(dirname, filename, data):
|
||||
"""Write a visualization, ensuring parent directories."""
|
||||
map = KeplerGl()
|
||||
# Hide kepler stdout output.
|
||||
with HiddenPrints():
|
||||
map = KeplerGl()
|
||||
|
||||
map.add_data(data=data, name="clusters")
|
||||
|
||||
# config configures a default color scheme for our clusters layer.
|
||||
@@ -73,6 +89,9 @@ def write_visualization(dirname, filename, data):
|
||||
map.config = json.loads(f.read())
|
||||
|
||||
filepath = ensure_file_path(dirname, filename)
|
||||
map.save_to_html(file_name=str(filepath), center_map=True)
|
||||
|
||||
# Hide kepler stdout output.
|
||||
with HiddenPrints():
|
||||
map.save_to_html(file_name=str(filepath), center_map=True)
|
||||
|
||||
return filepath
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"config": {
|
||||
"dataId": "clusters",
|
||||
"label": "clusters",
|
||||
"color": [179, 173, 158],
|
||||
"color": [248, 149, 112],
|
||||
"highlightColor": [252, 242, 26, 255],
|
||||
"columns": { "geojson": "_geojson" },
|
||||
"isVisible": true,
|
||||
@@ -19,16 +19,30 @@
|
||||
"thickness": 0.5,
|
||||
"strokeColor": null,
|
||||
"colorRange": {
|
||||
"name": "Global Warming",
|
||||
"type": "sequential",
|
||||
"name": "Uber Viz Qualitative 4",
|
||||
"type": "qualitative",
|
||||
"category": "Uber",
|
||||
"colors": [
|
||||
"#5A1846",
|
||||
"#900C3F",
|
||||
"#C70039",
|
||||
"#E3611C",
|
||||
"#F1920E",
|
||||
"#FFC300"
|
||||
"#12939A",
|
||||
"#DDB27C",
|
||||
"#88572C",
|
||||
"#FF991F",
|
||||
"#F15C17",
|
||||
"#223F9A",
|
||||
"#DA70BF",
|
||||
"#125C77",
|
||||
"#4DC19C",
|
||||
"#776E57",
|
||||
"#17B8BE",
|
||||
"#F6D18A",
|
||||
"#B7885E",
|
||||
"#FFCB99",
|
||||
"#F89570",
|
||||
"#829AE3",
|
||||
"#E79FD5",
|
||||
"#1E96BE",
|
||||
"#89DAC1",
|
||||
"#B3AD9E"
|
||||
]
|
||||
},
|
||||
"strokeColorRange": {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
_MAJOR = "0"
|
||||
_MINOR = "1"
|
||||
_MINOR = "2"
|
||||
# On main and in a nightly release the patch should be one ahead of the last
|
||||
# released build.
|
||||
_PATCH = "3"
|
||||
_PATCH = "0"
|
||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||
_SUFFIX = ""
|
||||
|
||||
0
scripts/release.sh
Normal file → Executable file
0
scripts/release.sh
Normal file → Executable file
Reference in New Issue
Block a user