mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-12 21:48:30 +03:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c6e3671a16 | ||
|
|
f287cb8d02 | ||
|
|
6f83246478 | ||
|
|
b02139c50f | ||
|
|
0c789c3335 | ||
|
|
55cdec2fc8 | ||
|
|
aa228bcde2 | ||
|
|
fa4983aea6 | ||
|
|
2596b3d87c | ||
|
|
c91b0cd94d | ||
|
|
e6f56d6c62 | ||
|
|
4c46ff44a8 | ||
|
|
2e63491f72 | ||
|
|
03e132ff03 | ||
|
|
3b47f2343d | ||
|
|
6eb9007ece |
3
.github/actions/setup-venv/action.yml
vendored
3
.github/actions/setup-venv/action.yml
vendored
@@ -30,7 +30,8 @@ runs:
|
|||||||
id: virtualenv-cache
|
id: virtualenv-cache
|
||||||
with:
|
with:
|
||||||
path: .venv
|
path: .venv
|
||||||
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
|
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
|
||||||
|
|
||||||
- if: steps.virtualenv-cache.outputs.cache-hit != 'true'
|
- if: steps.virtualenv-cache.outputs.cache-hit != 'true'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2022, Stichting Bellingcat
|
Copyright (c) 2022, Felix Spöttel
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
@@ -39,12 +39,6 @@ pip install .
|
|||||||
```
|
```
|
||||||
Usage: geoclustering [OPTIONS] FILENAME
|
Usage: geoclustering [OPTIONS] FILENAME
|
||||||
|
|
||||||
Tool to cluster geolocations. A cluster is created when a certain number of
|
|
||||||
points (--size) each are within a given distance (--distance) of at least
|
|
||||||
one other point in the cluster. Input is supplied as a csv file. At a
|
|
||||||
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
|
|
||||||
reflected to the output.
|
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-d, --distance FLOAT (in km) Max. distance between two points in
|
-d, --distance FLOAT (in km) Max. distance between two points in
|
||||||
a cluster. [required]
|
a cluster. [required]
|
||||||
@@ -56,9 +50,6 @@ Options:
|
|||||||
Clustering algorithm to be used. `optics`
|
Clustering algorithm to be used. `optics`
|
||||||
produces tighter clusters but is slower.
|
produces tighter clusters but is slower.
|
||||||
Default: dbscan
|
Default: dbscan
|
||||||
--open Open the generated visualization in the
|
|
||||||
default browser automatically.
|
|
||||||
--debug Print debug output.
|
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
from pathlib import Path
|
|
||||||
import click
|
import click
|
||||||
import os
|
|
||||||
import webbrowser
|
import webbrowser
|
||||||
|
|
||||||
import geoclustering.clustering as clustering
|
import geoclustering.clustering as clustering
|
||||||
@@ -8,9 +6,7 @@ import geoclustering.encoding as encoding
|
|||||||
import geoclustering.io as io
|
import geoclustering.io as io
|
||||||
|
|
||||||
|
|
||||||
@click.command(
|
@click.command()
|
||||||
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
|
|
||||||
)
|
|
||||||
@click.option(
|
@click.option(
|
||||||
"--distance",
|
"--distance",
|
||||||
"-d",
|
"-d",
|
||||||
@@ -42,44 +38,26 @@ import geoclustering.io as io
|
|||||||
default="dbscan",
|
default="dbscan",
|
||||||
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
|
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
|
||||||
)
|
)
|
||||||
@click.option(
|
|
||||||
"--open",
|
|
||||||
is_flag=True,
|
|
||||||
help="Open the generated visualization in the default browser automatically.",
|
|
||||||
)
|
|
||||||
@click.option("--debug", is_flag=True, help="Print debug output.")
|
|
||||||
@click.argument("filename", type=click.Path(exists=True))
|
@click.argument("filename", type=click.Path(exists=True))
|
||||||
def main(distance, size, output, filename, algorithm, open, debug):
|
def main(distance, size, output, filename, algorithm):
|
||||||
def print_debug(s):
|
|
||||||
if debug:
|
|
||||||
click.secho(s, fg="bright_black")
|
|
||||||
|
|
||||||
df = io.read_csv_file(filename)
|
df = io.read_csv_file(filename)
|
||||||
print_debug(f"Read {len(df)} valid coordinates from {Path(filename).absolute()}")
|
|
||||||
|
|
||||||
clusters = clustering.cluster_locations(
|
clusters = clustering.cluster_locations(
|
||||||
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
|
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
|
||||||
)
|
)
|
||||||
|
|
||||||
if not bool(clusters):
|
if not bool(clusters):
|
||||||
click.secho("Did not find clusters matching input parameters.", fg="yellow")
|
click.echo("Did not find clusters matching input parameters.")
|
||||||
return
|
return
|
||||||
|
|
||||||
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
|
|
||||||
|
|
||||||
encoded = encoding.encode_clusters(clusters)
|
encoded = encoding.encode_clusters(clusters)
|
||||||
|
|
||||||
io.write_output_file(output, "result.txt", encoded["string"])
|
io.write_output_file(output, "result.txt", encoded["string"])
|
||||||
io.write_output_file(output, "result.json", encoded["json"])
|
io.write_output_file(output, "result.json", encoded["json"])
|
||||||
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
||||||
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
||||||
click.echo(f"Output files saved to {Path(output).absolute()}")
|
|
||||||
|
|
||||||
if open:
|
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||||
print_debug(f"Opening visualization in default browser")
|
|
||||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
|
||||||
|
|
||||||
click.secho("Clustering completed.", fg="green")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -14,6 +14,8 @@ def to_cluster_dict(df, clustering):
|
|||||||
"""
|
"""
|
||||||
clusters_by_id = {}
|
clusters_by_id = {}
|
||||||
|
|
||||||
|
print(clustering.labels_)
|
||||||
|
|
||||||
for idx, cluster_id in enumerate(clustering.labels_):
|
for idx, cluster_id in enumerate(clustering.labels_):
|
||||||
# ignore "noise" locations that don't belong to any cluster.
|
# ignore "noise" locations that don't belong to any cluster.
|
||||||
if cluster_id > -1:
|
if cluster_id > -1:
|
||||||
|
|||||||
@@ -2,22 +2,9 @@ from keplergl import KeplerGl
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pkg_resources import resource_filename
|
from pkg_resources import resource_filename
|
||||||
import json
|
import json
|
||||||
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
class HiddenPrints:
|
|
||||||
"""Disables stdout prints for a block of code."""
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
self._original_stdout = sys.stdout
|
|
||||||
sys.stdout = open(os.devnull, "w")
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
||||||
sys.stdout.close()
|
|
||||||
sys.stdout = self._original_stdout
|
|
||||||
|
|
||||||
|
|
||||||
def is_valid_lat(val: str) -> bool:
|
def is_valid_lat(val: str) -> bool:
|
||||||
@@ -77,10 +64,7 @@ def write_output_file(dirname, filename, data):
|
|||||||
|
|
||||||
def write_visualization(dirname, filename, data):
|
def write_visualization(dirname, filename, data):
|
||||||
"""Write a visualization, ensuring parent directories."""
|
"""Write a visualization, ensuring parent directories."""
|
||||||
# Hide kepler stdout output.
|
map = KeplerGl()
|
||||||
with HiddenPrints():
|
|
||||||
map = KeplerGl()
|
|
||||||
|
|
||||||
map.add_data(data=data, name="clusters")
|
map.add_data(data=data, name="clusters")
|
||||||
|
|
||||||
# config configures a default color scheme for our clusters layer.
|
# config configures a default color scheme for our clusters layer.
|
||||||
@@ -89,9 +73,6 @@ def write_visualization(dirname, filename, data):
|
|||||||
map.config = json.loads(f.read())
|
map.config = json.loads(f.read())
|
||||||
|
|
||||||
filepath = ensure_file_path(dirname, filename)
|
filepath = ensure_file_path(dirname, filename)
|
||||||
|
map.save_to_html(file_name=str(filepath), center_map=True)
|
||||||
# Hide kepler stdout output.
|
|
||||||
with HiddenPrints():
|
|
||||||
map.save_to_html(file_name=str(filepath), center_map=True)
|
|
||||||
|
|
||||||
return filepath
|
return filepath
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"config": {
|
"config": {
|
||||||
"dataId": "clusters",
|
"dataId": "clusters",
|
||||||
"label": "clusters",
|
"label": "clusters",
|
||||||
"color": [248, 149, 112],
|
"color": [179, 173, 158],
|
||||||
"highlightColor": [252, 242, 26, 255],
|
"highlightColor": [252, 242, 26, 255],
|
||||||
"columns": { "geojson": "_geojson" },
|
"columns": { "geojson": "_geojson" },
|
||||||
"isVisible": true,
|
"isVisible": true,
|
||||||
@@ -19,30 +19,16 @@
|
|||||||
"thickness": 0.5,
|
"thickness": 0.5,
|
||||||
"strokeColor": null,
|
"strokeColor": null,
|
||||||
"colorRange": {
|
"colorRange": {
|
||||||
"name": "Uber Viz Qualitative 4",
|
"name": "Global Warming",
|
||||||
"type": "qualitative",
|
"type": "sequential",
|
||||||
"category": "Uber",
|
"category": "Uber",
|
||||||
"colors": [
|
"colors": [
|
||||||
"#12939A",
|
"#5A1846",
|
||||||
"#DDB27C",
|
"#900C3F",
|
||||||
"#88572C",
|
"#C70039",
|
||||||
"#FF991F",
|
"#E3611C",
|
||||||
"#F15C17",
|
"#F1920E",
|
||||||
"#223F9A",
|
"#FFC300"
|
||||||
"#DA70BF",
|
|
||||||
"#125C77",
|
|
||||||
"#4DC19C",
|
|
||||||
"#776E57",
|
|
||||||
"#17B8BE",
|
|
||||||
"#F6D18A",
|
|
||||||
"#B7885E",
|
|
||||||
"#FFCB99",
|
|
||||||
"#F89570",
|
|
||||||
"#829AE3",
|
|
||||||
"#E79FD5",
|
|
||||||
"#1E96BE",
|
|
||||||
"#89DAC1",
|
|
||||||
"#B3AD9E"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"strokeColorRange": {
|
"strokeColorRange": {
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
_MAJOR = "0"
|
_MAJOR = "0"
|
||||||
_MINOR = "2"
|
_MINOR = "1"
|
||||||
# On main and in a nightly release the patch should be one ahead of the last
|
# On main and in a nightly release the patch should be one ahead of the last
|
||||||
# released build.
|
# released build.
|
||||||
_PATCH = "1"
|
_PATCH = "2"
|
||||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||||
_SUFFIX = ""
|
_SUFFIX = ""
|
||||||
|
|||||||
0
scripts/release.sh
Executable file → Normal file
0
scripts/release.sh
Executable file → Normal file
2
setup.py
2
setup.py
@@ -22,7 +22,7 @@ setup(
|
|||||||
author_email="tech@bellingcat.com",
|
author_email="tech@bellingcat.com",
|
||||||
license="MIT",
|
license="MIT",
|
||||||
packages=["geoclustering"],
|
packages=["geoclustering"],
|
||||||
package_data={"geoclustering": ["kepler_config.json"]},
|
package_data={"geoclustering": ["kepler_config.json"]}
|
||||||
keywords=["cluster", "gis", "pattern-analysis"],
|
keywords=["cluster", "gis", "pattern-analysis"],
|
||||||
entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
|
entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
|||||||
Reference in New Issue
Block a user