8 Commits

Author SHA1 Message Date
Felix Spöttel
8657bd73ec Bump version to v0.2.0 for release 2022-07-01 18:15:07 +02:00
Felix Spöttel
e633665813 chore: update license 2022-07-01 18:12:00 +02:00
Felix Spöttel
cff5256d06 feat: add --debug flag, improve logging & help
closes #9
2022-07-01 17:53:09 +02:00
Felix Spöttel
4dfa08bbbc feat: add --open flag (#11)
closes #5
2022-07-01 17:08:53 +02:00
Felix Spöttel
eaa4022b70 ci: use pipfile.lock as cache key 2022-07-01 17:05:43 +02:00
Felix Spöttel
1cb5541baa chore: remove clustering print 2022-07-01 17:04:56 +02:00
Felix Spöttel
b40074317c feat: extend kepler.gl color range
closes #10
2022-07-01 17:04:33 +02:00
Miguel Sozinho Ramalho
f1053953ba feat: auto-deploy to pypi (#8) 2022-07-01 15:23:50 +01:00
10 changed files with 86 additions and 23 deletions

View File

@@ -30,8 +30,7 @@ runs:
id: virtualenv-cache
with:
path: .venv
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
- if: steps.virtualenv-cache.outputs.cache-hit != 'true'
shell: bash
run: |

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2022, Felix Spöttel
Copyright (c) 2022, Stichting Bellingcat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -39,6 +39,12 @@ pip install .
```
Usage: geoclustering [OPTIONS] FILENAME
Tool to cluster geolocations. A cluster is created when a certain number of
points (--size) each are within a given distance (--distance) of at least
one other point in the cluster. Input is supplied as a csv file. At a
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
reflected to the output.
Options:
-d, --distance FLOAT (in km) Max. distance between two points in
a cluster. [required]
@@ -50,6 +56,9 @@ Options:
Clustering algorithm to be used. `optics`
produces tighter clusters but is slower.
Default: dbscan
--open Open the generated visualization in the
default browser automatically.
--debug Print debug output.
--help Show this message and exit.
```

View File

@@ -1,4 +1,6 @@
from pathlib import Path
import click
import os
import webbrowser
import geoclustering.clustering as clustering
@@ -6,7 +8,13 @@ import geoclustering.encoding as encoding
import geoclustering.io as io
@click.command()
def print_debug(s):
    """Print the debug message ``s`` via click, styled with a dim foreground."""
    # "bright_black" is the colour name passed through to click's styling.
    debug_style = {"fg": "bright_black"}
    click.secho(s, **debug_style)
@click.command(
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
)
@click.option(
"--distance",
"-d",
@@ -38,9 +46,20 @@ import geoclustering.io as io
default="dbscan",
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
)
@click.option(
"--open",
is_flag=True,
help="Open the generated visualization in the default browser automatically.",
)
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True))
def main(distance, size, output, filename, algorithm):
def main(distance, size, output, filename, algorithm, open, debug):
if debug:
print_debug(f"Reading input from {Path(filename).absolute()}")
df = io.read_csv_file(filename)
if debug:
print_debug(f"Read {len(df)} valid coordinates")
clusters = clustering.cluster_locations(
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
@@ -50,14 +69,19 @@ def main(distance, size, output, filename, algorithm):
click.echo("Did not find clusters matching input parameters.")
return
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
encoded = encoding.encode_clusters(clusters)
io.write_output_file(output, "result.txt", encoded["string"])
io.write_output_file(output, "result.json", encoded["json"])
io.write_output_file(output, "result.geojson", encoded["geojson"])
vis = io.write_visualization(output, "result.html", encoded["geojson"])
click.echo(f"Output files saved to {Path(output).absolute()}")
webbrowser.open_new_tab("file://" + str(vis.absolute()))
if open:
print_debug(f"Opening visualization in default browser")
webbrowser.open_new_tab("file://" + str(vis.absolute()))
if __name__ == "__main__":

View File

@@ -14,8 +14,6 @@ def to_cluster_dict(df, clustering):
"""
clusters_by_id = {}
print(clustering.labels_)
for idx, cluster_id in enumerate(clustering.labels_):
# ignore "noise" locations that don't belong to any cluster.
if cluster_id > -1:

View File

@@ -2,9 +2,22 @@ from keplergl import KeplerGl
from pathlib import Path
from pkg_resources import resource_filename
import json
import json
import pandas as pd
import numpy as np
import os
import sys
class HiddenPrints:
    """Context manager that disables stdout prints for a block of code.

    On entry ``sys.stdout`` is redirected to ``os.devnull``. On exit the
    stream that was active on entry is restored, whether or not the block
    raised. Exceptions raised inside the block are not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the devnull stream so that __exit__ closes
        # exactly the file opened here, even if the wrapped code rebinds
        # sys.stdout to something else in the meantime.
        self._devnull = open(os.devnull, "w")
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self._devnull.close()
        finally:
            # Always restore the caller's stream, even if closing fails.
            sys.stdout = self._original_stdout
def is_valid_lat(val: str) -> bool:
@@ -64,7 +77,10 @@ def write_output_file(dirname, filename, data):
def write_visualization(dirname, filename, data):
"""Write a visualization, ensuring parent directories."""
map = KeplerGl()
# Hide kepler stdout output.
with HiddenPrints():
map = KeplerGl()
map.add_data(data=data, name="clusters")
# config configures a default color scheme for our clusters layer.
@@ -73,6 +89,9 @@ def write_visualization(dirname, filename, data):
map.config = json.loads(f.read())
filepath = ensure_file_path(dirname, filename)
map.save_to_html(file_name=str(filepath), center_map=True)
# Hide kepler stdout output.
with HiddenPrints():
map.save_to_html(file_name=str(filepath), center_map=True)
return filepath

View File

@@ -9,7 +9,7 @@
"config": {
"dataId": "clusters",
"label": "clusters",
"color": [179, 173, 158],
"color": [248, 149, 112],
"highlightColor": [252, 242, 26, 255],
"columns": { "geojson": "_geojson" },
"isVisible": true,
@@ -19,16 +19,30 @@
"thickness": 0.5,
"strokeColor": null,
"colorRange": {
"name": "Global Warming",
"type": "sequential",
"name": "Uber Viz Qualitative 4",
"type": "qualitative",
"category": "Uber",
"colors": [
"#5A1846",
"#900C3F",
"#C70039",
"#E3611C",
"#F1920E",
"#FFC300"
"#12939A",
"#DDB27C",
"#88572C",
"#FF991F",
"#F15C17",
"#223F9A",
"#DA70BF",
"#125C77",
"#4DC19C",
"#776E57",
"#17B8BE",
"#F6D18A",
"#B7885E",
"#FFCB99",
"#F89570",
"#829AE3",
"#E79FD5",
"#1E96BE",
"#89DAC1",
"#B3AD9E"
]
},
"strokeColorRange": {

View File

@@ -1,8 +1,8 @@
_MAJOR = "0"
_MINOR = "1"
_MINOR = "2"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "2"
_PATCH = "0"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""

0
scripts/release.sh Normal file → Executable file
View File

View File

@@ -22,7 +22,7 @@ setup(
author_email="tech@bellingcat.com",
license="MIT",
packages=["geoclustering"],
package_data={"geoclustering": ["kepler_config.json"]}
package_data={"geoclustering": ["kepler_config.json"]},
keywords=["cluster", "gis", "pattern-analysis"],
entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
install_requires=[