16 Commits

Author SHA1 Message Date
msramalho
c6e3671a16 fixing kepler json and version 2022-07-01 14:45:51 +02:00
Miguel Ramalho
f287cb8d02 Bump version to v0.1.0 for release 2022-07-01 14:28:32 +02:00
msramalho
6f83246478 renaming to geoclustering due to pypi 2022-07-01 14:24:22 +02:00
msramalho
b02139c50f test:release 2022-07-01 14:13:03 +02:00
msramalho
0c789c3335 cleanup 2022-07-01 13:32:41 +02:00
msramalho
55cdec2fc8 delete lint 2022-07-01 13:23:32 +02:00
msramalho
aa228bcde2 simplify 2022-07-01 13:22:39 +02:00
msramalho
fa4983aea6 main.yml fix 2022-07-01 13:13:22 +02:00
msramalho
2596b3d87c trigger ga 2022-07-01 13:12:40 +02:00
Miguel Sozinho Ramalho
c91b0cd94d Merge branch 'main' into feat-pypi-workflow 2022-07-01 12:12:07 +01:00
msramalho
e6f56d6c62 updates 2022-07-01 13:10:42 +02:00
msramalho
4c46ff44a8 vresion fix 2022-07-01 13:04:57 +02:00
msramalho
2e63491f72 steps 2022-07-01 13:03:12 +02:00
msramalho
03e132ff03 build dist 2022-07-01 13:02:08 +02:00
msramalho
3b47f2343d on push 2022-07-01 12:48:19 +02:00
msramalho
6eb9007ece testing workflow without 2022-07-01 12:46:49 +02:00
10 changed files with 24 additions and 85 deletions

View File

@@ -30,7 +30,8 @@ runs:
id: virtualenv-cache id: virtualenv-cache
with: with:
path: .venv path: .venv
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }} key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
- if: steps.virtualenv-cache.outputs.cache-hit != 'true' - if: steps.virtualenv-cache.outputs.cache-hit != 'true'
shell: bash shell: bash
run: | run: |

View File

@@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) 2022, Stichting Bellingcat Copyright (c) 2022, Felix Spöttel
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

View File

@@ -39,12 +39,6 @@ pip install .
``` ```
Usage: geoclustering [OPTIONS] FILENAME Usage: geoclustering [OPTIONS] FILENAME
Tool to cluster geolocations. A cluster is created when a certain number of
points (--size) each are within a given distance (--distance) of at least
one other point in the cluster. Input is supplied as a csv file. At a
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
reflected to the output.
Options: Options:
-d, --distance FLOAT (in km) Max. distance between two points in -d, --distance FLOAT (in km) Max. distance between two points in
a cluster. [required] a cluster. [required]
@@ -56,9 +50,6 @@ Options:
Clustering algorithm to be used. `optics` Clustering algorithm to be used. `optics`
produces tighter clusters but is slower. produces tighter clusters but is slower.
Default: dbscan Default: dbscan
--open Open the generated visualization in the
default browser automatically.
--debug Print debug output.
--help Show this message and exit. --help Show this message and exit.
``` ```

View File

@@ -1,6 +1,4 @@
from pathlib import Path
import click import click
import os
import webbrowser import webbrowser
import geoclustering.clustering as clustering import geoclustering.clustering as clustering
@@ -8,9 +6,7 @@ import geoclustering.encoding as encoding
import geoclustering.io as io import geoclustering.io as io
@click.command( @click.command()
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
)
@click.option( @click.option(
"--distance", "--distance",
"-d", "-d",
@@ -42,44 +38,26 @@ import geoclustering.io as io
default="dbscan", default="dbscan",
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan", help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
) )
@click.option(
"--open",
is_flag=True,
help="Open the generated visualization in the default browser automatically.",
)
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True)) @click.argument("filename", type=click.Path(exists=True))
def main(distance, size, output, filename, algorithm, open, debug): def main(distance, size, output, filename, algorithm):
def print_debug(s):
if debug:
click.secho(s, fg="bright_black")
df = io.read_csv_file(filename) df = io.read_csv_file(filename)
print_debug(f"Read {len(df)} valid coordinates from {Path(filename).absolute()}")
clusters = clustering.cluster_locations( clusters = clustering.cluster_locations(
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
) )
if not bool(clusters): if not bool(clusters):
click.secho("Did not find clusters matching input parameters.", fg="yellow") click.echo("Did not find clusters matching input parameters.")
return return
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
encoded = encoding.encode_clusters(clusters) encoded = encoding.encode_clusters(clusters)
io.write_output_file(output, "result.txt", encoded["string"]) io.write_output_file(output, "result.txt", encoded["string"])
io.write_output_file(output, "result.json", encoded["json"]) io.write_output_file(output, "result.json", encoded["json"])
io.write_output_file(output, "result.geojson", encoded["geojson"]) io.write_output_file(output, "result.geojson", encoded["geojson"])
vis = io.write_visualization(output, "result.html", encoded["geojson"]) vis = io.write_visualization(output, "result.html", encoded["geojson"])
click.echo(f"Output files saved to {Path(output).absolute()}")
if open: webbrowser.open_new_tab("file://" + str(vis.absolute()))
print_debug(f"Opening visualization in default browser")
webbrowser.open_new_tab("file://" + str(vis.absolute()))
click.secho("Clustering completed.", fg="green")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -14,6 +14,8 @@ def to_cluster_dict(df, clustering):
""" """
clusters_by_id = {} clusters_by_id = {}
print(clustering.labels_)
for idx, cluster_id in enumerate(clustering.labels_): for idx, cluster_id in enumerate(clustering.labels_):
# ignore "noise" locations that don't belong to any cluster. # ignore "noise" locations that don't belong to any cluster.
if cluster_id > -1: if cluster_id > -1:

View File

@@ -2,22 +2,9 @@ from keplergl import KeplerGl
from pathlib import Path from pathlib import Path
from pkg_resources import resource_filename from pkg_resources import resource_filename
import json import json
import json
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import os
import sys
class HiddenPrints:
"""Disables stdout prints for a block of code."""
def __enter__(self):
self._original_stdout = sys.stdout
sys.stdout = open(os.devnull, "w")
def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout.close()
sys.stdout = self._original_stdout
def is_valid_lat(val: str) -> bool: def is_valid_lat(val: str) -> bool:
@@ -77,10 +64,7 @@ def write_output_file(dirname, filename, data):
def write_visualization(dirname, filename, data): def write_visualization(dirname, filename, data):
"""Write a visualization, ensuring parent directories.""" """Write a visualization, ensuring parent directories."""
# Hide kepler stdout output. map = KeplerGl()
with HiddenPrints():
map = KeplerGl()
map.add_data(data=data, name="clusters") map.add_data(data=data, name="clusters")
# config configures a default color scheme for our clusters layer. # config configures a default color scheme for our clusters layer.
@@ -89,9 +73,6 @@ def write_visualization(dirname, filename, data):
map.config = json.loads(f.read()) map.config = json.loads(f.read())
filepath = ensure_file_path(dirname, filename) filepath = ensure_file_path(dirname, filename)
map.save_to_html(file_name=str(filepath), center_map=True)
# Hide kepler stdout output.
with HiddenPrints():
map.save_to_html(file_name=str(filepath), center_map=True)
return filepath return filepath

View File

@@ -9,7 +9,7 @@
"config": { "config": {
"dataId": "clusters", "dataId": "clusters",
"label": "clusters", "label": "clusters",
"color": [248, 149, 112], "color": [179, 173, 158],
"highlightColor": [252, 242, 26, 255], "highlightColor": [252, 242, 26, 255],
"columns": { "geojson": "_geojson" }, "columns": { "geojson": "_geojson" },
"isVisible": true, "isVisible": true,
@@ -19,30 +19,16 @@
"thickness": 0.5, "thickness": 0.5,
"strokeColor": null, "strokeColor": null,
"colorRange": { "colorRange": {
"name": "Uber Viz Qualitative 4", "name": "Global Warming",
"type": "qualitative", "type": "sequential",
"category": "Uber", "category": "Uber",
"colors": [ "colors": [
"#12939A", "#5A1846",
"#DDB27C", "#900C3F",
"#88572C", "#C70039",
"#FF991F", "#E3611C",
"#F15C17", "#F1920E",
"#223F9A", "#FFC300"
"#DA70BF",
"#125C77",
"#4DC19C",
"#776E57",
"#17B8BE",
"#F6D18A",
"#B7885E",
"#FFCB99",
"#F89570",
"#829AE3",
"#E79FD5",
"#1E96BE",
"#89DAC1",
"#B3AD9E"
] ]
}, },
"strokeColorRange": { "strokeColorRange": {

View File

@@ -1,8 +1,8 @@
_MAJOR = "0" _MAJOR = "0"
_MINOR = "2" _MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last # On main and in a nightly release the patch should be one ahead of the last
# released build. # released build.
_PATCH = "1" _PATCH = "2"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See # This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics. # https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = "" _SUFFIX = ""

0
scripts/release.sh Executable file → Normal file
View File

View File

@@ -22,7 +22,7 @@ setup(
author_email="tech@bellingcat.com", author_email="tech@bellingcat.com",
license="MIT", license="MIT",
packages=["geoclustering"], packages=["geoclustering"],
package_data={"geoclustering": ["kepler_config.json"]}, package_data={"geoclustering": ["kepler_config.json"]}
keywords=["cluster", "gis", "pattern-analysis"], keywords=["cluster", "gis", "pattern-analysis"],
entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]}, entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
install_requires=[ install_requires=[