15 Commits

Author SHA1 Message Date
Miguel Ramalho
f287cb8d02 Bump version to v0.1.0 for release 2022-07-01 14:28:32 +02:00
msramalho
6f83246478 renaming to geoclustering due to pypi 2022-07-01 14:24:22 +02:00
msramalho
b02139c50f test:release 2022-07-01 14:13:03 +02:00
msramalho
0c789c3335 cleanup 2022-07-01 13:32:41 +02:00
msramalho
55cdec2fc8 delete lint 2022-07-01 13:23:32 +02:00
msramalho
aa228bcde2 simplify 2022-07-01 13:22:39 +02:00
msramalho
fa4983aea6 main.yml fix 2022-07-01 13:13:22 +02:00
msramalho
2596b3d87c trigger ga 2022-07-01 13:12:40 +02:00
Miguel Sozinho Ramalho
c91b0cd94d Merge branch 'main' into feat-pypi-workflow 2022-07-01 12:12:07 +01:00
msramalho
e6f56d6c62 updates 2022-07-01 13:10:42 +02:00
msramalho
4c46ff44a8 vresion fix 2022-07-01 13:04:57 +02:00
msramalho
2e63491f72 steps 2022-07-01 13:03:12 +02:00
msramalho
03e132ff03 build dist 2022-07-01 13:02:08 +02:00
msramalho
3b47f2343d on push 2022-07-01 12:48:19 +02:00
msramalho
6eb9007ece testing workflow without 2022-07-01 12:46:49 +02:00
11 changed files with 22 additions and 86 deletions

View File

@@ -30,7 +30,8 @@ runs:
id: virtualenv-cache
with:
path: .venv
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
- if: steps.virtualenv-cache.outputs.cache-hit != 'true'
shell: bash
run: |

View File

@@ -74,7 +74,7 @@ jobs:
name: Release
runs-on: ubuntu-latest
needs: [checks]
if: startsWith(github.ref, 'refs/tags/')
# if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/checkout@v1

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2022, Stichting Bellingcat
Copyright (c) 2022, Felix Spöttel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -39,12 +39,6 @@ pip install .
```
Usage: geoclustering [OPTIONS] FILENAME
Tool to cluster geolocations. A cluster is created when a certain number of
points (--size) each are within a given distance (--distance) of at least
one other point in the cluster. Input is supplied as a csv file. At a
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
reflected to the output.
Options:
-d, --distance FLOAT (in km) Max. distance between two points in
a cluster. [required]
@@ -56,9 +50,6 @@ Options:
Clustering algorithm to be used. `optics`
produces tighter clusters but is slower.
Default: dbscan
--open Open the generated visualization in the
default browser automatically.
--debug Print debug output.
--help Show this message and exit.
```

View File

@@ -1,6 +1,4 @@
from pathlib import Path
import click
import os
import webbrowser
import geoclustering.clustering as clustering
@@ -8,13 +6,7 @@ import geoclustering.encoding as encoding
import geoclustering.io as io
def print_debug(s):
click.secho(s, fg="bright_black")
@click.command(
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
)
@click.command()
@click.option(
"--distance",
"-d",
@@ -46,20 +38,9 @@ def print_debug(s):
default="dbscan",
help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
)
@click.option(
"--open",
is_flag=True,
help="Open the generated visualization in the default browser automatically.",
)
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True))
def main(distance, size, output, filename, algorithm, open, debug):
if debug:
print_debug(f"Reading input from {Path(filename).absolute()}")
def main(distance, size, output, filename, algorithm):
df = io.read_csv_file(filename)
if debug:
print_debug(f"Read {len(df)} valid coordinates")
clusters = clustering.cluster_locations(
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
@@ -69,19 +50,14 @@ def main(distance, size, output, filename, algorithm, open, debug):
click.echo("Did not find clusters matching input parameters.")
return
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
encoded = encoding.encode_clusters(clusters)
io.write_output_file(output, "result.txt", encoded["string"])
io.write_output_file(output, "result.json", encoded["json"])
io.write_output_file(output, "result.geojson", encoded["geojson"])
vis = io.write_visualization(output, "result.html", encoded["geojson"])
click.echo(f"Output files saved to {Path(output).absolute()}")
if open:
print_debug(f"Opening visualization in default browser")
webbrowser.open_new_tab("file://" + str(vis.absolute()))
webbrowser.open_new_tab("file://" + str(vis.absolute()))
if __name__ == "__main__":

View File

@@ -14,6 +14,8 @@ def to_cluster_dict(df, clustering):
"""
clusters_by_id = {}
print(clustering.labels_)
for idx, cluster_id in enumerate(clustering.labels_):
# ignore "noise" locations that don't belong to any cluster.
if cluster_id > -1:

View File

@@ -2,22 +2,9 @@ from keplergl import KeplerGl
from pathlib import Path
from pkg_resources import resource_filename
import json
import json
import pandas as pd
import numpy as np
import os
import sys
class HiddenPrints:
"""Disables stdout prints for a block of code."""
def __enter__(self):
self._original_stdout = sys.stdout
sys.stdout = open(os.devnull, "w")
def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout.close()
sys.stdout = self._original_stdout
def is_valid_lat(val: str) -> bool:
@@ -77,10 +64,7 @@ def write_output_file(dirname, filename, data):
def write_visualization(dirname, filename, data):
"""Write a visualization, ensuring parent directories."""
# Hide kepler stdout output.
with HiddenPrints():
map = KeplerGl()
map = KeplerGl()
map.add_data(data=data, name="clusters")
# config configures a default color scheme for our clusters layer.
@@ -89,9 +73,6 @@ def write_visualization(dirname, filename, data):
map.config = json.loads(f.read())
filepath = ensure_file_path(dirname, filename)
# Hide kepler stdout output.
with HiddenPrints():
map.save_to_html(file_name=str(filepath), center_map=True)
map.save_to_html(file_name=str(filepath), center_map=True)
return filepath

View File

@@ -9,7 +9,7 @@
"config": {
"dataId": "clusters",
"label": "clusters",
"color": [248, 149, 112],
"color": [179, 173, 158],
"highlightColor": [252, 242, 26, 255],
"columns": { "geojson": "_geojson" },
"isVisible": true,
@@ -19,30 +19,16 @@
"thickness": 0.5,
"strokeColor": null,
"colorRange": {
"name": "Uber Viz Qualitative 4",
"type": "qualitative",
"name": "Global Warming",
"type": "sequential",
"category": "Uber",
"colors": [
"#12939A",
"#DDB27C",
"#88572C",
"#FF991F",
"#F15C17",
"#223F9A",
"#DA70BF",
"#125C77",
"#4DC19C",
"#776E57",
"#17B8BE",
"#F6D18A",
"#B7885E",
"#FFCB99",
"#F89570",
"#829AE3",
"#E79FD5",
"#1E96BE",
"#89DAC1",
"#B3AD9E"
"#5A1846",
"#900C3F",
"#C70039",
"#E3611C",
"#F1920E",
"#FFC300"
]
},
"strokeColorRange": {

View File

@@ -1,5 +1,5 @@
_MAJOR = "0"
_MINOR = "2"
_MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "0"

0
scripts/release.sh Executable file → Normal file
View File

View File

@@ -22,7 +22,6 @@ setup(
author_email="tech@bellingcat.com",
license="MIT",
packages=["geoclustering"],
package_data={"geoclustering": ["kepler_config.json"]},
keywords=["cluster", "gis", "pattern-analysis"],
entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
install_requires=[