fixing kepler json and version

Bump version to v0.1.0 for release
renaming to geoclustering due to pypi
2026-06-12 21:48:30 +03:00 · 2022-07-01 14:45:51 +02:00 · 2022-07-01 14:28:32 +02:00 · 2022-07-01 14:24:22 +02:00 · 2022-07-01 14:13:03 +02:00 · 2022-07-01 13:32:41 +02:00
10 changed files with 24 additions and 85 deletions
--- a/.github/actions/setup-venv/action.yml
+++ b/.github/actions/setup-venv/action.yml
@@ -30,7 +30,8 @@ runs:
      id: virtualenv-cache
      with:
        path: .venv
-        key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
+        key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
    - if: steps.virtualenv-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2022, Stichting Bellingcat
+Copyright (c) 2022, Felix Spöttel
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/README.md
+++ b/README.md
@@ -39,12 +39,6 @@ pip install .
 ```
 Usage: geoclustering [OPTIONS] FILENAME
  Tool to cluster geolocations. A cluster is created when a certain number of
  points (--size) each are within a given distance (--distance) of at least
  one other point in the cluster. Input is supplied as a csv file. At a
  minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
  reflected to the output.
 Options:
  -d, --distance FLOAT            (in km) Max. distance between two points in
                                  a cluster.  [required]
@@ -56,9 +50,6 @@ Options:
                                  Clustering algorithm to be used. `optics`
                                  produces tighter clusters but is slower.
                                  Default: dbscan
  --open                          Open the generated visualization in the
                                  default browser automatically.
  --debug                         Print debug output.
  --help                          Show this message and exit.
 ```
--- a/geoclustering/main.py
+++ b/geoclustering/main.py
@@ -1,6 +1,4 @@
 from pathlib import Path
 import click
 import os
 import webbrowser
 import geoclustering.clustering as clustering
@@ -8,9 +6,7 @@ import geoclustering.encoding as encoding
 import geoclustering.io as io
-@click.command(
+@click.command()
    help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
 )
@click.option(
    "--distance",
    "-d",
@@ -42,44 +38,26 @@ import geoclustering.io as io
    default="dbscan",
    help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
 )
@click.option(
    "--open",
    is_flag=True,
    help="Open the generated visualization in the default browser automatically.",
 )
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True))
-def main(distance, size, output, filename, algorithm, open, debug):
+def main(distance, size, output, filename, algorithm):
    def print_debug(s):
        if debug:
            click.secho(s, fg="bright_black")
    df = io.read_csv_file(filename)
    print_debug(f"Read {len(df)} valid coordinates from {Path(filename).absolute()}")
    clusters = clustering.cluster_locations(
        df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
    )
    if not bool(clusters):
-        click.secho("Did not find clusters matching input parameters.", fg="yellow")
+        click.echo("Did not find clusters matching input parameters.")
        return
    print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
    encoded = encoding.encode_clusters(clusters)
    io.write_output_file(output, "result.txt", encoded["string"])
    io.write_output_file(output, "result.json", encoded["json"])
    io.write_output_file(output, "result.geojson", encoded["geojson"])
    vis = io.write_visualization(output, "result.html", encoded["geojson"])
    click.echo(f"Output files saved to {Path(output).absolute()}")
-    if open:
+    webbrowser.open_new_tab("file://" + str(vis.absolute()))
        print_debug(f"Opening visualization in default browser")
        webbrowser.open_new_tab("file://" + str(vis.absolute()))
    click.secho("Clustering completed.", fg="green")
 if __name__ == "__main__":
--- a/geoclustering/clustering.py
+++ b/geoclustering/clustering.py
@@ -14,6 +14,8 @@ def to_cluster_dict(df, clustering):
    """
    clusters_by_id = {}
    print(clustering.labels_)
    for idx, cluster_id in enumerate(clustering.labels_):
        # ignore "noise" locations that don't belong to any cluster.
        if cluster_id > -1:
--- a/geoclustering/io.py
+++ b/geoclustering/io.py
@@ -2,22 +2,9 @@ from keplergl import KeplerGl
 from pathlib import Path
 from pkg_resources import resource_filename
 import json
 import json
 import pandas as pd
 import numpy as np
 import os
 import sys
 class HiddenPrints:
    """Disables stdout prints for a block of code."""
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, "w")
    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout
 def is_valid_lat(val: str) -> bool:
@@ -77,10 +64,7 @@ def write_output_file(dirname, filename, data):
 def write_visualization(dirname, filename, data):
    """Write a visualization, ensuring parent directories."""
-    # Hide kepler stdout output.
+    map = KeplerGl()
    with HiddenPrints():
        map = KeplerGl()
    map.add_data(data=data, name="clusters")
    # config configures a default color scheme for our clusters layer.
@@ -89,9 +73,6 @@ def write_visualization(dirname, filename, data):
        map.config = json.loads(f.read())
    filepath = ensure_file_path(dirname, filename)
-
+    map.save_to_html(file_name=str(filepath), center_map=True)
    # Hide kepler stdout output.
    with HiddenPrints():
        map.save_to_html(file_name=str(filepath), center_map=True)
    return filepath
--- a/geoclustering/kepler_config.json
+++ b/geoclustering/kepler_config.json
@@ -9,7 +9,7 @@
          "config": {
            "dataId": "clusters",
            "label": "clusters",
-            "color": [248, 149, 112],
+            "color": [179, 173, 158],
            "highlightColor": [252, 242, 26, 255],
            "columns": { "geojson": "_geojson" },
            "isVisible": true,
@@ -19,30 +19,16 @@
              "thickness": 0.5,
              "strokeColor": null,
              "colorRange": {
-                "name": "Uber Viz Qualitative 4",
+                "name": "Global Warming",
-                "type": "qualitative",
+                "type": "sequential",
                "category": "Uber",
                "colors": [
-                  "#12939A",
+                  "#5A1846",
-                  "#DDB27C",
+                  "#900C3F",
-                  "#88572C",
+                  "#C70039",
-                  "#FF991F",
+                  "#E3611C",
-                  "#F15C17",
+                  "#F1920E",
-                  "#223F9A",
+                  "#FFC300"
                  "#DA70BF",
                  "#125C77",
                  "#4DC19C",
                  "#776E57",
                  "#17B8BE",
                  "#F6D18A",
                  "#B7885E",
                  "#FFCB99",
                  "#F89570",
                  "#829AE3",
                  "#E79FD5",
                  "#1E96BE",
                  "#89DAC1",
                  "#B3AD9E"
                ]
              },
              "strokeColorRange": {
--- a/geoclustering/version.py
+++ b/geoclustering/version.py
@@ -1,8 +1,8 @@
 _MAJOR = "0"
-_MINOR = "2"
+_MINOR = "1"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "1"
+_PATCH = "2"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
--- a/scripts/release.sh
+++ b/scripts/release.sh
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ setup(
    author_email="tech@bellingcat.com",
    license="MIT",
    packages=["geoclustering"],
-    package_data={"geoclustering": ["kepler_config.json"]},
+    package_data={"geoclustering": ["kepler_config.json"]}
    keywords=["cluster", "gis", "pattern-analysis"],
    entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
    install_requires=[
Author	SHA1	Message	Date
msramalho	c6e3671a16	fixing kepler json and version	2022-07-01 14:45:51 +02:00
Miguel Ramalho	f287cb8d02	Bump version to v0.1.0 for release	2022-07-01 14:28:32 +02:00
msramalho	6f83246478	renaming to geoclustering due to pypi	2022-07-01 14:24:22 +02:00
msramalho	b02139c50f	test:release	2022-07-01 14:13:03 +02:00
msramalho	0c789c3335	cleanup	2022-07-01 13:32:41 +02:00
msramalho	55cdec2fc8	delete lint	2022-07-01 13:23:32 +02:00
msramalho	aa228bcde2	simplify	2022-07-01 13:22:39 +02:00
msramalho	fa4983aea6	main.yml fix	2022-07-01 13:13:22 +02:00
msramalho	2596b3d87c	trigger ga	2022-07-01 13:12:40 +02:00
Miguel Sozinho Ramalho	c91b0cd94d	Merge branch 'main' into feat-pypi-workflow	2022-07-01 12:12:07 +01:00
msramalho	e6f56d6c62	updates	2022-07-01 13:10:42 +02:00
msramalho	4c46ff44a8	version fix	2022-07-01 13:04:57 +02:00
msramalho	2e63491f72	steps	2022-07-01 13:03:12 +02:00
msramalho	03e132ff03	build dist	2022-07-01 13:02:08 +02:00
msramalho	3b47f2343d	on push	2022-07-01 12:48:19 +02:00
msramalho	6eb9007ece	testing workflow without	2022-07-01 12:46:49 +02:00