migrate gh artifact actions to v4 (#20 )

* migrate gh artifact actions to v4 from migration guide no breaking changes apply here. * updates pipfile.lock dependency versions * updates CI due to pytest issue see https://github.com/scipy/scipy/issues/22236 * bump to python 3.12 * revert to py3.10
Bump version to v0.4.1 for release
2026-06-12 05:28:29 +03:00 · 2025-01-09 15:47:27 +00:00 · 2022-09-27 14:49:17 +01:00 · 2022-09-27 14:49:04 +01:00 · 2022-09-27 14:43:05 +01:00 · 2022-09-27 14:41:48 +01:00
33 changed files with 4125 additions and 1241 deletions
--- a/.github/actions/setup-venv/action.yml
+++ b/.github/actions/setup-venv/action.yml
@@ -0,0 +1,53 @@
 # Composite action: installs the requested Python, then creates a virtualenv
 # in .venv (or restores it from the cache) with the project's dev extras.
 name: Python virtualenv
 description: Set up a Python virtual environment with caching
 inputs:
  python-version:
    description: The Python version to use
    required: true
  cache-prefix:
    description: Update this to invalidate the cache
    required: true
    default: v0
 runs:
  using: composite
  steps:
    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}
    - shell: bash
      run: |
        # Install prerequisites.
        pip install --upgrade pip setuptools wheel virtualenv
    - shell: bash
      run: |
        # Get the exact Python version to use in the cache key.
        echo "PYTHON_VERSION=$(python --version)" >> "$GITHUB_ENV"
    # actions/cache v1/v2 are deprecated upstream; v4 accepts the same
    # `path`/`key` inputs and exposes the same `cache-hit` output.
    - uses: actions/cache@v4
      id: virtualenv-cache
      with:
        path: .venv
        # The key changes whenever the prefix, OS, exact Python version or
        # Pipfile.lock contents change.
        key: ${{ inputs.cache-prefix }}-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('Pipfile.lock') }}
    - if: steps.virtualenv-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        # Set up virtual environment without cache hit.
        test -d .venv || virtualenv -p $(which python) --copies --reset-app-data .venv
        . .venv/bin/activate
        # Quoted so bash never treats [dev] as a glob character class.
        pip install -e '.[dev]'
    - if: steps.virtualenv-cache.outputs.cache-hit == 'true'
      shell: bash
      run: |
        # Set up virtual environment from cache hit.
        # Dependencies come from the cache; only the editable install of the
        # project itself is refreshed.
        . .venv/bin/activate
        pip install --no-deps -e '.[dev]'
    - shell: bash
      run: |
        # Show environment info.
        . .venv/bin/activate
        echo "✓ Installed $(python --version) virtual environment to $(which python)"
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,10 +0,0 @@
 name: Lint
 on: [push]
 jobs:
  black:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: psf/black@stable
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -0,0 +1,119 @@
 # Main CI workflow: build, lint and test on every change to main, and
 # release when a version tag is pushed.
 name: Main
 # A newer run for the same workflow and ref cancels the one in progress.
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
 # on: [push]
 # Triggers: pull requests targeting main, pushes to main, and pushes of
 # tags matching v*.*.*.
 on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
    tags:
      - "v*.*.*"
 env:
  # Change this to invalidate existing cache.
  CACHE_PREFIX: v0
  # Makes the repository root importable for every step.
  PYTHONPATH: ./
 jobs:
  # One job per matrix entry: each entry pairs a Python version with a task
  # (display name + shell snippet) that is run inside the cached virtualenv.
  checks:
    name: Python ${{ matrix.python }} - ${{ matrix.task.name }}
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - python: '3.10'
            task:
              name: 'Build'
              run: |
                python setup.py check
                python setup.py bdist_wheel sdist
          - python: '3.10'
            task:
              name: 'Lint'
              run: |
                black --check .
          - python: '3.10'
            task:
              name: 'Test'
              run: pytest --exitfirst --failed-first --assert=plain
          - python: '3.8'
            task:
              name: 'Test (3.8)'
              run: pytest --exitfirst --failed-first --assert=plain
    steps:
      - uses: actions/checkout@v3
      - name: Setup Python environment
        uses: ./.github/actions/setup-venv
        with:
          python-version: ${{ matrix.python }}
          cache-prefix: ${{ env.CACHE_PREFIX }}
      # Runs the matrix entry's snippet with the virtualenv activated.
      - name: ${{ matrix.task.name }}
        run: |
          . .venv/bin/activate
          ${{ matrix.task.run }}
      # Only the Build entry produces dist/, which the release job downloads.
      - name: Upload package distribution files
        if: matrix.task.name == 'Build'
        uses: actions/upload-artifact@v4
        with:
          name: package
          path: dist
      # Runs even after a failure so the project itself is removed from .venv.
      - name: Clean up
        if: always()
        run: |
          . .venv/bin/activate
          pip uninstall -y geoclustering
  # Publishes the distribution files built by the "checks" job to PyPI and
  # attaches them to a GitHub release. Runs only for tag refs, after all
  # checks have passed.
  release:
    name: Release
    runs-on: ubuntu-latest
    needs: [checks]
    if: startsWith(github.ref, 'refs/tags/')
    steps:
      # Same checkout major version as the checks job (was the outdated v1).
      - uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install requirements
        run: |
          pip install --upgrade pip setuptools wheel "twine>=1.11.0"
      - name: Prepare environment
        run: |
          # RELEASE_VERSION is the tag without the leading "v"; TAG is the
          # full tag name.
          echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_ENV"
          echo "TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
      - name: Download package distribution files
        uses: actions/download-artifact@v4
        with:
          name: package
          path: dist
      - name: Publish package to PyPI
        # Credentials are passed through the environment variables twine
        # reads, instead of being templated into the shell command line
        # (where a quote character in a secret would break the command).
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          twine upload dist/*
      - name: Publish GitHub release
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # body_path: ${{ github.workspace }}-RELEASE_NOTES.md
          # Tags containing "rc" are published as pre-releases.
          prerelease: ${{ contains(env.TAG, 'rc') }}
          files: |
            dist/*
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
 # pre-commit configuration: a single hook that runs the Black formatter,
 # pinned to the release given in `rev`.
 repos:
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        # It is recommended to specify the latest version of Python
        # supported by your project here, or alternatively use
        # pre-commit's default_language_version, see
        # https://pre-commit.com/#top_level-default_language_version
        language_version: python3.9
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2022, Felix Spöttel
+Copyright (c) 2022, Stichting Bellingcat
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/22
+++ b/22
@@ -0,0 +1,22 @@
 [[source]]
 url = "https://pypi.org/simple"
 verify_ssl = true
 name = "pypi"
 [packages]
 click = "*"
 geojson = "*"
 keplergl = "*"
 numpy = "*"
 pandas = "*"
 scikit-learn = "*"
 [dev-packages]
 black = "*"
 pre-commit = "*"
 pytest = "*"
 wheel = "*"
 geoclustering = {editable = true, path = "."}
 [requires]
 python_version = "3.9"
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# geocluster
+# geoclustering
 > 📍 command-line tool for clustering geolocations.
@@ -10,34 +10,38 @@
 ### Clustering Method
-A cluster is created when a certain number of points (=> `--size`) each are within a given distance (=> `--distance`) of at least one other point in the cluster. 
+A cluster is created when a certain number of points (defined with `--size`) each are within a given distance (defined with `--distance`) of at least one other point in the cluster. 
 ## Install
-Clone the repository:
+Install with pip:
 ```sh
-git clone https://github.com/fspoettel/geocluster
+# with kepler.gl visualization support
-cd geocluster
+pip install geoclustering[full]
 # only text-based output
 pip install geoclustering
 ```
-Install keplergl build dependencies:
+If the `full` install fails, you might need to install kepler.gl build dependencies:
 ```sh
 # macos
 brew install proj gdal
 ```
 Install project with pip:
 ```sh
 pip install .
 ```
 ## Usage
 ```
-Usage: geocluster [OPTIONS] FILENAME
+Usage: geoclustering [OPTIONS] FILENAME
  Tool to cluster geolocations. A cluster is created when a certain number of
  points (defined with --size) each are within a given distance (defined with
  --distance) of at least one other point in the cluster. Input is supplied as
  a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column.
  Other rows are reflected to the output.
 Options:
  -d, --distance FLOAT            (in km) Max. distance between two points in
@@ -50,12 +54,15 @@ Options:
                                  Clustering algorithm to be used. `optics`
                                  produces tighter clusters but is slower.
                                  Default: dbscan
  --open                          Open the generated visualization in the
                                  default browser automatically.
  --debug                         Print debug output.
  --help                          Show this message and exit.
 ```
 ## Input
-Inputs are supplied as a `.csv` file. The only required fields are `lat` and `lon`, all other fields are reflected to the output.
+Inputs are supplied as a `.csv` file. At a minimum, each row needs to have a `lat` and a `lon` column. Other columns are reflected to the output.
 ```csv
 id,name,lat,lon
@@ -65,7 +72,7 @@ id,name,lat,lon
 ## Output
-If at least one cluster was found, the tool outputs a folder with `json`, `geojson`, `text` and a kepler.gl `html` files.
+If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt`, `csv` files. A kepler.gl `html` file is generated as well.
 ### JSON
@@ -114,7 +121,7 @@ Encodes a single `FeatureCollection`, containing all points as `Feature` objects
 }
 ```
-### txt
+### Text
 Encodes cluster as blocks separated by a newline, where each line in a cluster block contains one point.
@@ -125,6 +132,39 @@ id 9, name Rosanna Foggo, lat -6.2074293, lon 106.8915948
 // ...
 ```
 ### CSV
 Encodes each event in one line with `cluster_id` information associated.
 ```csv
 cluster_id,name,lat,lon
 9,Rosanna Foggo,-6.2074293,106.8915948
 ...
 ```
 ### kepler.gl
 ![kepler.gl instance](https://user-images.githubusercontent.com/1682504/176478177-c0446b51-4060-495c-803d-79e2bbd3e966.png)
 ## Develop
 It is assumed that you are using **Python 3.9+**. You are encouraged to [set up a virtualenv](https://wiki.archlinux.org/title/Python/Virtual_environment#venv) for development.
 ```sh
    # install dependencies & dev-dependencies
    # PIP
    pip install -e .[dev,full]
    # PIPENV
    pipenv install --dev -e .
    # install a git hook that runs the code formatter before each commit.
    pre-commit install
 ```
 We use [Black](https://github.com/psf/black) as our code formatter. If you don't want to use the `pre-commit` hook, you can run the formatter manually or via an editor plugin.
 ## Release
 1. Update [version.py](geoclustering/version.py)
 2. Run `scripts/release.sh` 
 3. Confirm GH action completed successfully
--- a/geocluster/main.py
+++ b/geocluster/main.py
@@ -1,64 +0,0 @@
 import click
 import webbrowser
 import geocluster.clustering as clustering
 import geocluster.encoding as encoding
 import geocluster.io as io
@click.command()
@click.option(
    "--distance",
    "-d",
    type=click.FLOAT,
    required=True,
    help="(in km) Max. distance between two points in a cluster.",
 )
@click.option(
    "--size",
    "-s",
    type=click.INT,
    required=True,
    help="Min. number of points in a cluster.",
 )
@click.option(
    "--output",
    "-o",
    type=click.Path(exists=False),
    default="output",
    help="Output directory for results. Default: ./output",
 )
@click.option(
    "--algorithm",
    "-a",
    type=click.Choice(
        ["dbscan", "optics"],
        case_sensitive=False,
    ),
    default="dbscan",
    help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
 )
@click.argument("filename", type=click.Path(exists=True))
 def main(distance, size, output, filename, algorithm):
    df = io.read_csv_file(filename)
    clusters = clustering.cluster_locations(
        df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
    )
    if not bool(clusters):
        click.echo("Did not find clusters matching input parameters.")
        return
    encoded = encoding.encode_clusters(clusters)
    io.write_output_file(output, "result.txt", encoded["string"])
    io.write_output_file(output, "result.json", encoded["json"])
    io.write_output_file(output, "result.geojson", encoded["geojson"])
    vis = io.write_visualization(output, "result.html", encoded["geojson"])
    webbrowser.open_new_tab("file://" + str(vis.absolute()))
 if __name__ == "__main__":
    main()
--- a/geocluster/io.py
+++ b/geocluster/io.py
@@ -1,78 +0,0 @@
 from keplergl import KeplerGl
 from pathlib import Path
 from pkg_resources import resource_filename
 import json
 import json
 import pandas as pd
 import numpy as np
 def is_valid_lat(val: str) -> bool:
    """Given a string, check if it corresponds to a valid decimal latitude value"""
    try:
        val = float(val)
        return val >= -90 and val <= 90
    except:
        return False
 def is_valid_lon(val: str) -> bool:
    """Given a string, check if it corresponds to a valid decimal longitude value"""
    try:
        val = float(val)
        return val >= -180 and val <= 180
    except:
        return False
 def read_csv_file(filename):
    """Read input csv file, dropping rows that don't have valid location data."""
    df = pd.read_csv(filename)
    initial_rows = len(df)
    df = df.dropna(subset=["lat", "lon"])
    df = df.replace(
        {np.nan: None}
    )  # replace for other fields not to break kepler parsing
    print(f"Ignored {initial_rows - len(df)} coordinates with NaN")
    valid_index = df.lat.astype(str).apply(is_valid_lat) & df.lon.astype(str).apply(
        is_valid_lon
    )
    if len(df_invalid := df[~valid_index]):
        print(f"Found {len(df_invalid)} invalid coordinate pairs, ignoring:")
        print(df_invalid[["lat", "lon"]].to_string())
    return df[valid_index]
 def ensure_file_path(dirname, filename):
    """Ensure a parent directory exists for a file."""
    path = Path(dirname)
    path.mkdir(parents=True, exist_ok=True)
    return path / filename
 def write_output_file(dirname, filename, data):
    """Write a file, ensuring parent directories."""
    filepath = ensure_file_path(dirname, filename)
    with open(filepath, "w") as f:
        f.write(data)
    return filepath
 def write_visualization(dirname, filename, data):
    """Write a visualization, ensuring parent directories."""
    map = KeplerGl()
    map.add_data(data=data, name="clusters")
    # config configures a default color scheme for our clusters layer.
    config_file = resource_filename("geocluster", "kepler_config.json")
    with open(config_file) as f:
        map.config = json.loads(f.read())
    filepath = ensure_file_path(dirname, filename)
    map.save_to_html(file_name=str(filepath), center_map=True)
    return filepath
--- a/geoclustering/init.py
+++ b/geoclustering/init.py
--- a/geoclustering/main.py
+++ b/geoclustering/main.py
@@ -0,0 +1,96 @@
 from pathlib import Path
 import click
 import webbrowser
 import geoclustering.clustering as clustering
 import geoclustering.encoding as encoding
 import geoclustering.io as io
@click.command(
    help="Tool to cluster geolocations. A cluster is created when a certain number of points (defined with --size) each are within a given distance (defined with --distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
 )
@click.option(
    "--distance",
    "-d",
    type=click.FLOAT,
    required=True,
    help="(in km) Max. distance between two points in a cluster.",
 )
@click.option(
    "--size",
    "-s",
    type=click.INT,
    required=True,
    help="Min. number of points in a cluster.",
 )
@click.option(
    "--output",
    "-o",
    type=click.Path(exists=False),
    default="output",
    help="Output directory for results. Default: ./output",
 )
@click.option(
    "--algorithm",
    "-a",
    type=click.Choice(
        ["dbscan", "optics"],
        case_sensitive=False,
    ),
    default="dbscan",
    help="Clustering algorithm to be used. `optics` produces tighter clusters but is slower. Default: dbscan",
 )
@click.option(
    "--open",
    "_open",
    is_flag=True,
    help="Open the generated visualization in the default browser automatically.",
 )
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True))
 def main(distance, size, output, filename, algorithm, _open, debug):
    def print_debug(s):
        if debug:
            click.secho(s, fg="bright_black")
    df = io.read_csv_file(filename)
    print_debug(f"Read {len(df)} valid coordinates from {Path(filename).absolute()}")
    clusters = clustering.cluster_locations(
        df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
    )
    if not bool(clusters):
        click.secho("Did not find clusters matching input parameters.", fg="yellow")
        return
    print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
    encoded = encoding.encode_clusters(clusters)
    io.write_output_file(output, "result.txt", encoded["string"])
    io.write_output_file(output, "result.json", encoded["json"])
    io.write_output_file(output, "result.geojson", encoded["geojson"])
    io.write_output_file(output, "result.csv", encoded["csv"])
    vis = io.write_visualization(output, "result.html", encoded["geojson"])
    if vis is None:
        print_debug("Skipped generating visualization: kepler is not installed.")
    click.echo(f"Output files saved to {Path(output).absolute()}")
    if _open:
        if vis:
            webbrowser.open_new_tab("file://" + str(vis.absolute()))
            print_debug("Opened visualization in default browser.")
        else:
            click.secho(
                "Can't open kepler.gl: package not installed. Please re-install geoclustering with `pip install geoclustering[full]`.",
                fg="yellow",
            )
    click.secho("Clustering completed.", fg="green")
 if __name__ == "__main__":
    main()
--- a/geoclustering/clustering.py
+++ b/geoclustering/clustering.py
@@ -14,8 +14,6 @@ def to_cluster_dict(df, clustering):
    """
    clusters_by_id = {}
    print(clustering.labels_)
    for idx, cluster_id in enumerate(clustering.labels_):
        # ignore "noise" locations that don't belong to any cluster.
        if cluster_id > -1:
--- a/geoclustering/encoding.py
+++ b/geoclustering/encoding.py
@@ -1,6 +1,8 @@
 import json
 import numpy as np
 import geojson
 import csv
 import io  # not io.py
 class NpEncoder(json.JSONEncoder):
@@ -47,7 +49,7 @@ class JSONEncoder:
        for record in cluster:
            cluster_data["points"].append(record)
-            self.state.append(cluster_data)
+        self.state.append(cluster_data)
    def get(self):
        return json.dumps(self.state, cls=NpEncoder)
@@ -74,13 +76,37 @@ class GeoJSONEncoder:
        return json.dumps(geojson.FeatureCollection(self.state), cls=NpEncoder)
 class CSVEncoder:
    """Accumulates clusters as CSV text, one row per record.

    The header is written lazily on the first ``visitor`` call and consists
    of ``cluster_id`` followed by the keys of that cluster's first record.
    """
    def __init__(self):
        # In-memory text buffer that receives all CSV output.
        self.state = io.StringIO()
        # Stays False until the first cluster reveals the column names.
        self.writer = False
    def _start_writer(self, first_record):
        """Create the DictWriter from ``first_record``'s keys and emit the header."""
        columns = ["cluster_id"]
        columns.extend(first_record.keys())
        self.writer = csv.DictWriter(
            self.state,
            fieldnames=columns,
            quoting=csv.QUOTE_NONNUMERIC,
            lineterminator="\n",
        )
        self.writer.writeheader()
    def visitor(self, cluster_id, cluster):
        """Append every record of ``cluster``, tagged with ``cluster_id``."""
        if not self.writer:
            self._start_writer(cluster[0])
        for row in cluster:
            tagged = dict(row)
            tagged["cluster_id"] = cluster_id
            self.writer.writerow(tagged)
    def get(self):
        """Return everything written so far as one string."""
        return self.state.getvalue()
 def encode_clusters(clusters):
    json_encoder = JSONEncoder()
    geojson_encoder = GeoJSONEncoder()
    string_encoder = StringEncoder()
    csv_encoder = CSVEncoder()
-    encoders = [json_encoder, geojson_encoder, string_encoder]
+    encoders = [json_encoder, geojson_encoder, string_encoder, csv_encoder]
    for cluster_id, cluster in clusters.items():
        for encoder in encoders:
            encoder.visitor(cluster_id, cluster)
@@ -89,4 +115,5 @@ def encode_clusters(clusters):
        "json": json_encoder.get(),
        "geojson": geojson_encoder.get(),
        "string": string_encoder.get(),
        "csv": csv_encoder.get(),
    }
--- a/geoclustering/io.py
+++ b/geoclustering/io.py
@@ -0,0 +1,120 @@
 from pathlib import Path
 from pkg_resources import resource_filename
 import json
 import pandas as pd
 import numpy as np
 import os
 import sys
 # kepler is optional, check if installed.
 # Any ordinary failure while importing it (missing package, broken
 # dependency) just disables the visualization; KeyboardInterrupt and
 # SystemExit are no longer swallowed.
 try:
    from keplergl import KeplerGl
 except Exception:
    has_kepler = False
 else:
    has_kepler = True
 class HiddenPrints:
    """Context manager that discards everything printed to stdout.

    On entry ``sys.stdout`` is pointed at ``os.devnull``; on exit the
    previous stream is restored and the devnull handle is closed.
    """
    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference so __exit__ closes exactly this handle.
        self._devnull = open(os.devnull, "w")
        sys.stdout = self._devnull
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first, then close only the handle opened in __enter__;
        # whatever sys.stdout was swapped to inside the block is left alone.
        sys.stdout = self._original_stdout
        self._devnull.close()
 def is_valid_lat(val: str) -> bool:
    """Given a string, check if it corresponds to a valid decimal latitude value.

    Returns True when ``float(val)`` succeeds and lies in [-90, 90]. Anything
    ``float`` cannot convert (including ``None``) and NaN yield False.
    """
    try:
        return -90 <= float(val) <= 90
    # Only conversion failures mean "invalid"; interrupts must propagate.
    except (TypeError, ValueError, OverflowError):
        return False
 def is_valid_lon(val: str) -> bool:
    """Given a string, check if it corresponds to a valid decimal longitude value.

    Returns True when ``float(val)`` succeeds and lies in [-180, 180].
    Anything ``float`` cannot convert (including ``None``) and NaN yield False.
    """
    try:
        return -180 <= float(val) <= 180
    # Only conversion failures mean "invalid"; interrupts must propagate.
    except (TypeError, ValueError, OverflowError):
        return False
 def is_not_none(val: object) -> bool:
    """Return True for every value except ``None`` (falsy values included)."""
    return val is not None
 def read_csv_file(filename):
    """Load a CSV with pandas and keep only rows with a valid lat/lon pair.

    Dropped rows are reported on stdout in two groups: rows where both
    coordinates are empty (count only) and rows where at least one
    coordinate is present but the pair is invalid (count plus the values).
    """
    # NaN cells become None for all fields, so they don't break kepler parsing.
    frame = pd.read_csv(filename).replace({np.nan: None})
    # Boolean mask: True where both coordinates pass validation.
    keep = frame.lat.apply(is_valid_lat) & frame.lon.apply(is_valid_lon)
    rejected = frame[~keep]
    if len(rejected):
        # Of the rejected rows, those with at least one coordinate filled in.
        has_some_value = rejected.lat.apply(is_not_none) | rejected.lon.apply(
            is_not_none
        )
        malformed = rejected[has_some_value]
        n_malformed = len(malformed)
        n_blank = len(rejected) - n_malformed
        if n_blank:
            print(f"Removed {n_blank} empty coordinate pairs.")
        if n_malformed:
            print(f"Removed {n_malformed} invalid coordinate pairs:")
            print(malformed[["lat", "lon"]].to_string())
    return frame[keep]
 def ensure_file_path(dirname, filename):
    """Create ``dirname`` (and missing parents) and return ``dirname/filename``.

    The directory may already exist; the file itself is not created.
    """
    directory = Path(dirname)
    directory.mkdir(parents=True, exist_ok=True)
    return directory.joinpath(filename)
 def write_output_file(dirname, filename, data):
    """Write ``data`` (text) to ``dirname/filename`` and return the path.

    The directory is created if needed. The file is always written as UTF-8
    so non-ASCII field values are encoded the same way on every platform.
    """
    filepath = ensure_file_path(dirname, filename)
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(data)
    return filepath
 def write_visualization(dirname, filename, data):
    """Render ``data`` as a kepler.gl HTML page at ``dirname/filename``.

    Returns the path of the written file, or None when kepler is not
    installed (nothing is written in that case).
    """
    if not has_kepler:
        return None
    # Hide kepler stdout output.
    with HiddenPrints():
        kepler_map = KeplerGl()
    kepler_map.add_data(data=data, name="clusters")
    # The bundled config sets a default color scheme for the clusters layer.
    config_path = resource_filename("geoclustering", "kepler_config.json")
    with open(config_path) as handle:
        kepler_map.config = json.load(handle)
    target = ensure_file_path(dirname, filename)
    # Hide kepler stdout output.
    with HiddenPrints():
        kepler_map.save_to_html(file_name=str(target), center_map=True)
    return target
--- a/geoclustering/kepler_config.json
+++ b/geoclustering/kepler_config.json
@@ -9,7 +9,7 @@
          "config": {
            "dataId": "clusters",
            "label": "clusters",
-            "color": [179, 173, 158],
+            "color": [248, 149, 112],
            "highlightColor": [252, 242, 26, 255],
            "columns": { "geojson": "_geojson" },
            "isVisible": true,
@@ -19,16 +19,30 @@
              "thickness": 0.5,
              "strokeColor": null,
              "colorRange": {
-                "name": "Global Warming",
+                "name": "Uber Viz Qualitative 4",
-                "type": "sequential",
+                "type": "qualitative",
                "category": "Uber",
                "colors": [
-                  "#5A1846",
+                  "#12939A",
-                  "#900C3F",
+                  "#DDB27C",
-                  "#C70039",
+                  "#88572C",
-                  "#E3611C",
+                  "#FF991F",
-                  "#F1920E",
+                  "#F15C17",
-                  "#FFC300"
+                  "#223F9A",
                  "#DA70BF",
                  "#125C77",
                  "#4DC19C",
                  "#776E57",
                  "#17B8BE",
                  "#F6D18A",
                  "#B7885E",
                  "#FFCB99",
                  "#F89570",
                  "#829AE3",
                  "#E79FD5",
                  "#1E96BE",
                  "#89DAC1",
                  "#B3AD9E"
                ]
              },
              "strokeColorRange": {
--- a/geoclustering/version.py
+++ b/geoclustering/version.py
@@ -0,0 +1,11 @@
 # Version components, kept as strings so they can be joined directly.
 _MAJOR = "0"
 _MINOR = "4"
 # On main and in nightly releases, keep the patch one ahead of the last
 # released build.
 _PATCH = "1"
 # Mainly for nightly builds, which carry a ".dev$DATE" suffix. See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
 # "MAJOR.MINOR"
 VERSION_SHORT = f"{_MAJOR}.{_MINOR}"
 # "MAJOR.MINOR.PATCH" followed by the optional suffix.
 VERSION = f"{VERSION_SHORT}.{_PATCH}{_SUFFIX}"
--- a/pytest.ini
+++ b/pytest.ini
@@ -0,0 +1,3 @@
 [pytest]
 testpaths = tests/
 python_files = *.py
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -0,0 +1,18 @@
 #!/bin/bash
 # Commits any pending changes, pushes them, then creates and pushes a git
 # tag named after the package version.
 set -e
 # Derive the tag name ("v" + VERSION) from the package's version module.
 TAG=$(python -c 'from geoclustering.version import VERSION; print("v" + VERSION)')
 # -r keeps backslashes in the answer literal. Only an explicit yes continues,
 # so the hint shows "no" as the default.
 read -r -p "Creating new release for $TAG. Do you want to continue? [y/N] " prompt
 if [[ $prompt == "y" || $prompt == "Y" || $prompt == "yes" || $prompt == "Yes" ]]; then
    git add -A
    # The commit is best-effort (there may be nothing to commit); the push
    # happens either way.
    git commit -m "Bump version to $TAG for release" || true
    git push
    echo "Creating new git tag $TAG"
    git tag "$TAG" -m "$TAG"
    git push --tags
 else
    echo "Cancelled"
    exit 1
 fi
--- a/setup.py
+++ b/setup.py
@@ -1,21 +1,41 @@
 from setuptools import setup
 # version.py defines the VERSION and VERSION_SHORT variables.
 # We use exec here so we don't import cached_path whilst setting up.
 VERSION = {}  # type: ignore
 with open("geoclustering/version.py", "r") as version_file:
    exec(version_file.read(), VERSION)
 setup(
-    name="geocluster",
+    name="geoclustering",
-    version="0.1",
+    version=VERSION["VERSION"],
-    description="",
+    description="📍 command-line tool for clustering geolocations.",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    classifiers=[
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
    author="Bellingcat",
-    packages=["geocluster"],
+    author_email="tech@bellingcat.com",
-    entry_points={"console_scripts": ["geocluster = geocluster.__main__:main"]},
+    license="MIT",
    packages=["geoclustering"],
    package_data={"geoclustering": ["kepler_config.json"]},
    keywords=["cluster", "gis", "pattern-analysis"],
    entry_points={"console_scripts": ["geoclustering = geoclustering.__main__:main"]},
    install_requires=[
        "click",
        "geojson",
        "keplergl",
        "numpy",
        "pandas",
        "scikit-learn",
    ],
-    extras_require={"dev": ["black", "wheel"]},
+    extras_require={
        "dev": ["black", "wheel", "pre-commit", "pytest"],
        "full": ["keplergl"],
    },
    include_package_data=True,
    zip_safe=False,
 )
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/clustering.py
+++ b/tests/clustering.py
@@ -0,0 +1,41 @@
 from geoclustering.clustering import cluster_locations
 from tests.helpers import read_fixture_csv
 df = read_fixture_csv("clustering.csv")
 def has_member(list, name):
    """Return True when some record in ``list`` has ``record["name"] == name``."""
    for entry in list:
        # Same truthiness rule as any(): a matching record counts only if
        # the record itself is truthy.
        if entry["name"] == name and entry:
            return True
    return False
 def test_clustering_all():
    """A 1.97 km radius with min size 4 yields a single four-member cluster."""
    # Everyone except Erin is expected to end up in the one cluster.
    clusters = cluster_locations(
        df=df,
        algorithm="dbscan",
        radius_km=1.97,
        min_cluster_size=4,
    )
    assert len(clusters) == 1
    only_cluster = clusters[0]
    assert len(only_cluster) == 4
 def test_clustering_split():
    """A 0.5 km radius with min size 2 splits the fixture into two pairs."""
    res = cluster_locations(
        df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=2
    )
    # there should be two clusters: Alice & Bob and Carol & Dan
    assert len(res.values()) == 2
    cluster_one = res[0]
    cluster_two = res[1]
    assert len(cluster_one) == 2
    assert has_member(cluster_one, "Alice")
    assert has_member(cluster_one, "Bob")
    # The second cluster's size is checked as well, so an extra member
    # leaking into it cannot go unnoticed.
    assert len(cluster_two) == 2
    assert has_member(cluster_two, "Carol")
    assert has_member(cluster_two, "Dan")
 def test_clustering_none():
    """A 0.5 km radius with min size 3 is expected to produce no clusters."""
    clusters = cluster_locations(
        df=df,
        algorithm="dbscan",
        radius_km=0.5,
        min_cluster_size=3,
    )
    assert len(clusters) == 0
--- a/tests/encoding.py
+++ b/tests/encoding.py
@@ -0,0 +1,30 @@
 from geoclustering.encoding import encode_clusters
 from tests.helpers import read_fixture_csv, read_fixture_content
 df = read_fixture_csv("clustering.csv")
 def test_encoders():
    """Each encoder's output must equal its stored snapshot fixture."""
    alice = {"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}
    bob = {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}
    carol = {"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}
    dan = {
        "id": 4,
        "name": "Dan",
        "lat": 52.52443559865125,
        "lon": 13.41261723049818,
    }
    clusters = {0: [alice, bob], 1: [carol, dan]}
    res = encode_clusters(clusters)
    # (result key, snapshot fixture) pairs, checked in this order.
    expectations = (
        ("string", "snapshots/result.txt"),
        ("json", "snapshots/result.json"),
        ("geojson", "snapshots/result.geojson"),
        ("csv", "snapshots/result.csv"),
    )
    for key, snapshot in expectations:
        assert res[key] == read_fixture_content(snapshot)
--- a/tests/fixtures/clustering.csv
+++ b/tests/fixtures/clustering.csv
@@ -0,0 +1,6 @@
 id,name,lat,lon
 1,Alice,52.523955,13.442362
 2,Bob,52.526659,13.448097
 3,Carol,52.525626,13.419246
 4,Dan,52.52443559865125,13.41261723049818
 5,Erin,52.524838991760774,13.383188597040382
--- a/tests/fixtures/io.csv
+++ b/tests/fixtures/io.csv
@@ -0,0 +1,9 @@
 id,name,lat,lon
 1,Alice,,
 2,,52.523955,13.442362
 ,,-90.12,132.23
 4,,78.234,-180.1212
 5,Bob,52.524838991760774,13.383188597040382
 6,Peter,91.234,
 7,Horst,,23.23
 7,Erin,foo,bar
--- a/tests/fixtures/mock1000.csv
+++ b/tests/fixtures/mock1000.csv
--- a/tests/fixtures/mock50.csv
+++ b/tests/fixtures/mock50.csv
@@ -1,51 +1,51 @@
-id,name,lat,lon
+id,name,lat,lon
-1,Bonnibelle Mathwen,40.1324085,64.4911086
+1,Bonnibelle Mathwen,40.1324085,64.4911086
-2,Fayette Elt,49.6235379,6.2379992
+2,Fayette Elt,49.6235379,6.2379992
-3,Jandy Cooch,-7.5874497,110.7420464
+3,Jandy Cooch,-7.5874497,110.7420464
-4,Robb Gerbel,22.2455315,-80.3936994
+4,Robb Gerbel,22.2455315,-80.3936994
-5,Silvie Clipson,40.3418956,21.5118754
+5,Silvie Clipson,40.3418956,21.5118754
-6,Kristina Izakoff,30.741991,121.341969
+6,Kristina Izakoff,30.741991,121.341969
-7,Ricky Sweeting,11.2666664,122.5333328
+7,Ricky Sweeting,11.2666664,122.5333328
-8,Quintin Hazart,35.119385,109.167435
+8,Quintin Hazart,35.119385,109.167435
-9,Sholom Kilmister,55.7393377,37.6642542
+9,Sholom Kilmister,55.7393377,37.6642542
-10,Misty Dooher,49.9776657,20.9421091
+10,Misty Dooher,49.9776657,20.9421091
-11,Knox Phython,-8.4985,123.5226
+11,Knox Phython,-8.4985,123.5226
-12,Shay Davidy,14.4142191,120.9495257
+12,Shay Davidy,14.4142191,120.9495257
-13,Dre Benoey,-31.4561755,-64.2111608
+13,Dre Benoey,-31.4561755,-64.2111608
-14,Prudi Tomek,40.692169,117.163821
+14,Prudi Tomek,40.692169,117.163821
-15,Evey Ealam,31.123586,114.893666
+15,Evey Ealam,31.123586,114.893666
-16,Norry Urch,45.8022541,17.497172
+16,Norry Urch,45.8022541,17.497172
-17,Valerye Dumberell,50.4438122,48.1450932
+17,Valerye Dumberell,50.4438122,48.1450932
-18,Freddy Furtado,58.3767785,11.6764538
+18,Freddy Furtado,58.3767785,11.6764538
-19,Catarina Samett,50.4034992,26.141892
+19,Catarina Samett,50.4034992,26.141892
-20,Lidia Muckian,-38.7359018,-72.5903739
+20,Lidia Muckian,-38.7359018,-72.5903739
-21,Stacey Dockrey,29.741986,106.273576
+21,Stacey Dockrey,29.741986,106.273576
-22,Norri Bonhill,60.6184239,16.7769535
+22,Norri Bonhill,60.6184239,16.7769535
-23,Florence Pretsel,55.96667,25.15
+23,Florence Pretsel,55.96667,25.15
-24,Marten Matantsev,50.9603536,14.3596743
+24,Marten Matantsev,50.9603536,14.3596743
-25,Claiborn Everall,43.884893,-0.5046003
+25,Claiborn Everall,43.884893,-0.5046003
-26,Randolf Hailey,49.4679131,18.2282007
+26,Randolf Hailey,49.4679131,18.2282007
-27,Meggi Kirkebye,57.6888453,11.9943311
+27,Meggi Kirkebye,57.6888453,11.9943311
-28,Denna Le Grove,16.7124054,98.5746649
+28,Denna Le Grove,16.7124054,98.5746649
-29,Randy Verheijden,40.4722617,-7.9751886
+29,Randy Verheijden,40.4722617,-7.9751886
-30,Caterina Blancowe,35.422892,103.352654
+30,Caterina Blancowe,35.422892,103.352654
-31,Joanne Adamovitch,55.9251242,39.4489055
+31,Joanne Adamovitch,55.9251242,39.4489055
-32,Orazio Coppins,,111.6556388
+32,Orazio Coppins,,111.6556388
-33,Anastassia Bennedsen,45.212088,130.478187
+33,Anastassia Bennedsen,45.212088,130.478187
-34,Linoel Ruggier,22.066171,107.781956
+34,Linoel Ruggier,22.066171,107.781956
-35,Paulina Moralis,-11.806679,-77.1657716
+35,Paulina Moralis,-11.806679,-77.1657716
-36,Ambur Outhwaite,59.4033695,17.9443213
+36,Ambur Outhwaite,59.4033695,17.9443213
-37,Laetitia Aspland,37.6086169,138.9089988
+37,Laetitia Aspland,37.6086169,138.9089988
-38,Dew Moxstead,6.1317011,-75.6382657
+38,Dew Moxstead,6.1317011,-75.6382657
-39,Berna Klaiser,40.1394691,-8.3092933
+39,Berna Klaiser,40.1394691,-8.3092933
-40,Krystle Ingold,7.1518505,0.4738293
+40,Krystle Ingold,7.1518505,0.4738293
-41,Cassaundra Cuffin,56.6342788,36.885813
+41,Cassaundra Cuffin,56.6342788,36.885813
-42,Malanie Harpin,46.9,109.75
+42,Malanie Harpin,46.9,109.75
-43,Laurence Stothart,39.912765,116.18362
+43,Laurence Stothart,39.912765,116.18362
-44,Luz O'Siaghail,40.4476834,25.5917918
+44,Luz O'Siaghail,40.4476834,25.5917918
-45,Brittni Garrod,59.0836123,16.18741
+45,Brittni Garrod,59.0836123,16.18741
-46,Karlie Semrad,-8.793392,121.9330894
+46,Karlie Semrad,-8.793392,121.9330894
-47,Leigh Allderidge,45.768045,15.947739
+47,Leigh Allderidge,45.768045,15.947739
-48,Ashlin Gogerty,50.3250139,34.9100068
+48,Ashlin Gogerty,50.3250139,34.9100068
-49,Mozelle De Launde,53.31611,40.70806
+49,Mozelle De Launde,53.31611,40.70806
-50,Ema le Keux,41.6315023,19.9310781
+50,Ema le Keux,41.6315023,19.9310781
--- a/tests/fixtures/snapshots/result.csv
+++ b/tests/fixtures/snapshots/result.csv
@@ -0,0 +1,5 @@
 "cluster_id","id","name","lat","lon"
 0,1,"Alice",52.523955,13.442362
 0,2,"Bob",52.526659,13.448097
 1,3,"Carol",52.525626,13.419246
 1,4,"Dan",52.52443559865125,13.41261723049818
--- a/tests/fixtures/snapshots/result.geojson
+++ b/tests/fixtures/snapshots/result.geojson
@@ -0,0 +1 @@
 {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.442362, 52.523955]}, "properties": {"id": 1, "name": "Alice", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.448097, 52.526659]}, "properties": {"id": 2, "name": "Bob", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.419246, 52.525626]}, "properties": {"id": 3, "name": "Carol", "cluster_id": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.412617, 52.524436]}, "properties": {"id": 4, "name": "Dan", "cluster_id": 1}}]}
--- a/tests/fixtures/snapshots/result.json
+++ b/tests/fixtures/snapshots/result.json
@@ -0,0 +1 @@
 [{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
--- a/tests/fixtures/snapshots/result.txt
+++ b/tests/fixtures/snapshots/result.txt
@@ -0,0 +1,7 @@
 Cluster 0
 id 1, name Alice, lat 52.523955, lon 13.442362
 id 2, name Bob, lat 52.526659, lon 13.448097
 Cluster 1
 id 3, name Carol, lat 52.525626, lon 13.419246
 id 4, name Dan, lat 52.52443559865125, lon 13.41261723049818
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -0,0 +1,16 @@
 import os
 from geoclustering.io import read_csv_file
 def get_fixture_path(filename):
    """Return the path of `filename` inside the `fixtures` directory that
    sits next to this module.

    The module location is resolved with `os.path.realpath`, so the result
    is absolute and does not depend on the current working directory.
    """
    module_file = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(module_file), "fixtures", filename)
 def read_fixture_csv(filename):
    """Load the fixture file `filename` through `read_csv_file` and return
    whatever that loader returns."""
    fixture_path = get_fixture_path(filename)
    return read_csv_file(fixture_path)
 def read_fixture_content(filename):
    """Return the full text of the fixture file `filename`.

    The file is opened in text mode with the platform default encoding.
    """
    fixture_path = get_fixture_path(filename)
    with open(fixture_path) as handle:
        content = handle.read()
    return content
--- a/tests/io.py
+++ b/tests/io.py
@@ -0,0 +1,25 @@
 from pathlib import Path
 import shutil
 from geoclustering.io import write_output_file
 from tests.helpers import read_fixture_csv
 def test_csv_filters():
    """Reading `io.csv` should keep only the rows with usable coordinates.

    The fixture mixes rows with missing, non-numeric and (presumably)
    out-of-range lat/lon values; only the rows with id 2 and id 5 are
    expected to survive.
    """
    df = read_fixture_csv("io.csv")
    # entries 2 & 5 in fixture are valid.
    assert len(df) == 2
    # Row id 2 has no name in the fixture; compare to the None singleton by
    # identity (PEP 8) rather than with `==`.
    assert df.iloc[0]["name"] is None
    assert df.iloc[1]["name"] == "Bob"
 def test_write_output_file():
    """`write_output_file` is expected to write the content to
    `<directory>/<filename>` even when the directory does not exist yet."""
    p = "./this/dir/does/not/exist"
    filename = "test.txt"
    try:
        write_output_file(p, filename, "test")
        with open(Path(p) / filename) as written:
            assert written.read() == "test"
    finally:
        # Always remove the generated tree, even when the write or the
        # assertion fails, so a failed run does not leave `./this` behind.
        # The existence check keeps cleanup from masking the original error
        # when nothing was created.
        root = Path("./this")
        if root.exists():
            shutil.rmtree(root)
Author	SHA1	Message	Date
Miguel Sozinho Ramalho	e9b7680263	migrate gh artifact actions to v4 (#20 ) * migrate gh artifact actions to v4 from migration guide no breaking changes apply here. * updates pipfile.lock dependency versions * updates CI due to pytest issue see https://github.com/scipy/scipy/issues/22236 * bump to python 3.12 * revert to py3.10	2025-01-09 15:47:27 +00:00
msramalho	de4d4689b9	Bump version to v0.4.1 for release	2022-09-27 14:49:17 +01:00
msramalho	484d3cb02c	adds: tests for csv	2022-09-27 14:49:04 +01:00
msramalho	65366816fa	updates readme with release info	2022-09-27 14:43:05 +01:00
msramalho	de91354867	Bump version to v0.4.0 for release	2022-09-27 14:41:48 +01:00
Kashyap Maheshwari	e9a7519168	Add new output format: csv with cluster info (#18 ) Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>	2022-09-27 14:39:50 +01:00
msramalho	dc7e12642e	adds dev pipenv instructions	2022-09-27 13:59:49 +01:00
msramalho	93c51d7a80	closes #17	2022-09-27 13:58:17 +01:00
msramalho	f77d1d9d62	closes #17	2022-09-27 13:58:00 +01:00
Felix Spöttel	99e844c6ce	fix: compatibility with python < 3.8 (#16 ) * ci: run tests in python 3.7 as well	2022-07-07 10:21:21 +02:00
msramalho	ff094a1d3e	fix: unused import, protected keyword use	2022-07-05 16:36:54 +02:00
Felix Spöttel	926aaf73d6	Bump version to v0.3.0 for release	2022-07-04 16:43:48 +02:00
Felix Spöttel	6a5cb3c3c3	feat: optional kepler.gl integration (#12 )	2022-07-04 16:22:27 +02:00
Felix Spöttel	d252c6b8f3	test: add test suite (#7 ) * add pre-commit hook * improve logging of inconsistent data	2022-07-04 13:54:07 +02:00
Felix Spöttel	1c5d0f649e	docs: update cli documentation	2022-07-01 19:18:06 +02:00
Felix Spöttel	6ed01417c3	docs: update install section	2022-07-01 18:57:40 +02:00
Felix Spöttel	3cc3c30e03	Bump version to v0.2.1 for release	2022-07-01 18:52:00 +02:00
Felix Spöttel	c9d36c6bf3	feat: print success output	2022-07-01 18:51:25 +02:00
Felix Spöttel	62da0806c7	fix: debug prints	2022-07-01 18:48:17 +02:00
Felix Spöttel	8657bd73ec	Bump version to v0.2.0 for release	2022-07-01 18:15:07 +02:00
Felix Spöttel	e633665813	chore: update license	2022-07-01 18:12:00 +02:00
Felix Spöttel	cff5256d06	feat: add `--debug` flag, improve logging & help closes #9	2022-07-01 17:53:09 +02:00
Felix Spöttel	4dfa08bbbc	feat: add `--open` flag (#11 ) closes #5	2022-07-01 17:08:53 +02:00
Felix Spöttel	eaa4022b70	ci: use pipfile.lock as cache key	2022-07-01 17:05:43 +02:00
Felix Spöttel	1cb5541baa	chore: remove clustering print	2022-07-01 17:04:56 +02:00
Felix Spöttel	b40074317c	feat: extend kepler.gl color range closes #10	2022-07-01 17:04:33 +02:00
Miguel Sozinho Ramalho	f1053953ba	feat: auto-deploy to pypi (#8 )	2022-07-01 15:23:50 +01:00
		`@@ -0,0 +1 @@`
							{"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.442362, 52.523955]}, "properties": {"id": 1, "name": "Alice", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.448097, 52.526659]}, "properties": {"id": 2, "name": "Bob", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.419246, 52.525626]}, "properties": {"id": 3, "name": "Carol", "cluster_id": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.412617, 52.524436]}, "properties": {"id": 4, "name": "Dan", "cluster_id": 1}}]}
		`@@ -0,0 +1 @@`
							`[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]`