11 Commits
v0.3.0 ... main

Author SHA1 Message Date
Miguel Sozinho Ramalho
e9b7680263 migrate gh artifact actions to v4 (#20)
* migrate gh artifact actions to v4

from migration guide no breaking changes apply here.

* updates pipfile.lock dependency versions

* updates CI due to pytest issue see https://github.com/scipy/scipy/issues/22236

* bump to python 3.12

* revert to py3.10
2025-01-09 15:47:27 +00:00
msramalho
de4d4689b9 Bump version to v0.4.1 for release 2022-09-27 14:49:17 +01:00
msramalho
484d3cb02c adds: tests for csv 2022-09-27 14:49:04 +01:00
msramalho
65366816fa updates readme with release info 2022-09-27 14:43:05 +01:00
msramalho
de91354867 Bump version to v0.4.0 for release 2022-09-27 14:41:48 +01:00
Kashyap Maheshwari
e9a7519168 Add new output format: csv with cluster info (#18)
Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>
2022-09-27 14:39:50 +01:00
msramalho
dc7e12642e adds dev pipenv instructions 2022-09-27 13:59:49 +01:00
msramalho
93c51d7a80 closes #17 2022-09-27 13:58:17 +01:00
msramalho
f77d1d9d62 closes #17 2022-09-27 13:58:00 +01:00
Felix Spöttel
99e844c6ce fix: compatibility with python < 3.8 (#16)
* ci: run tests in python 3.7 as well
2022-07-07 10:21:21 +02:00
msramalho
ff094a1d3e fix: unused import, protected keyword use 2022-07-05 16:36:54 +02:00
12 changed files with 2046 additions and 965 deletions

View File

@@ -36,17 +36,19 @@ jobs:
run: |
python setup.py check
python setup.py bdist_wheel sdist
- python: "3.10"
task:
name: "Style"
name: "Lint"
run: |
black --check .
- python: "3.10"
task:
name: "Test"
run: pytest --exitfirst --failed-first
run: pytest --exitfirst --failed-first --assert=plain
- python: "3.8"
task:
name: "Test (3.8)"
run: pytest --exitfirst --failed-first --assert=plain
steps:
- uses: actions/checkout@v3
@@ -63,7 +65,7 @@ jobs:
- name: Upload package distribution files
if: matrix.task.name == 'Build'
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: package
path: dist
@@ -97,7 +99,7 @@ jobs:
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
- name: Download package distribution files
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: package
path: dist

View File

@@ -16,6 +16,7 @@ black = "*"
pre-commit = "*"
pytest = "*"
wheel = "*"
geoclustering = {editable = true, path = "."}
[requires]
python_version = "3.9"

2905
Pipfile.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -72,7 +72,7 @@ id,name,lat,lon
## Output
If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt` files. A kepler.gl `html` file is generated as well.
If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt`, `csv` files. A kepler.gl `html` file is generated as well.
### JSON
@@ -132,6 +132,16 @@ id 9, name Rosanna Foggo, lat -6.2074293, lon 106.8915948
// ...
```
### CSV
Encodes each event on its own line, together with the `cluster_id` of the cluster it belongs to.
```csv
cluster_id,name,lat,lon
9,Rosanna Foggo,-6.2074293,106.8915948
...
```
### kepler.gl
![kepler.gl instance](https://user-images.githubusercontent.com/1682504/176478177-c0446b51-4060-495c-803d-79e2bbd3e966.png)
@@ -142,10 +152,19 @@ It is assumed that you are using **Python3.9+**. It is encouraged to [setup a vi
```sh
# install dependencies & dev-dependencies
# PIP
pip install -e .[dev,full]
# PIPENV
pipenv install --dev -e .
# install a git hook that runs the code formatter before each commit.
pre-commit install
```
We use [Black](https://github.com/psf/black) as our code formatter. If you don't want to use the `pre-commit` hook, you can run the formatter manually or via an editor plugin.
## Release
1. Update [version.py](geoclustering/version.py)
2. Run `scripts/release.sh`
3. Confirm GH action completed successfully

View File

@@ -1,6 +1,5 @@
from pathlib import Path
import click
import os
import webbrowser
import geoclustering.clustering as clustering
@@ -44,12 +43,13 @@ import geoclustering.io as io
)
@click.option(
"--open",
"_open",
is_flag=True,
help="Open the generated visualization in the default browser automatically.",
)
@click.option("--debug", is_flag=True, help="Print debug output.")
@click.argument("filename", type=click.Path(exists=True))
def main(distance, size, output, filename, algorithm, open, debug):
def main(distance, size, output, filename, algorithm, _open, debug):
def print_debug(s):
if debug:
click.secho(s, fg="bright_black")
@@ -68,21 +68,21 @@ def main(distance, size, output, filename, algorithm, open, debug):
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
encoded = encoding.encode_clusters(clusters)
io.write_output_file(output, "result.txt", encoded["string"])
io.write_output_file(output, "result.json", encoded["json"])
io.write_output_file(output, "result.geojson", encoded["geojson"])
io.write_output_file(output, "result.csv", encoded["csv"])
vis = io.write_visualization(output, "result.html", encoded["geojson"])
if vis is None:
print_debug(f"Skipped generating visualization: kepler is not installed.")
print_debug("Skipped generating visualization: kepler is not installed.")
click.echo(f"Output files saved to {Path(output).absolute()}")
if open:
if _open:
if vis:
webbrowser.open_new_tab("file://" + str(vis.absolute()))
print_debug(f"Opened visualization in default browser.")
print_debug("Opened visualization in default browser.")
else:
click.secho(
"Can't open kepler.gl: package not installed. Please re-install geoclustering with `pip install geoclustering[full]`.",

View File

@@ -1,6 +1,8 @@
import json
import numpy as np
import geojson
import csv
import io # not io.py
class NpEncoder(json.JSONEncoder):
@@ -47,7 +49,7 @@ class JSONEncoder:
for record in cluster:
cluster_data["points"].append(record)
self.state.append(cluster_data)
self.state.append(cluster_data)
def get(self):
return json.dumps(self.state, cls=NpEncoder)
@@ -74,13 +76,37 @@ class GeoJSONEncoder:
return json.dumps(geojson.FeatureCollection(self.state), cls=NpEncoder)
class CSVEncoder:
    """Encodes clustering result as a CSV.

    Every record of every visited cluster becomes one CSV row, prefixed with
    a ``cluster_id`` column. The remaining header columns are taken from the
    keys of the first record seen. Non-numeric values are quoted and rows are
    terminated with ``"\\n"``.
    """

    def __init__(self):
        # In-memory text buffer that accumulates the CSV output.
        self.state = io.StringIO()
        # The DictWriter is created lazily on the first non-empty cluster,
        # because the column names are only known once a record is available.
        self.writer = None

    def visitor(self, cluster_id, cluster):
        """Append all records of ``cluster`` to the CSV buffer.

        Args:
            cluster_id: Identifier written to the ``cluster_id`` column of
                every row of this cluster.
            cluster: Indexable sequence of dict-like records.
        """
        # An empty cluster has no rows to write and no first record to derive
        # the header from; skip it instead of failing on ``cluster[0]``.
        # ``len()`` is used rather than truthiness so that array-like
        # containers are handled as well as plain lists.
        if len(cluster) == 0:
            return

        if self.writer is None:
            self.writer = csv.DictWriter(
                self.state,
                fieldnames=["cluster_id"] + list(cluster[0].keys()),
                quoting=csv.QUOTE_NONNUMERIC,
                lineterminator="\n",
            )
            self.writer.writeheader()

        for record in cluster:
            self.writer.writerow({**record, "cluster_id": cluster_id})

    def get(self):
        """Return the CSV text written so far (empty string if nothing was)."""
        return self.state.getvalue()
def encode_clusters(clusters):
json_encoder = JSONEncoder()
geojson_encoder = GeoJSONEncoder()
string_encoder = StringEncoder()
csv_encoder = CSVEncoder()
encoders = [json_encoder, geojson_encoder, string_encoder]
encoders = [json_encoder, geojson_encoder, string_encoder, csv_encoder]
for cluster_id, cluster in clusters.items():
for encoder in encoders:
encoder.visitor(cluster_id, cluster)
@@ -89,4 +115,5 @@ def encode_clusters(clusters):
"json": json_encoder.get(),
"geojson": geojson_encoder.get(),
"string": string_encoder.get(),
"csv": csv_encoder.get(),
}

View File

@@ -58,13 +58,16 @@ def read_csv_file(filename):
valid_index = df.lat.apply(is_valid_lat) & df.lon.apply(is_valid_lon)
df_invalid = df[~valid_index]
if count_invalid := len(df_invalid):
count_invalid = len(df_invalid)
if count_invalid:
df_not_empty = df_invalid[
(df_invalid.lat.apply(is_not_none) | df_invalid.lon.apply(is_not_none))
]
count_not_empty = len(df_not_empty)
if count_empty := count_invalid - count_not_empty:
count_not_empty = len(df_not_empty)
count_empty = count_invalid - count_not_empty
if count_empty:
print(f"Removed {count_empty} empty coordinate pairs.")
if count_not_empty:

View File

@@ -1,8 +1,8 @@
_MAJOR = "0"
_MINOR = "3"
_MINOR = "4"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "0"
_PATCH = "1"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""

View File

@@ -1,6 +1,5 @@
from geoclustering.clustering import cluster_locations
from geoclustering.io import read_csv_file
from tests.helpers import get_fixture_path, read_fixture_csv
from tests.helpers import read_fixture_csv
df = read_fixture_csv("clustering.csv")

View File

@@ -1,4 +1,3 @@
from geoclustering.clustering import cluster_locations
from geoclustering.encoding import encode_clusters
from tests.helpers import read_fixture_csv, read_fixture_content
@@ -28,3 +27,4 @@ def test_encoders():
assert res["string"] == read_fixture_content("snapshots/result.txt")
assert res["json"] == read_fixture_content("snapshots/result.json")
assert res["geojson"] == read_fixture_content("snapshots/result.geojson")
assert res["csv"] == read_fixture_content("snapshots/result.csv")

5
tests/fixtures/snapshots/result.csv vendored Normal file
View File

@@ -0,0 +1,5 @@
"cluster_id","id","name","lat","lon"
0,1,"Alice",52.523955,13.442362
0,2,"Bob",52.526659,13.448097
1,3,"Carol",52.525626,13.419246
1,4,"Dan",52.52443559865125,13.41261723049818
1 cluster_id id name lat lon
2 0 1 Alice 52.523955 13.442362
3 0 2 Bob 52.526659 13.448097
4 1 3 Carol 52.525626 13.419246
5 1 4 Dan 52.52443559865125 13.41261723049818

View File

@@ -1 +1 @@
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]