mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-09 12:08:29 +03:00
Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e9b7680263 | ||
|
|
de4d4689b9 | ||
|
|
484d3cb02c | ||
|
|
65366816fa | ||
|
|
de91354867 | ||
|
|
e9a7519168 | ||
|
|
dc7e12642e | ||
|
|
93c51d7a80 | ||
|
|
f77d1d9d62 | ||
|
|
99e844c6ce | ||
|
|
ff094a1d3e | ||
|
|
926aaf73d6 | ||
|
|
6a5cb3c3c3 | ||
|
|
d252c6b8f3 | ||
|
|
1c5d0f649e | ||
|
|
6ed01417c3 | ||
|
|
3cc3c30e03 | ||
|
|
c9d36c6bf3 | ||
|
|
62da0806c7 |
16
.github/workflows/main.yml
vendored
16
.github/workflows/main.yml
vendored
@@ -36,13 +36,19 @@ jobs:
|
||||
run: |
|
||||
python setup.py check
|
||||
python setup.py bdist_wheel sdist
|
||||
|
||||
- python: "3.10"
|
||||
task:
|
||||
name: "Style"
|
||||
name: "Lint"
|
||||
run: |
|
||||
black --check .
|
||||
|
||||
- python: "3.10"
|
||||
task:
|
||||
name: "Test"
|
||||
run: pytest --exitfirst --failed-first --assert=plain
|
||||
- python: "3.8"
|
||||
task:
|
||||
name: "Test (3.8)"
|
||||
run: pytest --exitfirst --failed-first --assert=plain
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -59,7 +65,7 @@ jobs:
|
||||
|
||||
- name: Upload package distribution files
|
||||
if: matrix.task.name == 'Build'
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: package
|
||||
path: dist
|
||||
@@ -93,7 +99,7 @@ jobs:
|
||||
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
|
||||
- name: Download package distribution files
|
||||
uses: actions/download-artifact@v3
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: package
|
||||
path: dist
|
||||
|
||||
10
.pre-commit-config.yaml
Normal file
10
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
# It is recommended to specify the latest version of Python
|
||||
# supported by your project here, or alternatively use
|
||||
# pre-commit's default_language_version, see
|
||||
# https://pre-commit.com/#top_level-default_language_version
|
||||
language_version: python3.9
|
||||
3
Pipfile
3
Pipfile
@@ -13,7 +13,10 @@ scikit-learn = "*"
|
||||
|
||||
[dev-packages]
|
||||
black = "*"
|
||||
pre-commit = "*"
|
||||
pytest = "*"
|
||||
wheel = "*"
|
||||
geoclustering = {editable = true, path = "."}
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
||||
2656
Pipfile.lock
generated
2656
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
65
README.md
65
README.md
@@ -10,40 +10,38 @@
|
||||
|
||||
### Clustering Method
|
||||
|
||||
A cluster is created when a certain number of points (=> `--size`) each are within a given distance (=> `--distance`) of at least one other point in the cluster.
|
||||
A cluster is created when a certain number of points (defined with `--size`) each are within a given distance (defined with `--distance`) of at least one other point in the cluster.
|
||||
|
||||
|
||||
## Install
|
||||
|
||||
Clone the repository:
|
||||
Install with pip:
|
||||
|
||||
```sh
|
||||
git clone https://github.com/bellingcat/geoclustering
|
||||
cd geoclustering
|
||||
# with kepler.gl visualization support
|
||||
pip install geoclustering[full]
|
||||
|
||||
# only text-based output
|
||||
pip install geoclustering
|
||||
```
|
||||
|
||||
Install keplergl build dependencies:
|
||||
If the `full` install fails, you might need to install kepler.gl build dependencies:
|
||||
|
||||
```sh
|
||||
# macos
|
||||
brew install proj gdal
|
||||
```
|
||||
|
||||
Install project with pip:
|
||||
```sh
|
||||
pip install .
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```
|
||||
Usage: geoclustering [OPTIONS] FILENAME
|
||||
|
||||
Tool to cluster geolocations. A cluster is created when a certain number of
|
||||
points (--size) each are within a given distance (--distance) of at least
|
||||
one other point in the cluster. Input is supplied as a csv file. At a
|
||||
minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are
|
||||
reflected to the output.
|
||||
points (defined with --size) each are within a given distance (defined with
|
||||
--distance) of at least one other point in the cluster. Input is supplied as
|
||||
a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column.
|
||||
Other rows are reflected to the output.
|
||||
|
||||
Options:
|
||||
-d, --distance FLOAT (in km) Max. distance between two points in
|
||||
@@ -64,7 +62,7 @@ Options:
|
||||
|
||||
## Input
|
||||
|
||||
Inputs are supplied as a `.csv` file. The only required fields are `lat` and `lon`, all other fields are reflected to the output.
|
||||
Inputs are supplied as a `.csv` file. At a minimum, each row needs to have a `lat` and a `lon` column. Other columns are reflected to the output.
|
||||
|
||||
```csv
|
||||
id,name,lat,lon
|
||||
@@ -74,7 +72,7 @@ id,name,lat,lon
|
||||
|
||||
## Output
|
||||
|
||||
If at least one cluster was found, the tool outputs a folder with `json`, `geojson`, `text` and a kepler.gl `html` files.
|
||||
If at least one cluster was found, the tool writes the results to an output folder as `json`, `geojson`, `txt` and `csv` files. A kepler.gl `html` file is generated as well.
|
||||
|
||||
### JSON
|
||||
|
||||
@@ -123,7 +121,7 @@ Encodes a single `FeatureCollection`, containing all points as `Feature` objects
|
||||
}
|
||||
```
|
||||
|
||||
### txt
|
||||
### Text
|
||||
|
||||
Encodes clusters as blocks separated by a newline, where each line in a cluster block contains one point.
|
||||
|
||||
@@ -134,6 +132,39 @@ id 9, name Rosanna Foggo, lat -6.2074293, lon 106.8915948
|
||||
// ...
|
||||
```
|
||||
|
||||
### CSV
|
||||
|
||||
Encodes each point on one line, together with its associated `cluster_id`.
|
||||
|
||||
```csv
|
||||
cluster_id,name,lat,lon
|
||||
9,Rosanna Foggo,-6.2074293,106.8915948
|
||||
...
|
||||
```
|
||||
|
||||
### kepler.gl
|
||||
|
||||

|
||||
|
||||
## Develop
|
||||
|
||||
It is assumed that you are using **Python 3.9+**. It is encouraged to [set up a virtualenv](https://wiki.archlinux.org/title/Python/Virtual_environment#venv) for development.
|
||||
|
||||
```sh
|
||||
# install dependencies & dev-dependencies
|
||||
# PIP
|
||||
pip install -e .[dev,full]
|
||||
# PIPENV
|
||||
pipenv install --dev -e .
|
||||
|
||||
# install a git hook that runs the code formatter before each commit.
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
We use [Black](https://github.com/psf/black) as our code formatter. If you don't want to use the `pre-commit` hook, you can run the formatter manually or via an editor plugin.
|
||||
|
||||
## Release
|
||||
|
||||
1. Update [version.py](geoclustering/version.py)
|
||||
2. Run `scripts/release.sh`
|
||||
3. Confirm GH action completed successfully
|
||||
@@ -1,6 +1,5 @@
|
||||
from pathlib import Path
|
||||
import click
|
||||
import os
|
||||
import webbrowser
|
||||
|
||||
import geoclustering.clustering as clustering
|
||||
@@ -8,12 +7,8 @@ import geoclustering.encoding as encoding
|
||||
import geoclustering.io as io
|
||||
|
||||
|
||||
def print_debug(s):
|
||||
click.secho(s, fg="bright_black")
|
||||
|
||||
|
||||
@click.command(
|
||||
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
|
||||
help="Tool to cluster geolocations. A cluster is created when a certain number of points (defined with --size) each are within a given distance (defined with --distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
|
||||
)
|
||||
@click.option(
|
||||
"--distance",
|
||||
@@ -48,40 +43,53 @@ def print_debug(s):
|
||||
)
|
||||
@click.option(
|
||||
"--open",
|
||||
"_open",
|
||||
is_flag=True,
|
||||
help="Open the generated visualization in the default browser automatically.",
|
||||
)
|
||||
@click.option("--debug", is_flag=True, help="Print debug output.")
|
||||
@click.argument("filename", type=click.Path(exists=True))
|
||||
def main(distance, size, output, filename, algorithm, open, debug):
|
||||
if debug:
|
||||
print_debug(f"Reading input from {Path(filename).absolute()}")
|
||||
def main(distance, size, output, filename, algorithm, _open, debug):
|
||||
def print_debug(s):
|
||||
if debug:
|
||||
click.secho(s, fg="bright_black")
|
||||
|
||||
df = io.read_csv_file(filename)
|
||||
if debug:
|
||||
print_debug(f"Read {len(df)} valid coordinates")
|
||||
print_debug(f"Read {len(df)} valid coordinates from {Path(filename).absolute()}")
|
||||
|
||||
clusters = clustering.cluster_locations(
|
||||
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
|
||||
)
|
||||
|
||||
if not bool(clusters):
|
||||
click.echo("Did not find clusters matching input parameters.")
|
||||
click.secho("Did not find clusters matching input parameters.", fg="yellow")
|
||||
return
|
||||
|
||||
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
|
||||
|
||||
encoded = encoding.encode_clusters(clusters)
|
||||
|
||||
io.write_output_file(output, "result.txt", encoded["string"])
|
||||
io.write_output_file(output, "result.json", encoded["json"])
|
||||
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
||||
io.write_output_file(output, "result.csv", encoded["csv"])
|
||||
|
||||
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
||||
if vis is None:
|
||||
print_debug("Skipped generating visualization: kepler is not installed.")
|
||||
|
||||
click.echo(f"Output files saved to {Path(output).absolute()}")
|
||||
|
||||
if open:
|
||||
print_debug(f"Opening visualization in default browser")
|
||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||
if _open:
|
||||
if vis:
|
||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||
print_debug("Opened visualization in default browser.")
|
||||
else:
|
||||
click.secho(
|
||||
"Can't open kepler.gl: package not installed. Please re-install geoclustering with `pip install geoclustering[full]`.",
|
||||
fg="yellow",
|
||||
)
|
||||
|
||||
click.secho("Clustering completed.", fg="green")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import geojson
|
||||
import csv
|
||||
import io # not io.py
|
||||
|
||||
|
||||
class NpEncoder(json.JSONEncoder):
|
||||
@@ -47,7 +49,7 @@ class JSONEncoder:
|
||||
|
||||
for record in cluster:
|
||||
cluster_data["points"].append(record)
|
||||
self.state.append(cluster_data)
|
||||
self.state.append(cluster_data)
|
||||
|
||||
def get(self):
|
||||
return json.dumps(self.state, cls=NpEncoder)
|
||||
@@ -74,13 +76,37 @@ class GeoJSONEncoder:
|
||||
return json.dumps(geojson.FeatureCollection(self.state), cls=NpEncoder)
|
||||
|
||||
|
||||
class CSVEncoder:
|
||||
"""Encodes clustering result as a CSV"""
|
||||
|
||||
def __init__(self):
|
||||
self.state = io.StringIO()
|
||||
self.writer = False
|
||||
|
||||
def visitor(self, cluster_id, cluster):
|
||||
if not self.writer:
|
||||
self.writer = csv.DictWriter(
|
||||
self.state,
|
||||
fieldnames=["cluster_id"] + list(cluster[0].keys()),
|
||||
quoting=csv.QUOTE_NONNUMERIC,
|
||||
lineterminator="\n",
|
||||
)
|
||||
self.writer.writeheader()
|
||||
|
||||
for record in cluster:
|
||||
self.writer.writerow({**record, "cluster_id": cluster_id})
|
||||
|
||||
def get(self):
|
||||
return self.state.getvalue()
|
||||
|
||||
|
||||
def encode_clusters(clusters):
|
||||
json_encoder = JSONEncoder()
|
||||
geojson_encoder = GeoJSONEncoder()
|
||||
string_encoder = StringEncoder()
|
||||
csv_encoder = CSVEncoder()
|
||||
|
||||
encoders = [json_encoder, geojson_encoder, string_encoder]
|
||||
|
||||
encoders = [json_encoder, geojson_encoder, string_encoder, csv_encoder]
|
||||
for cluster_id, cluster in clusters.items():
|
||||
for encoder in encoders:
|
||||
encoder.visitor(cluster_id, cluster)
|
||||
@@ -89,4 +115,5 @@ def encode_clusters(clusters):
|
||||
"json": json_encoder.get(),
|
||||
"geojson": geojson_encoder.get(),
|
||||
"string": string_encoder.get(),
|
||||
"csv": csv_encoder.get(),
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from keplergl import KeplerGl
|
||||
from pathlib import Path
|
||||
from pkg_resources import resource_filename
|
||||
import json
|
||||
@@ -7,6 +6,14 @@ import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
# kepler is optional, check if installed.
|
||||
try:
|
||||
from keplergl import KeplerGl
|
||||
except:
|
||||
has_kepler = False
|
||||
else:
|
||||
has_kepler = True
|
||||
|
||||
|
||||
class HiddenPrints:
|
||||
"""Disables stdout prints for a block of code."""
|
||||
@@ -38,23 +45,35 @@ def is_valid_lon(val: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def is_not_none(val: any) -> bool:
|
||||
return val is not None
|
||||
|
||||
|
||||
def read_csv_file(filename):
|
||||
"""Read input csv file, dropping rows that don't have valid location data."""
|
||||
df = pd.read_csv(filename)
|
||||
initial_rows = len(df)
|
||||
# replace NaN for all fields not to break kepler parsing.
|
||||
df = pd.read_csv(filename).replace({np.nan: None})
|
||||
|
||||
df = df.dropna(subset=["lat", "lon"])
|
||||
df = df.replace(
|
||||
{np.nan: None}
|
||||
) # replace for other fields not to break kepler parsing
|
||||
print(f"Ignored {initial_rows - len(df)} coordinates with NaN")
|
||||
# construct an index of values with valid lat & lon.
|
||||
valid_index = df.lat.apply(is_valid_lat) & df.lon.apply(is_valid_lon)
|
||||
df_invalid = df[~valid_index]
|
||||
|
||||
count_invalid = len(df_invalid)
|
||||
if count_invalid:
|
||||
df_not_empty = df_invalid[
|
||||
(df_invalid.lat.apply(is_not_none) | df_invalid.lon.apply(is_not_none))
|
||||
]
|
||||
|
||||
count_not_empty = len(df_not_empty)
|
||||
count_empty = count_invalid - count_not_empty
|
||||
|
||||
if count_empty:
|
||||
print(f"Removed {count_empty} empty coordinate pairs.")
|
||||
|
||||
if count_not_empty:
|
||||
print(f"Removed {count_not_empty} invalid coordinate pairs:")
|
||||
print(df_not_empty[["lat", "lon"]].to_string())
|
||||
|
||||
valid_index = df.lat.astype(str).apply(is_valid_lat) & df.lon.astype(str).apply(
|
||||
is_valid_lon
|
||||
)
|
||||
if len(df_invalid := df[~valid_index]):
|
||||
print(f"Found {len(df_invalid)} invalid coordinate pairs, ignoring:")
|
||||
print(df_invalid[["lat", "lon"]].to_string())
|
||||
return df[valid_index]
|
||||
|
||||
|
||||
@@ -77,6 +96,10 @@ def write_output_file(dirname, filename, data):
|
||||
|
||||
def write_visualization(dirname, filename, data):
|
||||
"""Write a visualization, ensuring parent directories."""
|
||||
|
||||
if not has_kepler:
|
||||
return None
|
||||
|
||||
# Hide kepler stdout output.
|
||||
with HiddenPrints():
|
||||
map = KeplerGl()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
_MAJOR = "0"
|
||||
_MINOR = "2"
|
||||
_MINOR = "4"
|
||||
# On main and in a nightly release the patch should be one ahead of the last
|
||||
# released build.
|
||||
_PATCH = "0"
|
||||
_PATCH = "1"
|
||||
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
||||
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
||||
_SUFFIX = ""
|
||||
|
||||
3
pytest.ini
Normal file
3
pytest.ini
Normal file
@@ -0,0 +1,3 @@
|
||||
[pytest]
|
||||
testpaths = tests/
|
||||
python_files = *.py
|
||||
6
setup.py
6
setup.py
@@ -28,12 +28,14 @@ setup(
|
||||
install_requires=[
|
||||
"click",
|
||||
"geojson",
|
||||
"keplergl",
|
||||
"numpy",
|
||||
"pandas",
|
||||
"scikit-learn",
|
||||
],
|
||||
extras_require={"dev": ["black", "wheel"]},
|
||||
extras_require={
|
||||
"dev": ["black", "wheel", "pre-commit", "pytest"],
|
||||
"full": ["keplergl"],
|
||||
},
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
)
|
||||
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
41
tests/clustering.py
Normal file
41
tests/clustering.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from geoclustering.clustering import cluster_locations
|
||||
from tests.helpers import read_fixture_csv
|
||||
|
||||
|
||||
df = read_fixture_csv("clustering.csv")
|
||||
|
||||
|
||||
def has_member(list, name):
|
||||
return any(x for x in list if x["name"] == name)
|
||||
|
||||
|
||||
def test_clustering_all():
|
||||
# there should be one cluster with all members but Erin.
|
||||
res = cluster_locations(
|
||||
df=df, algorithm="dbscan", radius_km=1.97, min_cluster_size=4
|
||||
)
|
||||
assert len(res.values()) == 1
|
||||
assert len(res[0]) == 4
|
||||
|
||||
|
||||
def test_clustering_split():
|
||||
res = cluster_locations(
|
||||
df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=2
|
||||
)
|
||||
# there should be two clusters: Alice & Bob and Carol & Dan
|
||||
assert len(res.values()) == 2
|
||||
cluster_one = res[0]
|
||||
cluster_two = res[1]
|
||||
assert len(cluster_one) == 2
|
||||
assert has_member(cluster_one, "Alice")
|
||||
assert has_member(cluster_one, "Bob")
|
||||
assert has_member(cluster_two, "Carol")
|
||||
assert has_member(cluster_two, "Dan")
|
||||
|
||||
|
||||
def test_clustering_none():
|
||||
# there should be no clusters now.
|
||||
res = cluster_locations(
|
||||
df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=3
|
||||
)
|
||||
assert len(res.values()) == 0
|
||||
30
tests/encoding.py
Normal file
30
tests/encoding.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from geoclustering.encoding import encode_clusters
|
||||
from tests.helpers import read_fixture_csv, read_fixture_content
|
||||
|
||||
|
||||
df = read_fixture_csv("clustering.csv")
|
||||
|
||||
|
||||
def test_encoders():
|
||||
clusters = {
|
||||
0: [
|
||||
{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362},
|
||||
{"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097},
|
||||
],
|
||||
1: [
|
||||
{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246},
|
||||
{
|
||||
"id": 4,
|
||||
"name": "Dan",
|
||||
"lat": 52.52443559865125,
|
||||
"lon": 13.41261723049818,
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
res = encode_clusters(clusters)
|
||||
|
||||
assert res["string"] == read_fixture_content("snapshots/result.txt")
|
||||
assert res["json"] == read_fixture_content("snapshots/result.json")
|
||||
assert res["geojson"] == read_fixture_content("snapshots/result.geojson")
|
||||
assert res["csv"] == read_fixture_content("snapshots/result.csv")
|
||||
6
tests/fixtures/clustering.csv
vendored
Normal file
6
tests/fixtures/clustering.csv
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
id,name,lat,lon
|
||||
1,Alice,52.523955,13.442362
|
||||
2,Bob,52.526659,13.448097
|
||||
3,Carol,52.525626,13.419246
|
||||
4,Dan,52.52443559865125,13.41261723049818
|
||||
5,Erin,52.524838991760774,13.383188597040382
|
||||
|
9
tests/fixtures/io.csv
vendored
Normal file
9
tests/fixtures/io.csv
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
id,name,lat,lon
|
||||
1,Alice,,
|
||||
2,,52.523955,13.442362
|
||||
,,-90.12,132.23
|
||||
4,,78.234,-180.1212
|
||||
5,Bob,52.524838991760774,13.383188597040382
|
||||
6,Peter,91.234,
|
||||
7,Horst,,23.23
|
||||
7,Erin,foo,bar
|
||||
|
5
tests/fixtures/snapshots/result.csv
vendored
Normal file
5
tests/fixtures/snapshots/result.csv
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
"cluster_id","id","name","lat","lon"
|
||||
0,1,"Alice",52.523955,13.442362
|
||||
0,2,"Bob",52.526659,13.448097
|
||||
1,3,"Carol",52.525626,13.419246
|
||||
1,4,"Dan",52.52443559865125,13.41261723049818
|
||||
|
1
tests/fixtures/snapshots/result.geojson
vendored
Normal file
1
tests/fixtures/snapshots/result.geojson
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.442362, 52.523955]}, "properties": {"id": 1, "name": "Alice", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.448097, 52.526659]}, "properties": {"id": 2, "name": "Bob", "cluster_id": 0}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.419246, 52.525626]}, "properties": {"id": 3, "name": "Carol", "cluster_id": 1}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [13.412617, 52.524436]}, "properties": {"id": 4, "name": "Dan", "cluster_id": 1}}]}
|
||||
1
tests/fixtures/snapshots/result.json
vendored
Normal file
1
tests/fixtures/snapshots/result.json
vendored
Normal file
@@ -0,0 +1 @@
|
||||
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
|
||||
7
tests/fixtures/snapshots/result.txt
vendored
Normal file
7
tests/fixtures/snapshots/result.txt
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
Cluster 0
|
||||
id 1, name Alice, lat 52.523955, lon 13.442362
|
||||
id 2, name Bob, lat 52.526659, lon 13.448097
|
||||
|
||||
Cluster 1
|
||||
id 3, name Carol, lat 52.525626, lon 13.419246
|
||||
id 4, name Dan, lat 52.52443559865125, lon 13.41261723049818
|
||||
16
tests/helpers.py
Normal file
16
tests/helpers.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import os
|
||||
from geoclustering.io import read_csv_file
|
||||
|
||||
|
||||
def get_fixture_path(filename):
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
return os.path.join(dir_path, "fixtures", filename)
|
||||
|
||||
|
||||
def read_fixture_csv(filename):
|
||||
return read_csv_file(get_fixture_path(filename))
|
||||
|
||||
|
||||
def read_fixture_content(filename):
|
||||
with open(get_fixture_path(filename)) as f:
|
||||
return f.read()
|
||||
25
tests/io.py
Normal file
25
tests/io.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from geoclustering.io import write_output_file
|
||||
from tests.helpers import read_fixture_csv
|
||||
|
||||
|
||||
def test_csv_filters():
|
||||
df = read_fixture_csv("io.csv")
|
||||
# entries 2 & 5 in fixture are valid.
|
||||
assert len(df) == 2
|
||||
assert df.iloc[0]["name"] == None
|
||||
assert df.iloc[1]["name"] == "Bob"
|
||||
|
||||
|
||||
def test_write_output_file():
|
||||
p = "./this/dir/does/not/exist"
|
||||
f = "test.txt"
|
||||
write_output_file(p, f, "test")
|
||||
|
||||
path = Path(p) / f
|
||||
|
||||
with open(path) as f:
|
||||
assert f.read() == "test"
|
||||
|
||||
shutil.rmtree(Path("./this"))
|
||||
Reference in New Issue
Block a user