mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-07 19:18:30 +03:00
Add new output format: csv with cluster info (#18)
Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
dc7e12642e
commit
e9a7519168
1
Pipfile
1
Pipfile
@@ -16,6 +16,7 @@ black = "*"
|
||||
pre-commit = "*"
|
||||
pytest = "*"
|
||||
wheel = "*"
|
||||
geoclustering = {editable = true, path = "."}
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
||||
1163
Pipfile.lock
generated
1163
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
12
README.md
12
README.md
@@ -72,7 +72,7 @@ id,name,lat,lon
|
||||
|
||||
## Output
|
||||
|
||||
If at least one cluster was found, the tool outputs a folder with output as `json`, `geojson`, `txt` files. A kepler.gl `html` file is generated as well.
|
||||
If at least one cluster was found, the tool writes an output folder containing the result as `json`, `geojson`, `txt`, and `csv` files. A kepler.gl `html` file is generated as well.
|
||||
|
||||
### JSON
|
||||
|
||||
@@ -132,6 +132,16 @@ id 9, name Rosanna Foggo, lat -6.2074293, lon 106.8915948
|
||||
// ...
|
||||
```
|
||||
|
||||
### CSV
|
||||
|
||||
Encodes each point as one row, with the `cluster_id` of the cluster it belongs to in the first column.
|
||||
|
||||
```csv
|
||||
"cluster_id","id","name","lat","lon"
|
||||
0,9,"Rosanna Foggo",-6.2074293,106.8915948
|
||||
...
|
||||
```
|
||||
|
||||
### kepler.gl
|
||||
|
||||

|
||||
|
||||
@@ -68,10 +68,10 @@ def main(distance, size, output, filename, algorithm, _open, debug):
|
||||
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
|
||||
|
||||
encoded = encoding.encode_clusters(clusters)
|
||||
|
||||
io.write_output_file(output, "result.txt", encoded["string"])
|
||||
io.write_output_file(output, "result.json", encoded["json"])
|
||||
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
||||
io.write_output_file(output, "result.csv", encoded["csv"])
|
||||
|
||||
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
||||
if vis is None:
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import geojson
|
||||
import csv
|
||||
import io # not io.py
|
||||
|
||||
|
||||
class NpEncoder(json.JSONEncoder):
|
||||
@@ -74,13 +76,36 @@ class GeoJSONEncoder:
|
||||
return json.dumps(geojson.FeatureCollection(self.state), cls=NpEncoder)
|
||||
|
||||
|
||||
class CSVEncoder:
    """Encodes clustering result as a CSV.

    Rows are accumulated in an in-memory text buffer, one row per record,
    with the id of the owning cluster in the leading ``cluster_id`` column.
    The header is derived from the keys of the first record seen.
    """

    def __init__(self):
        self.state = io.StringIO()
        # Created lazily: the column names are only known once the first
        # record has been visited. ``None`` marks "not created yet".
        self.writer = None

    def visitor(self, cluster_id, cluster) -> None:
        """Append every record of ``cluster`` to the CSV buffer.

        Args:
            cluster_id: Identifier written to the ``cluster_id`` column of
                each row produced for this cluster.
            cluster: Sequence of dict-like records. The keys of the first
                record of the first non-empty cluster define the columns.

        Raises:
            ValueError: Raised by ``csv.DictWriter`` if a later record has a
                key that is not among the columns.
        """
        if len(cluster) == 0:
            # Nothing to write, and no record to derive the header from;
            # indexing cluster[0] here would raise IndexError.
            return

        if self.writer is None:
            self.writer = csv.DictWriter(
                self.state,
                fieldnames=["cluster_id", *cluster[0].keys()],
                # Quote text fields (header included), leave numbers bare.
                quoting=csv.QUOTE_NONNUMERIC,
            )
            self.writer.writeheader()

        for record in cluster:
            self.writer.writerow({**record, "cluster_id": cluster_id})

    def get(self) -> str:
        """Return the CSV text accumulated so far ("" if nothing was written)."""
        return self.state.getvalue()
|
||||
|
||||
|
||||
def encode_clusters(clusters):
|
||||
json_encoder = JSONEncoder()
|
||||
geojson_encoder = GeoJSONEncoder()
|
||||
string_encoder = StringEncoder()
|
||||
csv_encoder = CSVEncoder()
|
||||
|
||||
encoders = [json_encoder, geojson_encoder, string_encoder]
|
||||
|
||||
encoders = [json_encoder, geojson_encoder, string_encoder, csv_encoder]
|
||||
for cluster_id, cluster in clusters.items():
|
||||
for encoder in encoders:
|
||||
encoder.visitor(cluster_id, cluster)
|
||||
@@ -89,4 +114,5 @@ def encode_clusters(clusters):
|
||||
"json": json_encoder.get(),
|
||||
"geojson": geojson_encoder.get(),
|
||||
"string": string_encoder.get(),
|
||||
"csv": csv_encoder.get(),
|
||||
}
|
||||
|
||||
2
tests/fixtures/snapshots/result.json
vendored
2
tests/fixtures/snapshots/result.json
vendored
@@ -1 +1 @@
|
||||
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
|
||||
[{"cluster_id": 0, "points": [{"id": 1, "name": "Alice", "lat": 52.523955, "lon": 13.442362}, {"id": 2, "name": "Bob", "lat": 52.526659, "lon": 13.448097}]}, {"cluster_id": 1, "points": [{"id": 3, "name": "Carol", "lat": 52.525626, "lon": 13.419246}, {"id": 4, "name": "Dan", "lat": 52.52443559865125, "lon": 13.41261723049818}]}]
|
||||
Reference in New Issue
Block a user