mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-08 03:28:30 +03:00
@@ -39,6 +39,12 @@ pip install .
|
||||
```
|
||||
Usage: geoclustering [OPTIONS] FILENAME
|
||||
|
||||
Tool to cluster geolocations. A cluster is created when a certain number of
|
||||
points (--size) each are within a given distance (--distance) of at least
|
||||
one other point in the cluster. Input is supplied as a csv file. At a
|
||||
minimum, each row needs to have a 'lat' and a 'lon' column. Other columns are
|
||||
reflected to the output.
|
||||
|
||||
Options:
|
||||
-d, --distance FLOAT (in km) Max. distance between two points in
|
||||
a cluster. [required]
|
||||
@@ -52,6 +58,7 @@ Options:
|
||||
Default: dbscan
|
||||
--open Open the generated visualization in the
|
||||
default browser automatically.
|
||||
--debug Print debug output.
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
from pathlib import Path
|
||||
import click
|
||||
import os
|
||||
import webbrowser
|
||||
|
||||
import geoclustering.clustering as clustering
|
||||
@@ -6,7 +8,13 @@ import geoclustering.encoding as encoding
|
||||
import geoclustering.io as io
|
||||
|
||||
|
||||
@click.command()
|
||||
def print_debug(s):
    """Write the debug message ``s`` via click, styled with a bright-black foreground."""
    click.secho(s, fg="bright_black")
|
||||
|
||||
|
||||
@click.command(
|
||||
help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other rows are reflected to the output."
|
||||
)
|
||||
@click.option(
|
||||
"--distance",
|
||||
"-d",
|
||||
@@ -43,9 +51,15 @@ import geoclustering.io as io
|
||||
is_flag=True,
|
||||
help="Open the generated visualization in the default browser automatically.",
|
||||
)
|
||||
@click.option("--debug", is_flag=True, help="Print debug output.")
|
||||
@click.argument("filename", type=click.Path(exists=True))
|
||||
def main(distance, size, output, filename, algorithm, open):
|
||||
def main(distance, size, output, filename, algorithm, open, debug):
|
||||
if debug:
|
||||
print_debug(f"Reading input from {Path(filename).absolute()}")
|
||||
|
||||
df = io.read_csv_file(filename)
|
||||
if debug:
|
||||
print_debug(f"Read {len(df)} valid coordinates")
|
||||
|
||||
clusters = clustering.cluster_locations(
|
||||
df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size
|
||||
@@ -55,14 +69,18 @@ def main(distance, size, output, filename, algorithm, open):
|
||||
click.echo("Did not find clusters matching input parameters.")
|
||||
return
|
||||
|
||||
print_debug(f"Found {len(clusters)} valid clusters using {algorithm}")
|
||||
|
||||
encoded = encoding.encode_clusters(clusters)
|
||||
|
||||
io.write_output_file(output, "result.txt", encoded["string"])
|
||||
io.write_output_file(output, "result.json", encoded["json"])
|
||||
io.write_output_file(output, "result.geojson", encoded["geojson"])
|
||||
vis = io.write_visualization(output, "result.html", encoded["geojson"])
|
||||
click.echo(f"Output files saved to {Path(output).absolute()}")
|
||||
|
||||
if open:
|
||||
print_debug(f"Opening visualization in default browser")
|
||||
webbrowser.open_new_tab("file://" + str(vis.absolute()))
|
||||
|
||||
|
||||
|
||||
@@ -2,9 +2,22 @@ from keplergl import KeplerGl
|
||||
from pathlib import Path
|
||||
from pkg_resources import resource_filename
|
||||
import json
|
||||
import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
class HiddenPrints:
    """Disables stdout prints for a block of code.

    On entry ``sys.stdout`` is replaced by a writable handle on
    ``os.devnull``. On exit that handle is closed and the stream that was
    active at entry is put back, also when the block raises. Exceptions
    from the block are not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the sink so that __exit__ closes exactly
        # the handle opened here, even if the block rebinds sys.stdout.
        self._devnull = open(os.devnull, "w")
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self._devnull.close()
        finally:
            # Restore the previous stream even if closing the sink failed.
            sys.stdout = self._original_stdout
|
||||
|
||||
|
||||
def is_valid_lat(val: str) -> bool:
|
||||
@@ -64,7 +77,10 @@ def write_output_file(dirname, filename, data):
|
||||
|
||||
def write_visualization(dirname, filename, data):
|
||||
"""Write a visualization, ensuring parent directories."""
|
||||
map = KeplerGl()
|
||||
# Hide kepler stdout output.
|
||||
with HiddenPrints():
|
||||
map = KeplerGl()
|
||||
|
||||
map.add_data(data=data, name="clusters")
|
||||
|
||||
# config configures a default color scheme for our clusters layer.
|
||||
@@ -73,6 +89,9 @@ def write_visualization(dirname, filename, data):
|
||||
map.config = json.loads(f.read())
|
||||
|
||||
filepath = ensure_file_path(dirname, filename)
|
||||
map.save_to_html(file_name=str(filepath), center_map=True)
|
||||
|
||||
# Hide kepler stdout output.
|
||||
with HiddenPrints():
|
||||
map.save_to_html(file_name=str(filepath), center_map=True)
|
||||
|
||||
return filepath
|
||||
|
||||
Reference in New Issue
Block a user