diff --git a/README.md b/README.md index c646caa..b1b65a6 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,12 @@ pip install . ``` Usage: geoclustering [OPTIONS] FILENAME + Tool to cluster geolocations. A cluster is created when a certain number of + points (--size) each are within a given distance (--distance) of at least + one other point in the cluster. Input is supplied as a csv file. At a + minimum, each row needs to have a 'lat' and a 'lon' column. Other columns + are reflected to the output. + Options: -d, --distance FLOAT (in km) Max. distance between two points in a cluster. [required] @@ -52,6 +58,7 @@ Options: Default: dbscan --open Open the generated visualization in the default browser automatically. + --debug Print debug output. --help Show this message and exit. ``` diff --git a/geoclustering/__main__.py b/geoclustering/__main__.py index a2b71d5..96e1d74 100644 --- a/geoclustering/__main__.py +++ b/geoclustering/__main__.py @@ -1,4 +1,6 @@ +from pathlib import Path import click +import os import webbrowser import geoclustering.clustering as clustering @@ -6,7 +8,13 @@ import geoclustering.encoding as encoding import geoclustering.io as io -@click.command() +def print_debug(s): + click.secho(s, fg="bright_black") + + +@click.command( + help="Tool to cluster geolocations. A cluster is created when a certain number of points (--size) each are within a given distance (--distance) of at least one other point in the cluster. Input is supplied as a csv file. At a minimum, each row needs to have a 'lat' and a 'lon' column. Other columns are reflected to the output." 
+) @click.option( "--distance", "-d", @@ -43,9 +51,15 @@ import geoclustering.io as io is_flag=True, help="Open the generated visualization in the default browser automatically.", ) +@click.option("--debug", is_flag=True, help="Print debug output.") @click.argument("filename", type=click.Path(exists=True)) -def main(distance, size, output, filename, algorithm, open): +def main(distance, size, output, filename, algorithm, open, debug): + if debug: + print_debug(f"Reading input from {Path(filename).absolute()}") + df = io.read_csv_file(filename) + if debug: + print_debug(f"Read {len(df)} valid coordinates") clusters = clustering.cluster_locations( df=df, algorithm=algorithm, radius_km=distance, min_cluster_size=size @@ -55,14 +69,20 @@ def main(distance, size, output, filename, algorithm, open): click.echo("Did not find clusters matching input parameters.") return + if debug: + print_debug(f"Found {len(clusters)} valid clusters using {algorithm}") + encoded = encoding.encode_clusters(clusters) io.write_output_file(output, "result.txt", encoded["string"]) io.write_output_file(output, "result.json", encoded["json"]) io.write_output_file(output, "result.geojson", encoded["geojson"]) vis = io.write_visualization(output, "result.html", encoded["geojson"]) + click.echo(f"Output files saved to {Path(output).absolute()}") if open: + if debug: + print_debug("Opening visualization in default browser") webbrowser.open_new_tab("file://" + str(vis.absolute())) diff --git a/geoclustering/io.py b/geoclustering/io.py index d1b1b90..707d030 100644 --- a/geoclustering/io.py +++ b/geoclustering/io.py @@ -2,9 +2,22 @@ from keplergl import KeplerGl from pathlib import Path from pkg_resources import resource_filename import json -import json import pandas as pd import numpy as np +import os +import sys + + +class HiddenPrints: + """Disables stdout prints for a block of code.""" + + def __enter__(self): + self._original_stdout = sys.stdout + sys.stdout = open(os.devnull, "w") + + def __exit__(self, exc_type, 
exc_val, exc_tb): + sys.stdout.close() + sys.stdout = self._original_stdout def is_valid_lat(val: str) -> bool: @@ -64,7 +77,10 @@ def write_output_file(dirname, filename, data): def write_visualization(dirname, filename, data): """Write a visualization, ensuring parent directories.""" - map = KeplerGl() + # Hide kepler stdout output. + with HiddenPrints(): + map = KeplerGl() + map.add_data(data=data, name="clusters") # config configures a default color scheme for our clusters layer. @@ -73,6 +89,9 @@ def write_visualization(dirname, filename, data): map.config = json.loads(f.read()) filepath = ensure_file_path(dirname, filename) - map.save_to_html(file_name=str(filepath), center_map=True) + + # Hide kepler stdout output. + with HiddenPrints(): + map.save_to_html(file_name=str(filepath), center_map=True) return filepath