diff --git a/geocluster/cli.py b/geocluster/__main__.py
similarity index 100%
rename from geocluster/cli.py
rename to geocluster/__main__.py
diff --git a/geocluster/clustering.py b/geocluster/clustering.py
index 3771b6e..dd2b011 100644
--- a/geocluster/clustering.py
+++ b/geocluster/clustering.py
@@ -50,5 +50,5 @@ def cluster_locations(df, algorithm, radius_km, min_cluster_size):
             n_jobs=-1,
         )
 
-    X = np.radians(np.array(coordinates))
+    X = np.radians(np.array(coordinates).astype(float))
     return to_cluster_dict(df, clustering.fit(X))
diff --git a/geocluster/encoding.py b/geocluster/encoding.py
index c4c410e..d9da123 100644
--- a/geocluster/encoding.py
+++ b/geocluster/encoding.py
@@ -64,8 +64,8 @@ class GeoJSONEncoder:
                 "cluster_id": cluster_id,
             }
 
-            lon = props.pop("lon")
-            lat = props.pop("lat")
+            lon = float(props.pop("lon"))
+            lat = float(props.pop("lat"))
 
             point = geojson.Point((lon, lat))
             self.state.append(geojson.Feature(geometry=point, properties=props))
diff --git a/geocluster/io.py b/geocluster/io.py
index 466a338..d680125 100644
--- a/geocluster/io.py
+++ b/geocluster/io.py
@@ -4,11 +4,43 @@ from pkg_resources import resource_filename
 import json
 import json
 import pandas as pd
+import numpy as np
+
+
+def is_valid_lat(val: str) -> bool:
+    """Return True if `val` parses as a decimal latitude within [-90, 90], else False."""
+    try:
+        lat = float(val)
+    except (TypeError, ValueError, OverflowError):  # not bare: let Ctrl-C/SystemExit through
+        return False
+    return -90 <= lat <= 90  # NaN fails both comparisons, so it is rejected
+
+
+def is_valid_lon(val: str) -> bool:
+    """Return True if `val` parses as a decimal longitude within [-180, 180], else False."""
+    try:
+        lon = float(val)
+    except (TypeError, ValueError, OverflowError):  # not bare: let Ctrl-C/SystemExit through
+        return False
+    return -180 <= lon <= 180  # NaN fails both comparisons, so it is rejected
 
 
 def read_csv_file(filename):
     """Read input csv file, dropping rows that don't have valid location data."""
-    return pd.read_csv(filename).dropna(subset=["lat", "lon"])
+    df = pd.read_csv(filename)
+    initial_rows = len(df)
+
+    df = df.dropna(subset=["lat", "lon"])
+    df = df.replace({np.nan: None})  # rebind (replace is not in-place): NaN in other fields -> None for kepler
+    print(f"Ignored {initial_rows - len(df)} coordinates with NaN")
+
+    valid_index = df.lat.astype(str).apply(is_valid_lat) & df.lon.astype(str).apply(
+        is_valid_lon
+    )
+    if len(df_invalid := df[~valid_index]):
+        print(f"Found {len(df_invalid)} invalid coordinate pairs, ignoring:")
+        print(df_invalid[["lat", "lon"]].to_string())
+    return df[valid_index]
 
 
 def ensure_file_path(dirname, filename):