mirror of
https://github.com/bellingcat/geoclustering.git
synced 2026-06-07 19:18:30 +03:00
42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
from geoclustering.clustering import cluster_locations
|
|
from tests.helpers import read_fixture_csv
|
|
|
|
|
|
df = read_fixture_csv("clustering.csv")
|
|
|
|
|
|
def has_member(list, name):
|
|
return any(x for x in list if x["name"] == name)
|
|
|
|
|
|
def test_clustering_all():
|
|
# there should be one cluster with all members but Erin.
|
|
res = cluster_locations(
|
|
df=df, algorithm="dbscan", radius_km=1.97, min_cluster_size=4
|
|
)
|
|
assert len(res.values()) == 1
|
|
assert len(res[0]) == 4
|
|
|
|
|
|
def test_clustering_split():
|
|
res = cluster_locations(
|
|
df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=2
|
|
)
|
|
# there should be two cluster: Alice & Bob and Carol & Dan
|
|
assert len(res.values()) == 2
|
|
cluster_one = res[0]
|
|
cluster_two = res[1]
|
|
assert len(cluster_one) == 2
|
|
assert has_member(cluster_one, "Alice")
|
|
assert has_member(cluster_one, "Bob")
|
|
assert has_member(cluster_two, "Carol")
|
|
assert has_member(cluster_two, "Dan")
|
|
|
|
|
|
def test_clustering_none():
|
|
# there should be no clusters now.
|
|
res = cluster_locations(
|
|
df=df, algorithm="dbscan", radius_km=0.5, min_cluster_size=3
|
|
)
|
|
assert len(res.values()) == 0
|