mirror of
https://github.com/bellingcat/gesara-entity-viz.git
synced 2026-06-08 03:28:33 +03:00
Use numeric indices instead of label names for node keys and remove redundant fields, reducing the size of dataset_entities.json and improving performance
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -60,7 +60,7 @@ if __name__ == "__main__":
|
||||
).reset_index(drop=True)
|
||||
_edges_df = nx.to_pandas_edgelist(G=G)
|
||||
|
||||
nodes_df = _nodes_df[["x", "y", "label", "size", "frequency", "Modularity Class"]]
|
||||
nodes_df = _nodes_df[["x", "y", "label", "size", "Modularity Class"]]
|
||||
edges_df = _edges_df[["source", "target"]]
|
||||
|
||||
clusters_to_combine = {
|
||||
@@ -73,11 +73,14 @@ if __name__ == "__main__":
|
||||
lambda c: "100" if c in clusters_to_combine else str(c)
|
||||
)
|
||||
nodes_df.drop("Modularity Class", axis="columns", inplace=True)
|
||||
nodes_df["key"] = nodes_df["label"]
|
||||
|
||||
label_to_index = {t[0] : i for i, t in enumerate(Counter(list(edges_df['source']) + list(edges_df['target'])).most_common())}
|
||||
|
||||
nodes_df["key"] = nodes_df['label'].map(label_to_index)
|
||||
nodes_df["size"] /= NODE_SCALING
|
||||
nodes = nodes_df.to_dict(orient="records")
|
||||
|
||||
edges = [[e["source"], e["target"]] for e in edges_df.to_dict(orient="records")]
|
||||
edges = [[label_to_index[e["source"]], label_to_index[e["target"]]] for e in edges_df.to_dict(orient="records")]
|
||||
data = {
|
||||
"nodes": nodes,
|
||||
"edges": edges,
|
||||
|
||||
Reference in New Issue
Block a user