mirror of
https://github.com/bellingcat/gesara-entity-viz.git
synced 2026-06-13 05:58:33 +03:00
Use numeric indices instead of label names for node keys and remove redundant fields, to reduce the size of dataset_entities.json and improve performance
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -60,7 +60,7 @@ if __name__ == "__main__":
|
|||||||
).reset_index(drop=True)
|
).reset_index(drop=True)
|
||||||
_edges_df = nx.to_pandas_edgelist(G=G)
|
_edges_df = nx.to_pandas_edgelist(G=G)
|
||||||
|
|
||||||
nodes_df = _nodes_df[["x", "y", "label", "size", "frequency", "Modularity Class"]]
|
nodes_df = _nodes_df[["x", "y", "label", "size", "Modularity Class"]]
|
||||||
edges_df = _edges_df[["source", "target"]]
|
edges_df = _edges_df[["source", "target"]]
|
||||||
|
|
||||||
clusters_to_combine = {
|
clusters_to_combine = {
|
||||||
@@ -73,11 +73,14 @@ if __name__ == "__main__":
|
|||||||
lambda c: "100" if c in clusters_to_combine else str(c)
|
lambda c: "100" if c in clusters_to_combine else str(c)
|
||||||
)
|
)
|
||||||
nodes_df.drop("Modularity Class", axis="columns", inplace=True)
|
nodes_df.drop("Modularity Class", axis="columns", inplace=True)
|
||||||
nodes_df["key"] = nodes_df["label"]
|
|
||||||
|
label_to_index = {t[0] : i for i, t in enumerate(Counter(list(edges_df['source']) + list(edges_df['target'])).most_common())}
|
||||||
|
|
||||||
|
nodes_df["key"] = nodes_df['label'].map(label_to_index)
|
||||||
nodes_df["size"] /= NODE_SCALING
|
nodes_df["size"] /= NODE_SCALING
|
||||||
nodes = nodes_df.to_dict(orient="records")
|
nodes = nodes_df.to_dict(orient="records")
|
||||||
|
|
||||||
edges = [[e["source"], e["target"]] for e in edges_df.to_dict(orient="records")]
|
edges = [[label_to_index[e["source"]], label_to_index[e["target"]]] for e in edges_df.to_dict(orient="records")]
|
||||||
data = {
|
data = {
|
||||||
"nodes": nodes,
|
"nodes": nodes,
|
||||||
"edges": edges,
|
"edges": edges,
|
||||||
|
|||||||
Reference in New Issue
Block a user