mirror of
https://github.com/bellingcat/gesara-entity-viz.git
synced 2026-06-11 04:58:32 +03:00
added Python scripts, did a lot of data cleaning, changed node size to be proportional to number of channels
This commit is contained in:
90
scripts/generate_visualization.py
Normal file
90
scripts/generate_visualization.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from collections import Counter
|
||||
import json
|
||||
|
||||
import networkx as nx
|
||||
import pandas as pd
|
||||
|
||||
import colorcet
|
||||
|
||||
|
||||
# Colors used for different graph modularity classes
|
||||
COLORS = colorcet.glasbey_dark
|
||||
|
||||
OUTPUT_JSON = "../public/dataset_entities.json"
|
||||
|
||||
NODE_SCALING = 0.5
|
||||
# GraphML file generated by Gephi
|
||||
INPUT_GRAPHML = "data/entity_network_layout.graphml"
|
||||
CLUSTERS = [
|
||||
{"key": "13", "clusterLabel": "US politics"},
|
||||
{"key": "0", "clusterLabel": "Covid and social media"},
|
||||
{"key": "4", "clusterLabel": "Digital soldiers and world leaders"},
|
||||
{"key": "17", "clusterLabel": "US conspiracies"},
|
||||
{"key": "19", "clusterLabel": "Elite human trafficking"},
|
||||
{"key": "9", "clusterLabel": "Big Tech and technocracy"},
|
||||
{"key": "16", "clusterLabel": "Epstein and pedophilia"},
|
||||
{"key": "24", "clusterLabel": "Satanic US Cabal"},
|
||||
{"key": "8", "clusterLabel": "Global Cabal"},
|
||||
{"key": "1", "clusterLabel": "Kennedy and royal conspiracies"},
|
||||
{"key": "14", "clusterLabel": "Pharma and Anti-Vax"},
|
||||
{"key": "23", "clusterLabel": "US sports and celebrities"},
|
||||
{"key": "21", "clusterLabel": "Technological conspiracies"},
|
||||
{"key": "25", "clusterLabel": "Fake news media"},
|
||||
{"key": "7", "clusterLabel": "Vaccine injuries"},
|
||||
{"key": "30", "clusterLabel": "Academia and countries"},
|
||||
{"key": "39", "clusterLabel": "BLM/Antifa and Democrats"},
|
||||
{"key": "11", "clusterLabel": "Big corporations and airlines"},
|
||||
{"key": "15", "clusterLabel": "Alternative doctors"},
|
||||
{"key": "2", "clusterLabel": "Conservative legal institutions"},
|
||||
{"key": "26", "clusterLabel": "Big Pharma"},
|
||||
{"key": "27", "clusterLabel": "Freedom convoys"},
|
||||
{"key": "12", "clusterLabel": "US state-level politics"},
|
||||
{"key": "34", "clusterLabel": "Chemistry and alternative medicine"},
|
||||
{"key": "20", "clusterLabel": "Mass shooters"},
|
||||
{"key": "3", "clusterLabel": "Public health"},
|
||||
{"key": "10", "clusterLabel": "Right-wing media"},
|
||||
{"key": "37", "clusterLabel": "Payment platforms"},
|
||||
{"key": "42", "clusterLabel": "Vote audit"},
|
||||
]
|
||||
BOUNDING_BOX = {"x": [-300, 400], "y": [-600, 150]}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
G = nx.read_graphml(path=INPUT_GRAPHML)
|
||||
|
||||
_nodes_df = pd.DataFrame.from_dict(
|
||||
dict(G.nodes(data=True)), orient="index"
|
||||
).reset_index(drop=True)
|
||||
_edges_df = nx.to_pandas_edgelist(G=G)
|
||||
|
||||
nodes_df = _nodes_df[["x", "y", "label", "size", "frequency", "Modularity Class"]]
|
||||
edges_df = _edges_df[["source", "target"]]
|
||||
|
||||
clusters_to_combine = {
|
||||
k
|
||||
for k, v in Counter(nodes_df["Modularity Class"]).items()
|
||||
if k not in set([int(c["key"]) for c in CLUSTERS])
|
||||
}
|
||||
print("clusters_to_combine: ", clusters_to_combine)
|
||||
nodes_df["cluster"] = nodes_df["Modularity Class"].apply(
|
||||
lambda c: "100" if c in clusters_to_combine else str(c)
|
||||
)
|
||||
nodes_df.drop("Modularity Class", axis="columns", inplace=True)
|
||||
nodes_df["key"] = nodes_df["label"]
|
||||
nodes_df["size"] /= NODE_SCALING
|
||||
nodes = nodes_df.to_dict(orient="records")
|
||||
|
||||
edges = [[e["source"], e["target"]] for e in edges_df.to_dict(orient="records")]
|
||||
data = {
|
||||
"nodes": nodes,
|
||||
"edges": edges,
|
||||
"clusters": [
|
||||
{**cluster, "color": COLORS[i]} for i, cluster in enumerate(CLUSTERS)
|
||||
]
|
||||
+ [{"key": "100", "clusterLabel": "Other", "color": "#999999"}],
|
||||
"bbox": BOUNDING_BOX,
|
||||
}
|
||||
|
||||
with open(OUTPUT_JSON, "w") as f:
|
||||
json.dump(obj=data, fp=f, separators=(",", ":"))
|
||||
Reference in New Issue
Block a user