added Python scripts, did a lot of data cleaning, changed node size to be proportional to number of channels

2026-06-08 03:28:33 +03:00 · 2022-12-15 19:42:55 -06:00
parent 88b43dfd31
commit 9b9e71fab8
9 changed files with 2915 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,8 @@ chrome-user-data
 .vscode
 *.swp
 *.swo
+*.graphml
+scripts/pruning/

 packages/react-devtools-core/dist
 packages/react-devtools-extensions/chrome/build
--- a/README.md
+++ b/README.md
@@ -1,12 +1,22 @@
-# Sigma.js full-featured demo
+# GESARA Named Entity Network Visualization

-This project aims to provide a full-features "real life" application using sigma.js. It was bootstrapped with [Create React App](https://github.com/facebook/create-react-app) and uses [react-sigma-v2](https://github.com/sim51/react-sigma-v2) to interface sigma.js with React.
+This project generates a visualization of [named entities](https://spacy.io/usage/linguistic-features#named-entities) in English-language posts archived in a database of Telegram channels that have posted about the GESARA conspiracy theory.

-## Dataset
+This visualization was developed by Bellingcat based on an excellent [Sigma.js demo](https://github.com/jacomyal/sigma.js/tree/main/demo), and uses [react-sigma-v2](https://github.com/sim51/react-sigma-v2) to interface sigma.js with React.

-The dataset has been kindly crafted by the [Sciences-Po médialab](https://medialab.sciencespo.fr/) and [OuestWare](https://www.ouestware.com/en/) teams using [Seealsology](https://densitydesign.github.io/strumentalia-seealsology/). It represents a network of Wikipedia pages, connected by ["See also"](https://en.wikipedia.org/wiki/See_also) links. It then was tagged by hand.
+## Python Scripts

-## Available Scripts
+In the `scripts/` subdirectory, you can run Python scripts that were used to generate the network and visualization:
+
+### `generate_network.py`
+
+Extracts the data from a PostgreSQL database, cleans the entity data, generates a NetworkX graph, prunes the edges using the [Marginal Likelihood Filter](https://github.com/naviddianati/GraphPruning), and exports the pruned graph.
+
+### `generate_visualization.py`
+
+After visualizing the network using [Gephi](https://gephi.org/) (using the Force Atlas 2 algorithm, with the "LinLog mode" and "Prevent Overlap" options enabled, and exporting as the file `entity_network_layout.graphml`), this script converts the node, edge, and cluster data into a format readable by this sigma.js project.
+
+## NPM Scripts

 In the project directory, you can run:

--- a/public/dataset_entities.json
+++ b/public/dataset_entities.json
--- a/scripts/data/entity_consolidation.json
+++ b/scripts/data/entity_consolidation.json
--- a/scripts/data/ignore_consolidation.json
+++ b/scripts/data/ignore_consolidation.json
@@ -0,0 +1,82 @@
+{
+    "starting": {
+        "big-tech": ["big tech"],
+        "central-bank": ["central bank"],
+        "central-banks": ["central bank"],
+        "fbi.cia": ["fbi", "cia"],
+        "big pharma.big tech": ["big pharma", "big tech"],
+        "nih.cdc": ["nih", "cdc"],
+        "facebook twitter": ["facebook", "twitter"]
+    },
+    "separations": [
+        "skull & bones",
+        "fox & friends",
+        "c & d",
+        "rothschild & co",
+        "bill & melinda",
+        "bill & melinda gates",
+        "alice & wonderland",
+        "johnson & johnson",
+        "es&s",
+        "non-compliance",
+        "qanon-coin",
+        "q-trust",
+        "jay-z",
+        "trump-era",
+        "bye-den",
+        "co-founder",
+        "ezra cohen-watnick",
+        "pharma-fascism",
+        "deep-state",
+        "anti-trump",
+        "q-team",
+        "pro-america",
+        "anti-biden",
+        "a-rod",
+        "co-chair",
+        "chy-na",
+        "pro-life",
+        "anti-vaxx",
+        "guillain-barre",
+        "jean-pierre",
+        "pfizer-biontech",
+        "co-executive",
+        "non-fda",
+        "isis-k",
+        "un-jabbed",
+        "ingersoll lockwood, inc"
+    ],
+    "plurals": [
+        "chaos",
+        "kim clements",
+        "jeff bezos",
+        "dan andrews",
+        "peters",
+        "greggs",
+        "corps",
+        "rolling stones",
+        "davids",
+        "reuters",
+        "nicolas",
+        "rogers",
+        "dumbs",
+        "coles",
+        "stevens",
+        "williams",
+        "tass",
+        "roberts",
+        "clements",
+        "james woods",
+        "robin williams",
+        "hanks",
+        "joseph goebbels",
+        "stew peters",
+        "usgs",
+        "richards",
+        "owens",
+        "daniels",
+        "isis",
+        "andrews",
+        "evans"
+    ]
+}
--- a/scripts/generate_network.py
+++ b/scripts/generate_network.py
@@ -0,0 +1,385 @@
+import os
+import re
+from itertools import chain, combinations
+from collections import Counter
+from pathlib import Path
+import json
+from functools import cache
+
+import sqlalchemy
+import pandas as pd
+import networkx as nx
+
+import emoji
+from unidecode import unidecode
+import igraph as ig
+import numpy as np
+import nltk
+
+# https://github.com/naviddianati/GraphPruning
+from pruning import unimodal
+
+
+# SQL connection string to database
+CONNECTION_STRING = os.environ["GESARA_DB"]
+# SQL query to retrieve English-language named entities for each post
+SQL_QUERY = """
+SELECT channel, string_agg(entity_text, '|') as named_entities FROM 
+    (SELECT id, 
+    channel,
+    json_array_elements(named_entities) ->> 'type' AS entity_type, 
+    json_array_elements(named_entities) ->> 'text' AS entity_text 
+    FROM posts 
+    WHERE detected_language = 'en' 
+    AND json_array_length(named_entities) > 0) tmp
+WHERE entity_type IN ('ORG', 'PERSON')
+GROUP BY id, channel;
+"""
+# Directory containing input and output data
+DATA_DIR = Path("./data")
+# JSON file containing entity replacement terms
+CONSOLIDATION_JSON = DATA_DIR / "entity_consolidation.json"
+# JSON file containing information about entities to ignore during consolidation
+IGNORE_CONSOLIDATION_JSON = DATA_DIR / "ignore_consolidation.json"
+
+# Ignore entities that are stopwords in common languages
+STOPWORD_LANGUAGES = [
+    "danish",
+    "dutch",
+    "english",
+    "french",
+    "german",
+    "italian",
+    "portuguese",
+    "russian",
+    "spanish",
+]
+
+# Ignore entities mentioned in fewer than this many posts in database
+POST_FREQUENCY_THRESHOLD = 200
+# Ignore entities mentioned by fewer than this many channels in database
+CHANNEL_FREQUENCY_THRESHOLD = 175
+# Ignore edges between entities co-occurring in fewer than this many posts
+EDGE_WEIGHT_THRESHOLD = 10
+# Prune away this percent of the least significant edges
+PRUNING_PERCENTILE = 90
+
+# GraphML file to write network data to
+OUTPUT_GRAPHML = DATA_DIR / f"entity_network.graphml"
+
+
+def create_replacement_dict():
+    """Load JSON file containing entity replacements (e.g. "donald j trump" and "djt" both refer to the same person) and create dict for renaming entities."""
+
+    with open(CONSOLIDATION_JSON, "r") as f:
+        entity_consolidation = json.load(f)
+
+    replacement_dict = {}
+    for original, replacement_list in entity_consolidation.items():
+        for replacement in replacement_list:
+            replacement_dict[replacement] = original
+
+    return replacement_dict
+
+
+def create_ignore_list():
+    """Create list of entities to ignore."""
+
+    ignore_entities = set(
+        chain.from_iterable(
+            [nltk.corpus.stopwords.words(language) for language in STOPWORD_LANGUAGES]
+        )
+    )
+
+    return ignore_entities
+
+
+replacement_dict = create_replacement_dict()
+
+
+def create_consolidation_dict(results):
+    """Some entities are actually two or more entities separated by a "&" or ",". There are also plural versions of some entities, which results in redundancy. This function creates a dict that maps entities to their replacement, for example changing `'fauci & bill gates'` to `['anthony fauci', 'bill gates']`.
+
+    1. Load JSON file containing information about which entities to consolidate
+    2. Find all redundant plural entities
+    3. Find all entities separated by comman, ampersand, and dash
+    """
+
+    with open(IGNORE_CONSOLIDATION_JSON, "r") as f:
+        ignore_consolidation = json.load(f)
+    ignore_plurals = set(ignore_consolidation["plurals"])
+    ignore_separations = set(ignore_consolidation["separations"])
+    consolidation_dict = ignore_consolidation["starting"]
+
+    unique_entities = set(chain.from_iterable(results["valid_entities"]))
+    replaced_entities = set(replacement_dict.keys())
+
+    for entity in unique_entities:
+        if (entity in ignore_plurals) or (entity in consolidation_dict):
+            continue
+        elif (len(entity) > 3) & (entity.endswith("s")):
+            if (entity + "s") in replaced_entities:
+                consolidation_dict[entity] = [replacement_dict[entity[:-1]]]
+            elif (entity[:-1]) in unique_entities:
+                consolidation_dict[entity] = [entity[:-1]]
+
+    for character in [",", "&", "-"]:
+        for entity in unique_entities:
+            _entity = entity.replace(f" {character} ", character)
+            if (entity in ignore_separations) or (entity in consolidation_dict):
+                continue
+            elif (character in _entity) & (len(_entity) > 4):
+                terms = [term.strip() for term in _entity.split(character)]
+                transformed_terms = []
+                for term in terms:
+                    if term in replaced_entities:
+                        transformed_terms.append(replacement_dict[term])
+                    elif term in unique_entities:
+                        transformed_terms.append(term)
+                    else:
+                        transformed_terms = []
+                        break
+                if transformed_terms:
+                    consolidation_dict[entity] = transformed_terms
+
+    return consolidation_dict
+
+
+def consolidate_entities(entities, consolidation_dict):
+    """Apply the dict generated by `create_consolidation_dict` to a list of entities"""
+
+    new_entities = []
+    for entity in entities:
+        new_entity = consolidation_dict.get(entity, [entity])
+        new_entities.extend(new_entity)
+    return set(new_entities)
+
+
+@cache
+def process_entity(s):
+    """All-purpose text cleaning of very dirty entities list"""
+
+    _s = unidecode(s).lower()
+
+    if any([bad in _s for bad in ["http", "www.", "t.me", ">>", "@", "/", ">"]]):
+        return None
+    if _s.startswith("@"):
+        return None
+    if len(emoji.emoji_list(_s)) > 0:
+        return None
+    if len(re.findall("[\U0001F1E6-\U0001F1FF]", _s)) > 0:
+        return None
+    if len(re.findall(r"^[^a-zA-Z0-9]$", _s)) == 1:
+        return None
+
+    _s = re.sub("\s\s+", " ", _s)
+
+    if _s.endswith("'s"):
+        _s = "'s".join(_s.split("'s")[:-1])
+    if _s.endswith("’s"):
+        _s = "'s".join(_s.split("’s")[:-1])
+    if _s.endswith("'"):
+        _s = "'".join(_s.split("'")[:-1])
+    if _s.startswith("the"):
+        _s = "the".join(_s.split("the")[1:])
+    if _s.startswith("now - "):
+        _s = "now - ".join(_s.split("now - ")[1:])
+    if _s.startswith("new - "):
+        _s = "new - ".join(_s.split("new - ")[1:])
+    if _s.startswith("in - "):
+        _s = "in - ".join(_s.split("in - ")[1:])
+    if _s.startswith('"'):
+        _s = '"'.join(_s.split('"')[1:])
+    if _s.startswith("'"):
+        _s = "'".join(_s.split("'")[1:])
+    if _s.startswith("a "):
+        _s = "a ".join(_s.split("a ")[1:])
+    if _s.startswith("inbox -"):
+        _s = "inbox -".join(_s.split("inbox -")[1:])
+    if _s.endswith("- telegram"):
+        _s = "- telegram".join(_s.split("- telegram")[:-1])
+    _s = _s.replace("\xa0", " ")
+
+    for bad in ")(…º×•#:|[]":
+        _s = _s.replace(bad, "")
+    for bad in ["\n"]:
+        _s = _s.replace(bad, " ")
+    _s = _s.replace("\u2800", " ")
+    _s = _s.replace("u.s.", "us")
+    _s = _s.replace("u s.", "us")
+    _s = _s.replace("u.n.", "un")
+
+    _s = _s.strip('&"\'".!-,*~+= ')
+
+    if _s.startswith('"'):
+        if _s.count('"') % 2 == 1:
+            _s = _s[1:]
+    if _s.startswith("'"):
+        if _s.count("'") % 2 == 1:
+            _s = _s[1:]
+    for pattern in [r"(\d+)(a|p)m", r"(\d+)"]:
+        if re.findall(pattern, _s):
+            return None
+
+    entity = unidecode(_s).strip().lower()
+
+    if renamed_entity := replacement_dict.get(entity):
+        return renamed_entity
+    else:
+        if len(entity) <= 2:
+            return None
+        else:
+            return entity
+
+
+def process_entities(el):
+    """Clean all entities in the list of entities"""
+
+    return set(filter(None, [process_entity(d) for d in el.split("|")]))
+
+
+def retrieve_data():
+    """Retrieve English-language named entities from database and clean data.
+
+    1. Inititalize SQL connection
+    2. Retrieve English-language named entities grouped by post
+    3. Process each list of named entities
+    """
+
+    engine = sqlalchemy.create_engine(url=CONNECTION_STRING)
+    results = pd.read_sql(sql=SQL_QUERY, con=engine)
+    results["named_entities"] = results["named_entities"].apply(process_entities)
+
+    return results
+
+
+def create_edge_list(results):
+    """Create an edge list from the named entity data.
+
+    1. Ignore entities that are mentioned less than `POST_FREQUENCY_THRESHOLD` times in the database or are stopwords in other languages
+    2. Create list of co-occurring, frequently-mentioned entities
+    3. Loop through all (sorted and unique) combinations of co-occurring entities to generate edge list
+    """
+
+    ignore_list = create_ignore_list()
+
+    all_entities = list(chain.from_iterable(results["named_entities"]))
+    c = Counter(all_entities)
+    valid_entities = (
+        set(
+            [
+                entity
+                for entity, frequency in c.items()
+                if frequency >= POST_FREQUENCY_THRESHOLD
+            ]
+        )
+        - ignore_list
+    )
+
+    _valid_entity_lists = [
+        [entity for entity in entity_set if entity in valid_entities]
+        for entity_set in results["named_entities"]
+    ]
+    valid_entity_lists = [
+        entity_list for entity_list in _valid_entity_lists if len(entity_list) > 1
+    ]
+    valid_entity_idxs = [
+        True if len(entity_list) > 1 else False for entity_list in _valid_entity_lists
+    ]
+
+    results = results[valid_entity_idxs][["channel"]]
+    results["valid_entities"] = valid_entity_lists
+
+    consolidation_dict = create_consolidation_dict(results=results)
+    results["valid_entities"] = results["valid_entities"].apply(
+        lambda entities: consolidate_entities(
+            entities=entities, consolidation_dict=consolidation_dict
+        )
+    )
+
+    entity_n_channels = dict(
+        results[["channel", "valid_entities"]]
+        .explode("valid_entities")
+        .groupby("valid_entities")["channel"]
+        .nunique()
+    )
+
+    results["entity_tuples"] = results["valid_entities"].apply(
+        lambda entity_list: list(
+            tuple(sorted(combo)) for combo in combinations(entity_list, r=2)
+        )
+    )
+
+    edf = results[["channel", "entity_tuples"]].explode("entity_tuples")
+    grouped = edf.groupby("entity_tuples")["channel"].count()
+
+    edge_list = [
+        (source, target, weight)
+        for (source, target), weight in zip(grouped.index, grouped.values)
+        if weight > EDGE_WEIGHT_THRESHOLD
+    ]
+
+    return edge_list, entity_n_channels
+
+
+def create_and_filter_graph(edge_list, entity_frequency, pruning_percentile):
+    """Create a network from an edge list.
+
+    1. Initialize a graph from the specified edge list and remove unconnected components
+    2. Convert from NetworkX to igraph format, apply Marginal Likelihood Filter (MLF)
+    3. Prune edges of the graph to a specified significance percentile and convert back to NetworkX format
+    4. Set node attributes and remove unconnected components
+    """
+
+    G = nx.Graph()
+    G.add_weighted_edges_from(edge_list)
+    nodes_to_remove = [
+        entity
+        for entity, frequency in entity_frequency.items()
+        if frequency <= CHANNEL_FREQUENCY_THRESHOLD
+    ]
+    G.remove_nodes_from(nodes_to_remove)
+    G = G.subgraph(max(nx.connected_components(G), key=len))
+
+    h = ig.Graph.from_networkx(G)
+    mlf = unimodal.MLF(directed=False)
+    H = mlf.fit_transform(h)
+
+    df_edges = H.get_edge_dataframe()
+    df_nodes = H.get_vertex_dataframe()
+    node_id_to_name = dict(zip(df_nodes.index, df_nodes.values.flatten()))
+
+    filtered_edgelist = []
+
+    for _, t in df_edges[
+        df_edges["significance"]
+        >= np.percentile(a=df_edges["significance"], q=pruning_percentile)
+    ].iterrows():
+        edge = (
+            node_id_to_name[t["source"]],
+            node_id_to_name[t["target"]],
+            t["weight"],
+        )
+        filtered_edgelist.append(edge)
+
+    J = nx.Graph()
+    J.add_weighted_edges_from(filtered_edgelist)
+    nodes_set = set(J.nodes)
+    _entity_frequency = {k: v for k, v in entity_frequency.items() if k in nodes_set}
+    nx.set_node_attributes(G=J, values=_entity_frequency, name="frequency")
+    J = J.subgraph(max(nx.connected_components(J), key=len))
+
+    return J
+
+
+if __name__ == "__main__":
+
+    results = retrieve_data()
+    edge_list, entity_frequency = create_edge_list(results=results)
+    J = create_and_filter_graph(
+        edge_list=edge_list,
+        entity_frequency=entity_frequency,
+        pruning_percentile=PRUNING_PERCENTILE,
+    )
+
+    nx.write_graphml(G=J, path=OUTPUT_GRAPHML)
--- a/scripts/generate_visualization.py
+++ b/scripts/generate_visualization.py
@@ -0,0 +1,90 @@
+from collections import Counter
+import json
+
+import networkx as nx
+import pandas as pd
+
+import colorcet
+
+
+# Colors used for different graph modularity classes
+COLORS = colorcet.glasbey_dark
+
+OUTPUT_JSON = "../public/dataset_entities.json"
+
+NODE_SCALING = 0.5
+# GraphML file generated by Gephi
+INPUT_GRAPHML = "data/entity_network_layout.graphml"
+CLUSTERS = [
+    {"key": "13", "clusterLabel": "US politics"},
+    {"key": "0", "clusterLabel": "Covid and social media"},
+    {"key": "4", "clusterLabel": "Digital soldiers and world leaders"},
+    {"key": "17", "clusterLabel": "US conspiracies"},
+    {"key": "19", "clusterLabel": "Elite human trafficking"},
+    {"key": "9", "clusterLabel": "Big Tech and technocracy"},
+    {"key": "16", "clusterLabel": "Epstein and pedophilia"},
+    {"key": "24", "clusterLabel": "Satanic US Cabal"},
+    {"key": "8", "clusterLabel": "Global Cabal"},
+    {"key": "1", "clusterLabel": "Kennedy and royal conspiracies"},
+    {"key": "14", "clusterLabel": "Pharma and Anti-Vax"},
+    {"key": "23", "clusterLabel": "US sports and celebrities"},
+    {"key": "21", "clusterLabel": "Technological conspiracies"},
+    {"key": "25", "clusterLabel": "Fake news media"},
+    {"key": "7", "clusterLabel": "Vaccine injuries"},
+    {"key": "30", "clusterLabel": "Academia and countries"},
+    {"key": "39", "clusterLabel": "BLM/Antifa and Democrats"},
+    {"key": "11", "clusterLabel": "Big corporations and airlines"},
+    {"key": "15", "clusterLabel": "Alternative doctors"},
+    {"key": "2", "clusterLabel": "Conservative legal institutions"},
+    {"key": "26", "clusterLabel": "Big Pharma"},
+    {"key": "27", "clusterLabel": "Freedom convoys"},
+    {"key": "12", "clusterLabel": "US state-level politics"},
+    {"key": "34", "clusterLabel": "Chemistry and alternative medicine"},
+    {"key": "20", "clusterLabel": "Mass shooters"},
+    {"key": "3", "clusterLabel": "Public health"},
+    {"key": "10", "clusterLabel": "Right-wing media"},
+    {"key": "37", "clusterLabel": "Payment platforms"},
+    {"key": "42", "clusterLabel": "Vote audit"},
+]
+BOUNDING_BOX = {"x": [-300, 400], "y": [-600, 150]}
+
+
+if __name__ == "__main__":
+
+    G = nx.read_graphml(path=INPUT_GRAPHML)
+
+    _nodes_df = pd.DataFrame.from_dict(
+        dict(G.nodes(data=True)), orient="index"
+    ).reset_index(drop=True)
+    _edges_df = nx.to_pandas_edgelist(G=G)
+
+    nodes_df = _nodes_df[["x", "y", "label", "size", "frequency", "Modularity Class"]]
+    edges_df = _edges_df[["source", "target"]]
+
+    clusters_to_combine = {
+        k
+        for k, v in Counter(nodes_df["Modularity Class"]).items()
+        if k not in set([int(c["key"]) for c in CLUSTERS])
+    }
+    print("clusters_to_combine: ", clusters_to_combine)
+    nodes_df["cluster"] = nodes_df["Modularity Class"].apply(
+        lambda c: "100" if c in clusters_to_combine else str(c)
+    )
+    nodes_df.drop("Modularity Class", axis="columns", inplace=True)
+    nodes_df["key"] = nodes_df["label"]
+    nodes_df["size"] /= NODE_SCALING
+    nodes = nodes_df.to_dict(orient="records")
+
+    edges = [[e["source"], e["target"]] for e in edges_df.to_dict(orient="records")]
+    data = {
+        "nodes": nodes,
+        "edges": edges,
+        "clusters": [
+            {**cluster, "color": COLORS[i]} for i, cluster in enumerate(CLUSTERS)
+        ]
+        + [{"key": "100", "clusterLabel": "Other", "color": "#999999"}],
+        "bbox": BOUNDING_BOX,
+    }
+
+    with open(OUTPUT_JSON, "w") as f:
+        json.dump(obj=data, fp=f, separators=(",", ":"))
--- a/src/views/ClustersPanel.tsx
+++ b/src/views/ClustersPanel.tsx
@@ -59,7 +59,7 @@ const ClustersPanel: FC<{
      }
    >
      <p>
-        <i className="text-muted">Click a cluster to show/hide related pages from the network.</i>
+        <i className="text-muted">Click a cluster to show/hide related entities from the network. Cluster names were chosen to best describe most entities within the cluster, but do not always perfectly describe all entities within the cluster.</i>
      </p>
      <p className="buttons">
        <button className="btn" onClick={() => setClusters(mapValues(keyBy(clusters, "key"), () => true))}>
--- a/src/views/DescriptionPanel.tsx
+++ b/src/views/DescriptionPanel.tsx
@@ -17,10 +17,13 @@ const DescriptionPanel: FC = () => {
        This visualisation represents a <i>network</i> of{" "}
        <a target="_blank" rel="noreferrer" href="https://spacy.io/usage/linguistic-features#named-entities">
          named entities
-        </a> in English-language posts archived in a database of Telegram channels that have posted about GESARA. Each{" "}
+        </a> in English-language posts archived in a database of Telegram channels that have posted about the GESARA conspiracy theory. Each{" "}
        <i>node</i> represents an entity, <i>edges</i> between nodes indicate that one or more posts contain both entities
        .
      </p>
+      <p>
+        This kind of visualization shows the ecosystem of the people, organizations, and ideas these conspiracy Telegram channels talk about, as well as the connections between them.
+      </p>
      <p>
        Some social media channels were identified by researchers from{" "}
        <a target="_blank" rel="noreferrer" href="https://www.bellingcat.com/">
@@ -50,21 +53,20 @@ const DescriptionPanel: FC = () => {
          sigma.js
        </a>
        . You can read the source code{" "}
-        <a target="_blank" rel="noreferrer" href="https://github.com/jacomyal/sigma.js/tree/main/demo">
+        <a target="_blank" rel="noreferrer" href="https://github.com/bellingcat/gesara-entity-viz">
          on GitHub
        </a>
        .
      </p>
      <p>
-        Node sizes are related to the number of times the entity was mentioned in the database.
-      </p>
-      <p>
+        The network was visualized using{" "}
+        <a target="_blank" rel="noreferrer" href="https://gephi.org/">
+        Gephi
+        </a>. Node sizes are related to the number of channels the entity was posted about in the database.
        Nodes are colored based a{" "}
        <a target="_blank" rel="noreferrer" href="https://arxiv.org/abs/0803.0476">
        community detection algorithm
        </a>.
-      </p>
-      <p>
        For visualisation purposes, edges were pruned using the{" "}
        <a target="_blank" rel="noreferrer" href="https://github.com/naviddianati/GraphPruning">
          Marginal Likelihood Filter