mirror of
https://github.com/bellingcat/sugartrail.git
synced 2026-06-07 19:18:30 +03:00
536 lines
17 KiB
Plaintext
536 lines
17 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b6926e35",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Sugartrail "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "f17ebdd2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sugartrail\n",
|
|
"import pandas as pd\n",
|
|
"import ipywidgets as widgets\n",
|
|
"from IPython.display import display, HTML\n",
|
|
"import requests\n",
|
|
"import shutil\n",
|
|
"from pathlib import Path\n",
|
|
"import logging"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "a94bfb25",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%%capture\n",
|
|
"logging.basicConfig(\n",
|
|
" level=logging.DEBUG,\n",
|
|
" format='%(asctime)s - %(levelname)s - %(message)s',\n",
|
|
")\n",
|
|
"\n",
|
|
"# Create a logger instance\n",
|
|
"logger = logging.getLogger(__name__)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cbc5e202",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%%capture\n",
|
|
"network = sugartrail.base.Network()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1704e377",
|
|
"metadata": {},
|
|
"source": [
|
|
"1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0632780b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"API_input = widgets.Text(\n",
|
|
" value='',\n",
|
|
" placeholder='Insert API Key',\n",
|
|
" disabled=False\n",
|
|
")\n",
|
|
"\n",
|
|
"company_text = widgets.Text(\n",
|
|
" value='',\n",
|
|
" placeholder='Insert Company ID',\n",
|
|
" disabled=True\n",
|
|
")\n",
|
|
"\n",
|
|
"auth_status = widgets.HTML(\n",
|
|
" value=\"\",\n",
|
|
")\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"auth_button = widgets.Button(description='Authenticate')\n",
|
|
"auth_button.on_click(lambda bt: auth())\n",
|
|
"\n",
|
|
"init_button = widgets.Button(description='Initialise', disabled=True)\n",
|
|
"# init_button.style.button_color = '#696969'\n",
|
|
"init_button.on_click(lambda bt: init_network()) \n",
|
|
"\n",
|
|
"def auth():\n",
|
|
" auth_button.disabled=True\n",
|
|
" API_input.disabled=True\n",
|
|
" sugartrail.api.basic_auth.username = API_input.value\n",
|
|
" if sugartrail.api.test():\n",
|
|
" auth_status.value = u'\\u2705: Login successful'\n",
|
|
" auth_button.button_style='success'\n",
|
|
" company_text.disabled = False\n",
|
|
" init_button.disabled = False\n",
|
|
" else:\n",
|
|
" auth_button.disabled=False\n",
|
|
" API_input.disabled=False\n",
|
|
" auth_status.value = u'\\u274c: Invalid API key'\n",
|
|
"\n",
|
|
"display(API_input, auth_button, auth_status)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "2bd8c5be",
|
|
"metadata": {},
|
|
"source": [
|
|
"2. Insert the unique company registration number (CRN) for a company you would like to investigate:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d5f9b6ad",
|
|
"metadata": {},
|
|
"source": [
|
|
"<!-- Insert a valid [Companies House Public Data API key](https://developer.company-information.service.gov.uk/get-started/) as `username` string value below. If you don't want to use the API and would prefer loading a pre-built network, uncomment and run the cell below and then run the final cell to build and load the map. -->"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "128106c5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"init_status = widgets.HTML(\n",
|
|
" value=\"\",\n",
|
|
") \n",
|
|
"\n",
|
|
"depth_selector = widgets.BoundedIntText(\n",
|
|
" value=1,\n",
|
|
" min=1,\n",
|
|
" max=20,\n",
|
|
" step=1,\n",
|
|
" disabled=True\n",
|
|
")\n",
|
|
"\n",
|
|
"generate_network_button = widgets.Button(description='Build Network', disabled=True)\n",
|
|
"generate_network_button.on_click(lambda bt: generate_network()) \n",
|
|
"\n",
|
|
"navigation_button = widgets.Button(description='Update', disabled=True)\n",
|
|
"navigation_button.on_click(lambda bt: config_network()) \n",
|
|
"\n",
|
|
"def init_network():\n",
|
|
" init_button.disabled=True\n",
|
|
" response = sugartrail.api.get_company(str(company_text.value))\n",
|
|
" company_text.disabled=True\n",
|
|
" if response:\n",
|
|
" network.company_id = str(company_text.value)\n",
|
|
" init_status.value = u'\\u2705: Initialisation successful for ' + str(response['company_name']) \n",
|
|
" init_button.button_style='success'\n",
|
|
" navigation_button.disabled = False\n",
|
|
" depth_selector.disabled = False\n",
|
|
" generate_network_button.disabled = False\n",
|
|
" logger.debug(f\"Company number entered: {str(company_text.value)}\")\n",
|
|
" \n",
|
|
" else:\n",
|
|
" init_button.disabled=False\n",
|
|
" company_text.disabled=False\n",
|
|
" init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + str(company_text.value)\n",
|
|
"\n",
|
|
"display(company_text, init_button, init_status)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "96ffe953",
|
|
"metadata": {},
|
|
"source": [
|
|
"3. Select the criteria you would like to build your network from (optional):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6eb9065a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"officers_for_company = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get officers for each company',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"historic_addresses = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get historic addresses for each company',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"psc_address = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get PSC addresses for each company',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"officer_appointments = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get appointments for each officer',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_appointments_inttext = widgets.IntText(\n",
|
|
" value=50,\n",
|
|
" disabled=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_appointments = widgets.HBox([widgets.Label('Maximum number of appointments for each officer: '), max_appointments_inttext])\n",
|
|
"\n",
|
|
"officer_correspondance = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get correspondance address for each officer',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"duplicate_officers = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get duplicates for each officer',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_duplicates_inttext = widgets.IntText(\n",
|
|
" value=50,\n",
|
|
" disabled=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_duplicates = widgets.HBox([widgets.Label('Maximum number of duplicate officers for each officer: '), max_duplicates_inttext])\n",
|
|
"\n",
|
|
"officers_for_address = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get officers at each address',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_officers_inttext = widgets.IntText(\n",
|
|
" value=50,\n",
|
|
" disabled=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_officers_at_address = widgets.HBox([widgets.Label('Maximum number of officers corresponding from address: '), max_officers_inttext])\n",
|
|
"\n",
|
|
"companies_for_address = widgets.Checkbox(\n",
|
|
" value=True,\n",
|
|
" description='Get companies registered at each address',\n",
|
|
" disabled=False,\n",
|
|
" indent=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_companies_inttext = widgets.IntText(\n",
|
|
" value=50,\n",
|
|
" disabled=False\n",
|
|
")\n",
|
|
"\n",
|
|
"max_companies_at_address = widgets.HBox([widgets.Label('Maximum number of companies registered to address: '), max_companies_inttext])\n",
|
|
"\n",
|
|
"checkboxes = [officers_for_company, \n",
|
|
" historic_addresses, \n",
|
|
" psc_address, \n",
|
|
" officer_appointments, \n",
|
|
" max_appointments, \n",
|
|
" officer_correspondance,\n",
|
|
" duplicate_officers,\n",
|
|
" max_duplicates, \n",
|
|
" officers_for_address,\n",
|
|
" max_officers_at_address,\n",
|
|
" companies_for_address,\n",
|
|
" max_companies_at_address,\n",
|
|
" navigation_button\n",
|
|
" ]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7da39d70",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def config_network():\n",
|
|
" network.hop.get_company_officers = officers_for_company.value\n",
|
|
" network.hop.get_company_address_history = historic_addresses.value\n",
|
|
" network.hop.get_psc_correspondance_address = psc_address.value\n",
|
|
" network.hop.get_officer_appointments = officer_appointments.value\n",
|
|
" network.hop.officer_appointments_maxsize = max_appointments_inttext.value\n",
|
|
" network.hop.get_officer_correspondance_address = officer_correspondance.value\n",
|
|
" network.hop.get_officer_duplicates = duplicate_officers.value\n",
|
|
" network.hop.officer_duplicates_maxsize = max_duplicates_inttext.value\n",
|
|
" network.hop.get_officers_at_address = officers_for_address.value\n",
|
|
" network.hop.officers_at_address_maxsize = max_officers_inttext.value\n",
|
|
" network.hop.get_companies_at_address = companies_for_address.value\n",
|
|
" network.hop.companies_at_address_maxsize = max_companies_inttext.value\n",
|
|
" depth_selector.disabled = False\n",
|
|
" generate_network_button.disabled = False\n",
|
|
" navigation_button.disabled = True\n",
|
|
" navigation_button.button_style='success'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bec983fc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"accordion_config = widgets.Accordion(children=[widgets.VBox(checkboxes)])\n",
|
|
"accordion_config.set_title(0, 'Config')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1c68e6b4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"accordion_config"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "addafb36",
|
|
"metadata": {},
|
|
"source": [
|
|
"4. Select the depth of the network you would like to build:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ea0e8392",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"build_status = widgets.HTML(\n",
|
|
" value=\"\",\n",
|
|
")\n",
|
|
"\n",
|
|
"build_map_button=widgets.Button(description='Build Map', disabled=True)\n",
|
|
"build_map_button.on_click(lambda bt: generate_map()) \n",
|
|
"\n",
|
|
"def generate_network():\n",
|
|
" with output_box:\n",
|
|
" depth_selector.disabled = True\n",
|
|
" generate_network_button.disabled = True\n",
|
|
" network.perform_hop(depth_selector.value)\n",
|
|
" network.run_map_preprocessing()\n",
|
|
" build_map_button.disabled = False\n",
|
|
" generate_network_button.button_style = \"success\"\n",
|
|
" \n",
|
|
" \n",
|
|
"output_box = widgets.Output()\n",
|
|
"display(depth_selector, generate_network_button, build_status, output_box)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "03ffce05",
|
|
"metadata": {},
|
|
"source": [
|
|
"5. Visualise network on a map:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6449cd96",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"map_container = widgets.HTML(\n",
|
|
" value=\"\",\n",
|
|
")\n",
|
|
"\n",
|
|
"map_data,path_table = sugartrail.mapvis.build_map(network, clear_widget=False) \n",
|
|
"hbox_map = widgets.HBox([path_table])\n",
|
|
"vbox_map = widgets.VBox([map_data, hbox_map])\n",
|
|
"\n",
|
|
"accordion_map = widgets.Accordion(children=[vbox_map])\n",
|
|
"accordion_map.set_title(0, 'Map')\n",
|
|
"\n",
|
|
"tabs = ['Companies', 'Addresses', 'Officers', 'Company Details']\n",
|
|
"children = [widgets.Output() for tab in tabs]\n",
|
|
"tab = widgets.Tab()\n",
|
|
"tab.children = children\n",
|
|
"for i,title in enumerate(tabs):\n",
|
|
" tab.set_title(i, title)\n",
|
|
"\n",
|
|
"accordion_data = widgets.Accordion(children=[tab])\n",
|
|
"accordion_data.set_title(0, 'Data')\n",
|
|
"\n",
|
|
"html_buttons = '''<html>\n",
|
|
"<head>\n",
|
|
"<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n",
|
|
"</head>\n",
|
|
"<body>\n",
|
|
"<a href=\"{filename}\" download=\"{filename}\">\n",
|
|
"<button class=\"p-Widget jupyter-widgets jupyter-button widget-button\">Download Network</button>\n",
|
|
"</a>\n",
|
|
"</body>\n",
|
|
"</html>\n",
|
|
"'''\n",
|
|
"\n",
|
|
"def generate_download_zip(company_text, network):\n",
|
|
" Path(str(company_text.value)).mkdir(parents=True, exist_ok=True)\n",
|
|
" df_company_ids = pd.DataFrame(network.company_ids)\n",
|
|
" df_company_records = pd.DataFrame(network.company_records)\n",
|
|
" df_addresses = pd.DataFrame(network.addresses)\n",
|
|
" df_officer_ids = pd.DataFrame(network.officer_ids)\n",
|
|
" files = {'companies': df_company_ids, 'addresses': df_addresses, 'officers': df_officer_ids, 'company_details': df_company_records}\n",
|
|
" for key in files:\n",
|
|
" files[key].to_csv(str(company_text.value) + '/' + key + '.csv')\n",
|
|
" file = str(company_text.value) + '.json'\n",
|
|
" network.save(file, './' + str(company_text.value) + \"/\")\n",
|
|
" zip_filename = str(company_text.value) + '.zip'\n",
|
|
" shutil.make_archive(str(company_text.value), 'zip', str(company_text.value))\n",
|
|
" return zip_filename\n",
|
|
"\n",
|
|
"def generate_map():\n",
|
|
" map_data,path_table = sugartrail.mapvis.build_map(network, clear_widget=False) \n",
|
|
" hbox_map = widgets.HBox([path_table])\n",
|
|
" vbox_map.children = [map_data, hbox_map]\n",
|
|
" accordion_map.selected_index=0\n",
|
|
" accordion_data.selected_index=0\n",
|
|
" build_map_button.disabled = True\n",
|
|
" build_map_button.button_style = \"Success\"\n",
|
|
" with tab.children[0]:\n",
|
|
" display(pd.DataFrame(network.company_ids))\n",
|
|
" with tab.children[1]:\n",
|
|
" display(pd.DataFrame(network.addresses))\n",
|
|
" with tab.children[2]:\n",
|
|
" display(pd.DataFrame(network.officer_ids))\n",
|
|
" with tab.children[3]:\n",
|
|
" display(pd.DataFrame(network.company_records))\n",
|
|
" zip_filename = generate_download_zip(company_text, network)\n",
|
|
" html_button = html_buttons.format(filename=zip_filename)\n",
|
|
" with download_link:\n",
|
|
" display(HTML(html_button))\n",
|
|
"\n",
|
|
"download_link = widgets.Output()\n",
|
|
"display(build_map_button, map_container)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "684a116e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"accordion_map"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "65552da1",
|
|
"metadata": {},
|
|
"source": [
|
|
"Each marker represents a company in the network. <span style=\"color:green;font-weight:bold\">Green markers</span> represent active companies based at the address, <span style=\"color:red;font-weight:bold\">red markers</span> represent active companies no longer based at the address and <span style=\"color:black;font-weight:bold\">black markers</span> represent dissolved companies once based at the address.\n",
|
|
"\n",
|
|
"Select a marker to display additional information:\n",
|
|
"\n",
|
|
"- pop-up with the selected company's name and address\n",
|
|
"- table containing the most efficient paths from the origin to the selected company\n",
|
|
"- antpaths for each company in the network. <span style=\"color:red;font-weight:bold\">Red antpaths</span> represent the path through all the historic addresses for the selected company. <span style=\"color:black;font-weight:bold\">Black antpaths</span> represent the path from the network origin through all the addresses in the path to the selected company as displayed in the table."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1e328d41",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"accordion_data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9224dce5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"download_link"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "80449fa3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.15"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|