{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b6926e35",
   "metadata": {},
   "source": [
    "# Sugartrail "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f17ebdd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sugartrail\n",
    "import pandas as pd\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display, HTML\n",
    "import requests\n",
    "import shutil\n",
    "from pathlib import Path\n",
    "import logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a94bfb25",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "logging.basicConfig(\n",
    "    level=logging.DEBUG,\n",
    "    format='%(asctime)s - %(levelname)s - %(message)s',\n",
    ")\n",
    "\n",
    "# Create a logger instance\n",
    "logger = logging.getLogger(__name__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbc5e202",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "network = sugartrail.base.Network()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1704e377",
   "metadata": {},
   "source": [
    "1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0632780b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A Password widget masks the API key while it is typed.\n",
    "API_input = widgets.Password(\n",
    "    value='',\n",
    "    placeholder='Insert API Key',\n",
    "    disabled=False\n",
    ")\n",
    "\n",
    "company_text = widgets.Text(\n",
    "    value='',\n",
    "    placeholder='Insert Company ID',\n",
    "    disabled=True\n",
    ")\n",
    "\n",
    "auth_status = widgets.HTML(\n",
    "    value=\"\",\n",
    ")\n",
    "\n",
    "auth_button = widgets.Button(description='Authenticate')\n",
    "auth_button.on_click(lambda bt: auth())\n",
    "\n",
    "init_button = widgets.Button(description='Initialise', disabled=True)\n",
    "init_button.on_click(lambda bt: init_network())\n",
    "\n",
    "\n",
    "def auth():\n",
    "    \"\"\"Check the entered API key and unlock the company inputs when it is accepted.\n",
    "\n",
    "    The key field and the button are locked while sugartrail.api.test() runs.\n",
    "    They are unlocked again if the key is rejected or the check raises, so the\n",
    "    user can retry without re-running the cell.\n",
    "    \"\"\"\n",
    "    auth_button.disabled = True\n",
    "    API_input.disabled = True\n",
    "    # Strip stray whitespace from a pasted key before using it.\n",
    "    sugartrail.api.basic_auth.username = API_input.value.strip()\n",
    "    authenticated = False\n",
    "    try:\n",
    "        authenticated = bool(sugartrail.api.test())\n",
    "    finally:\n",
    "        # Runs for a rejected key and for an exception alike.\n",
    "        if not authenticated:\n",
    "            auth_button.disabled = False\n",
    "            API_input.disabled = False\n",
    "    if authenticated:\n",
    "        auth_status.value = u'\\u2705: Login successful'\n",
    "        auth_button.button_style = 'success'\n",
    "        company_text.disabled = False\n",
    "        init_button.disabled = False\n",
    "    else:\n",
    "        auth_status.value = u'\\u274c: Invalid API key'\n",
    "\n",
    "\n",
    "display(API_input, auth_button, auth_status)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2bd8c5be",
   "metadata": {},
   "source": [
    "2. Insert the unique company registration number (CRN) for a company you would like to investigate:"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5f9b6ad",
   "metadata": {},
   "source": [
    ""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "128106c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "init_status = widgets.HTML(\n",
    "    value=\"\",\n",
    ")\n",
    "\n",
    "depth_selector = widgets.BoundedIntText(\n",
    "    value=1,\n",
    "    min=1,\n",
    "    max=20,\n",
    "    step=1,\n",
    "    disabled=True\n",
    ")\n",
    "\n",
    "generate_network_button = widgets.Button(description='Build Network', disabled=True)\n",
    "generate_network_button.on_click(lambda bt: generate_network())\n",
    "\n",
    "navigation_button = widgets.Button(description='Update', disabled=True)\n",
    "navigation_button.on_click(lambda bt: config_network())\n",
    "\n",
    "\n",
    "def init_network():\n",
    "    \"\"\"Look up the entered company number and store it on the network when a record exists.\n",
    "\n",
    "    On success the 'Update', depth and 'Build Network' controls are enabled.\n",
    "    If no record is returned, or the lookup raises, the company inputs are\n",
    "    unlocked again so the user can retry.\n",
    "    \"\"\"\n",
    "    init_button.disabled = True\n",
    "    # Strip stray whitespace so a pasted number is looked up as intended.\n",
    "    company_id = str(company_text.value).strip()\n",
    "    response = None\n",
    "    try:\n",
    "        response = sugartrail.api.get_company(company_id)\n",
    "    finally:\n",
    "        # Runs for an empty response and for an exception alike.\n",
    "        if not response:\n",
    "            init_button.disabled = False\n",
    "            company_text.disabled = False\n",
    "    if response:\n",
    "        company_text.disabled = True\n",
    "        network.company_id = company_id\n",
    "        init_status.value = u'\\u2705: Initialisation successful for ' + str(response['company_name'])\n",
    "        init_button.button_style = 'success'\n",
    "        navigation_button.disabled = False\n",
    "        depth_selector.disabled = False\n",
    "        generate_network_button.disabled = False\n",
    "        logger.debug(\"Company number entered: %s\", company_id)\n",
    "    else:\n",
    "        init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + company_id\n",
    "\n",
    "\n",
    "display(company_text, init_button, init_status)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96ffe953",
   "metadata": {},
   "source": [
    "3. Select the criteria you would like to build your network from (optional):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6eb9065a",
   "metadata": {},
   "outputs": [],
   "source": [
    "officers_for_company = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get officers for each company',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "historic_addresses = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get historic addresses for each company',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "psc_address = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get PSC addresses for each company',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "officer_appointments = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get appointments for each officer',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "max_appointments_inttext = widgets.IntText(\n",
    "    value=50,\n",
    "    disabled=False\n",
    ")\n",
    "\n",
    "max_appointments = widgets.HBox([widgets.Label('Maximum number of appointments for each officer: '), max_appointments_inttext])\n",
    "\n",
    "officer_correspondance = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get correspondance address for each officer',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "duplicate_officers = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get duplicates for each officer',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "max_duplicates_inttext = widgets.IntText(\n",
    "    value=50,\n",
    "    disabled=False\n",
    ")\n",
    "\n",
    "max_duplicates = widgets.HBox([widgets.Label('Maximum number of duplicate officers for each officer: '), max_duplicates_inttext])\n",
    "\n",
    "officers_for_address = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get officers at each
address',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "max_officers_inttext = widgets.IntText(\n",
    "    value=50,\n",
    "    disabled=False\n",
    ")\n",
    "\n",
    "max_officers_at_address = widgets.HBox([widgets.Label('Maximum number of officers corresponding from address: '), max_officers_inttext])\n",
    "\n",
    "companies_for_address = widgets.Checkbox(\n",
    "    value=True,\n",
    "    description='Get companies registered at each address',\n",
    "    disabled=False,\n",
    "    indent=False\n",
    ")\n",
    "\n",
    "max_companies_inttext = widgets.IntText(\n",
    "    value=50,\n",
    "    disabled=False\n",
    ")\n",
    "\n",
    "max_companies_at_address = widgets.HBox([widgets.Label('Maximum number of companies registered to address: '), max_companies_inttext])\n",
    "\n",
    "# Widgets shown, in this order, inside the 'Config' accordion.\n",
    "checkboxes = [\n",
    "    officers_for_company,\n",
    "    historic_addresses,\n",
    "    psc_address,\n",
    "    officer_appointments,\n",
    "    max_appointments,\n",
    "    officer_correspondance,\n",
    "    duplicate_officers,\n",
    "    max_duplicates,\n",
    "    officers_for_address,\n",
    "    max_officers_at_address,\n",
    "    companies_for_address,\n",
    "    max_companies_at_address,\n",
    "    navigation_button,\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7da39d70",
   "metadata": {},
   "outputs": [],
   "source": [
    "def config_network():\n",
    "    \"\"\"Copy the values of the config widgets onto network.hop.\n",
    "\n",
    "    Afterwards the depth selector and 'Build Network' button are enabled and\n",
    "    the 'Update' button is disabled and marked as successful.\n",
    "    \"\"\"\n",
    "    network.hop.get_company_officers = officers_for_company.value\n",
    "    network.hop.get_company_address_history = historic_addresses.value\n",
    "    network.hop.get_psc_correspondance_address = psc_address.value\n",
    "    network.hop.get_officer_appointments = officer_appointments.value\n",
    "    network.hop.officer_appointments_maxsize = max_appointments_inttext.value\n",
    "    network.hop.get_officer_correspondance_address = officer_correspondance.value\n",
    "    network.hop.get_officer_duplicates = duplicate_officers.value\n",
    "    network.hop.officer_duplicates_maxsize = max_duplicates_inttext.value\n",
    "    network.hop.get_officers_at_address = officers_for_address.value\n",
    "    network.hop.officers_at_address_maxsize = max_officers_inttext.value\n",
    "    network.hop.get_companies_at_address = companies_for_address.value\n",
    "    network.hop.companies_at_address_maxsize = max_companies_inttext.value\n",
    "    depth_selector.disabled = False\n",
    "    generate_network_button.disabled = False\n",
    "    navigation_button.disabled = True\n",
    "    navigation_button.button_style = 'success'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bec983fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "accordion_config = widgets.Accordion(children=[widgets.VBox(checkboxes)])\n",
    "accordion_config.set_title(0, 'Config')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c68e6b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "accordion_config"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "addafb36",
   "metadata": {},
   "source": [
    "4. Select the depth of the network you would like to build:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea0e8392",
   "metadata": {},
   "outputs": [],
   "source": [
    "build_status = widgets.HTML(\n",
    "    value=\"\",\n",
    ")\n",
    "\n",
    "build_map_button = widgets.Button(description='Build Map', disabled=True)\n",
    "build_map_button.on_click(lambda bt: generate_map())\n",
    "\n",
    "\n",
    "def generate_network():\n",
    "    \"\"\"Expand the network to the selected depth and run the map preprocessing.\n",
    "\n",
    "    The depth selector and 'Build Network' button are locked while the build\n",
    "    runs. On success the 'Build Map' button is enabled; if the build does not\n",
    "    complete, the controls are unlocked again so the user can retry. The\n",
    "    outcome is written to build_status.\n",
    "    \"\"\"\n",
    "    depth_selector.disabled = True\n",
    "    generate_network_button.disabled = True\n",
    "    build_status.value = ''\n",
    "    built = False\n",
    "    try:\n",
    "        # An Output context displays exceptions raised inside it and, when run\n",
    "        # under IPython, suppresses them, so completion is tracked with a flag\n",
    "        # rather than by waiting for an exception to reach this function.\n",
    "        with output_box:\n",
    "            network.perform_hop(depth_selector.value)\n",
    "            network.run_map_preprocessing()\n",
    "            built = True\n",
    "    finally:\n",
    "        if built:\n",
    "            build_map_button.disabled = False\n",
    "            generate_network_button.button_style = 'success'\n",
    "            build_status.value = u'\\u2705: Network built'\n",
    "        else:\n",
    "            depth_selector.disabled = False\n",
    "            generate_network_button.disabled = False\n",
    "            build_status.value = u'\\u274c: Network build failed'\n",
    "\n",
    "\n",
    "output_box = widgets.Output()\n",
    "display(depth_selector, generate_network_button, build_status, output_box)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03ffce05",
   "metadata": {},
   "source": [
    "5.
Visualise network on a map:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6449cd96",
   "metadata": {},
   "outputs": [],
   "source": [
    "map_container = widgets.HTML(\n",
    "    value=\"\",\n",
    ")\n",
    "\n",
    "# Initial map widgets; generate_map() swaps fresh ones into vbox_map later.\n",
    "map_data, path_table = sugartrail.mapvis.build_map(network, clear_widget=False)\n",
    "hbox_map = widgets.HBox([path_table])\n",
    "vbox_map = widgets.VBox([map_data, hbox_map])\n",
    "\n",
    "accordion_map = widgets.Accordion(children=[vbox_map])\n",
    "accordion_map.set_title(0, 'Map')\n",
    "\n",
    "# One Output widget per tab; generate_map() fills them in this order.\n",
    "tabs = ['Companies', 'Addresses', 'Officers', 'Company Details']\n",
    "children = [widgets.Output() for _ in tabs]\n",
    "tab = widgets.Tab()\n",
    "tab.children = children\n",
    "for index, title in enumerate(tabs):\n",
    "    tab.set_title(index, title)\n",
    "\n",
    "accordion_data = widgets.Accordion(children=[tab])\n",
    "accordion_data.set_title(0, 'Data')\n",
    "\n",
    "# NOTE(review): this template is blank and has no {filename} placeholder, so\n",
    "# the download area renders nothing - confirm the intended download markup.\n",
    "html_buttons = '''\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "'''\n",
    "\n",
    "\n",
    "def generate_download_zip(company_text, network):\n",
    "    \"\"\"Export the network into a folder named after the company ID and zip that folder.\n",
    "\n",
    "    Four CSV files (companies, addresses, officers, company_details) are\n",
    "    written from the network's record collections, network.save() is called\n",
    "    with '<id>.json' and the folder path, and the folder is archived.\n",
    "\n",
    "    Args:\n",
    "        company_text: object whose ``value`` is the company ID.\n",
    "        network: network providing company_ids, company_records, addresses,\n",
    "            officer_ids and save().\n",
    "\n",
    "    Returns:\n",
    "        The archive file name, '<id>.zip'.\n",
    "    \"\"\"\n",
    "    company_id = str(company_text.value)\n",
    "    export_dir = Path(company_id)\n",
    "    export_dir.mkdir(parents=True, exist_ok=True)\n",
    "    tables = {\n",
    "        'companies': network.company_ids,\n",
    "        'addresses': network.addresses,\n",
    "        'officers': network.officer_ids,\n",
    "        'company_details': network.company_records,\n",
    "    }\n",
    "    for name, records in tables.items():\n",
    "        pd.DataFrame(records).to_csv(export_dir / (name + '.csv'))\n",
    "    network.save(company_id + '.json', './' + company_id + '/')\n",
    "    shutil.make_archive(company_id, 'zip', company_id)\n",
    "    return company_id + '.zip'\n",
    "\n",
    "\n",
    "def generate_map():\n",
    "    \"\"\"Rebuild the map for the current network, fill the data tabs and write the download bundle.\n",
    "\n",
    "    Both accordions are opened, and the 'Build Map' button is disabled and\n",
    "    marked as successful.\n",
    "    \"\"\"\n",
    "    map_data, path_table = sugartrail.mapvis.build_map(network, clear_widget=False)\n",
    "    vbox_map.children = [map_data, widgets.HBox([path_table])]\n",
    "    accordion_map.selected_index = 0\n",
    "    accordion_data.selected_index = 0\n",
    "    build_map_button.disabled = True\n",
    "    build_map_button.button_style = 'success'\n",
    "    # Same order as the tab titles: Companies, Addresses, Officers, Company Details.\n",
    "    tab_records = [\n",
    "        network.company_ids,\n",
    "        network.addresses,\n",
    "        network.officer_ids,\n",
    "        network.company_records,\n",
    "    ]\n",
    "    for tab_output, records in zip(tab.children, tab_records):\n",
    "        with tab_output:\n",
    "            display(pd.DataFrame(records))\n",
    "    zip_filename = generate_download_zip(company_text, network)\n",
    "    with download_link:\n",
    "        display(HTML(html_buttons.format(filename=zip_filename)))\n",
    "\n",
    "\n",
    "download_link = widgets.Output()\n",
    "display(build_map_button, map_container)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "684a116e",
   "metadata": {},
   "outputs": [],
   "source": [
    "accordion_map"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "65552da1",
   "metadata": {},
   "source": [
    "Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address.\n",
    "\n",
    "Select a marker to display additional information:\n",
    "\n",
    "- pop-up with the selected company's name and address\n",
    "- table containing the most efficient paths from the origin to the selected company\n",
    "- antpaths for each company in the network. Red antpaths represent the path through all the historic addresses for the selected company. Black antpaths represent the path from the network origin through all the addresses in the path to the selected company as displayed in the table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e328d41",
   "metadata": {},
   "outputs": [],
   "source": [
    "accordion_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9224dce5",
   "metadata": {},
   "outputs": [],
   "source": [
    "download_link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80449fa3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}