{ "cells": [
{ "cell_type": "markdown", "id": "b6926e35", "metadata": {}, "source": [ "# Sugartrail " ] },
{ "cell_type": "code", "execution_count": 1, "id": "f17ebdd2", "metadata": {}, "outputs": [], "source": [
"from sugartrail import mapview, api, base\n",
"import pandas as pd\n",
"import ipywidgets as widgets\n",
"from IPython.display import display, HTML\n",
"import requests\n",
"import shutil\n",
"from pathlib import Path" ] },
{ "cell_type": "code", "execution_count": 2, "id": "cbc5e202", "metadata": {}, "outputs": [], "source": [
"%%capture\n",
"network = base.Network()" ] },
{ "cell_type": "markdown", "id": "1704e377", "metadata": {}, "source": [ "1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:" ] },
{ "cell_type": "code", "execution_count": 3, "id": "0632780b", "metadata": {}, "outputs": [
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1161d0ba01644c1aa3c0a45f82658a7b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Text(value='', placeholder='Insert API Key')" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ccd5abbda2f140888ee19075be763d79", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(description='Authenticate', style=ButtonStyle())" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "88984a8fbde64f7491a6ce08316758f4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='')" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
"# Masked input: the API key is a credential, so it is not echoed on screen.\n",
"API_input = widgets.Password(\n",
"    value='',\n",
"    placeholder='Insert API Key',\n",
"    disabled=False\n",
")\n",
"\n",
"# Created disabled; auth() enables it once the key has been accepted.\n",
"company_text = widgets.Text(\n",
"    value='',\n",
"    placeholder='Insert Company ID',\n",
"    disabled=True\n",
")\n",
"\n",
"auth_status = widgets.HTML(\n",
"    value=\"\",\n",
")\n",
"\n",
"auth_button = widgets.Button(description='Authenticate')\n",
"auth_button.on_click(lambda bt: auth())\n",
"\n",
"init_button = widgets.Button(description='Initialise', disabled=True)\n",
"init_button.on_click(lambda bt: init_network())\n",
"\n",
"def auth():\n",
"    \"\"\"Check the entered API key and, if it is accepted, unlock the company-ID step.\n",
"\n",
"    The key (surrounding whitespace removed) is stored as the basic-auth\n",
"    username on ``api`` and checked with ``api.test()``. A truthy result\n",
"    enables the company-ID field and the Initialise button; a falsy result\n",
"    re-enables the key field and Authenticate button so the user can retry.\n",
"    \"\"\"\n",
"    auth_button.disabled = True\n",
"    API_input.disabled = True\n",
"    # Drop stray whitespace (e.g. from copy/paste) so it is not sent as part of the key.\n",
"    api.basic_auth.username = API_input.value.strip()\n",
"    try:\n",
"        authenticated = api.test()\n",
"    except Exception:\n",
"        # Unlock the form before re-raising so a failed check does not leave it stuck.\n",
"        auth_button.disabled = False\n",
"        API_input.disabled = False\n",
"        auth_status.value = '\\u274c: Could not verify API key'\n",
"        raise\n",
"    if authenticated:\n",
"        auth_status.value = '\\u2705: Login successful'\n",
"        auth_button.button_style = 'success'\n",
"        company_text.disabled = False\n",
"        init_button.disabled = False\n",
"    else:\n",
"        auth_button.disabled = False\n",
"        API_input.disabled = False\n",
"        auth_status.value = '\\u274c: Invalid API key'\n",
"\n",
"display(API_input, auth_button, auth_status)" ] },
{ "cell_type": "markdown", "id": "2bd8c5be", "metadata": {}, "source": [ "2. Insert the unique company registration number (CRN) for a company you would like to investigate:" ] },
{ "cell_type": "markdown", "id": "d5f9b6ad", "metadata": {}, "source": [ "" ] },
{ "cell_type": "code", "execution_count": 4, "id": "128106c5", "metadata": {}, "outputs": [
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1c410b4c2eeb4345b7bcdfcf1622c502", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Text(value='', placeholder='Insert Company ID')" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e44b1eae883d4d30abc7a21b5f65b207", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(description='Initialise', style=ButtonStyle())" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "028a5edf0df54912a206ceea49c025a6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='')" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
"init_status = widgets.HTML(\n",
"    value=\"\",\n",
")
\n",
"\n",
"depth_selector = widgets.BoundedIntText(\n",
"    value=1,\n",
"    min=1,\n",
"    max=20,\n",
"    step=1,\n",
"    disabled=True\n",
")\n",
"\n",
"generate_network_button = widgets.Button(description='Build Network', disabled=True)\n",
"generate_network_button.on_click(lambda bt: generate_network())\n",
"\n",
"navigation_button = widgets.Button(description='Update', disabled=True)\n",
"navigation_button.on_click(lambda bt: config_network())\n",
"\n",
"def init_network():\n",
"    \"\"\"Look up the entered company number and, if found, unlock the build controls.\n",
"\n",
"    Calls ``api.get_company`` with the trimmed contents of ``company_text``.\n",
"    A truthy response stores the number on ``network.company_id`` and enables\n",
"    the Update button, the depth selector and the Build Network button. A falsy\n",
"    response re-enables the input so another number can be tried.\n",
"    \"\"\"\n",
"    import html  # local import: escapes text before the HTML status widget renders it\n",
"\n",
"    company_id = str(company_text.value).strip()\n",
"    init_button.disabled = True\n",
"    company_text.disabled = True\n",
"    try:\n",
"        response = api.get_company(company_id)\n",
"    except Exception:\n",
"        # Unlock the form before re-raising so a failed request does not leave it stuck.\n",
"        init_button.disabled = False\n",
"        company_text.disabled = False\n",
"        raise\n",
"    if response:\n",
"        network.company_id = company_id\n",
"        # The name comes from the API response; escape it so markup characters are shown literally.\n",
"        init_status.value = '\\u2705: Initialisation successful for ' + html.escape(str(response['company_name']))\n",
"        init_button.button_style = 'success'\n",
"        navigation_button.disabled = False\n",
"        depth_selector.disabled = False\n",
"        generate_network_button.disabled = False\n",
"    else:\n",
"        init_button.disabled = False\n",
"        company_text.disabled = False\n",
"        init_status.value = '\\u274c: Initialisation Failed. No records for company: ' + html.escape(company_id)\n",
"\n",
"display(company_text, init_button, init_status)" ] },
{ "cell_type": "markdown", "id": "96ffe953", "metadata": {}, "source": [ "3. 
Select the criteria you would like to build your network from (optional):" ] },
{ "cell_type": "code", "execution_count": 5, "id": "6eb9065a", "metadata": {}, "outputs": [], "source": [
"# Option widgets for the 'Config' accordion; config_network() copies their values onto network.hop.\n",
"officers_for_company = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get officers for each company',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"historic_addresses = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get historic addresses for each company',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"psc_address = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get PSC addresses for each company',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"officer_appointments = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get appointments for each officer',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"max_appointments_inttext = widgets.IntText(\n",
"    value=50,\n",
"    disabled=False\n",
")\n",
"\n",
"max_appointments = widgets.HBox([widgets.Label('Maximum number of appointments for each officer: '), max_appointments_inttext])\n",
"\n",
"# The variable keeps its original spelling because later cells refer to it by this name;\n",
"# only the label shown to the user is corrected.\n",
"officer_correspondance = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get correspondence address for each officer',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"duplicate_officers = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get duplicates for each officer',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"max_duplicates_inttext = widgets.IntText(\n",
"    value=50,\n",
"    disabled=False\n",
")\n",
"\n",
"max_duplicates = widgets.HBox([widgets.Label('Maximum number of duplicate officers for each officer: '), max_duplicates_inttext])\n",
"\n",
"officers_for_address = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get officers at each address',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"max_officers_inttext = widgets.IntText(\n",
"    value=50,\n",
"    disabled=False\n",
")\n",
"\n",
"max_officers_at_address = 
widgets.HBox([widgets.Label('Maximum number of officers corresponding from address: '), max_officers_inttext])\n",
"\n",
"companies_for_address = widgets.Checkbox(\n",
"    value=True,\n",
"    description='Get companies registered at each address',\n",
"    disabled=False,\n",
"    indent=False\n",
")\n",
"\n",
"max_companies_inttext = widgets.IntText(\n",
"    value=50,\n",
"    disabled=False\n",
")\n",
"\n",
"max_companies_at_address = widgets.HBox([widgets.Label('Maximum number of companies registered to address: '), max_companies_inttext])\n",
"\n",
"# Despite the name, this also holds the HBox rows and the Update button:\n",
"# it is every widget placed inside the 'Config' accordion, in display order.\n",
"checkboxes = [\n",
"    officers_for_company,\n",
"    historic_addresses,\n",
"    psc_address,\n",
"    officer_appointments,\n",
"    max_appointments,\n",
"    officer_correspondance,\n",
"    duplicate_officers,\n",
"    max_duplicates,\n",
"    officers_for_address,\n",
"    max_officers_at_address,\n",
"    companies_for_address,\n",
"    max_companies_at_address,\n",
"    navigation_button,\n",
"]\n" ] },
{ "cell_type": "code", "execution_count": 6, "id": "7da39d70", "metadata": {}, "outputs": [], "source": [
"def config_network():\n",
"    \"\"\"Copy the current option-widget values onto ``network.hop`` and unlock the build step.\n",
"\n",
"    Afterwards the depth selector and Build Network button are enabled, and the\n",
"    Update button is disabled and styled as successful.\n",
"    \"\"\"\n",
"    hop = network.hop\n",
"    # (attribute on network.hop, widget whose value is copied), in assignment order.\n",
"    option_sources = (\n",
"        ('get_company_officers', officers_for_company),\n",
"        ('get_company_address_history', historic_addresses),\n",
"        ('get_psc_correspondance_address', psc_address),\n",
"        ('get_officer_appointments', officer_appointments),\n",
"        ('officer_appointments_maxsize', max_appointments_inttext),\n",
"        ('get_officer_correspondance_address', officer_correspondance),\n",
"        ('get_officer_duplicates', duplicate_officers),\n",
"        ('officer_duplicates_maxsize', max_duplicates_inttext),\n",
"        ('get_officers_at_address', officers_for_address),\n",
"        ('officers_at_address_maxsize', max_officers_inttext),\n",
"        ('get_companies_at_address', companies_for_address),\n",
"        ('companies_at_address_maxsize', max_companies_inttext),\n",
"    )\n",
"    for attribute, source_widget in option_sources:\n",
"        setattr(hop, attribute, source_widget.value)\n",
"    depth_selector.disabled = False\n",
"    generate_network_button.disabled = False\n",
"    navigation_button.disabled = True\n",
"    navigation_button.button_style = 'success'" ] },
{ "cell_type": "code", "execution_count": 7, "id": "bec983fc", "metadata": {}, "outputs": [], "source": [
"accordion_config = widgets.Accordion(children=[widgets.VBox(checkboxes)])\n",
"accordion_config.set_title(0, 'Config')" ] },
{ "cell_type": "code", "execution_count": 8, "id": "1c68e6b4", "metadata": {}, "outputs": [
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9c4b347bd3ac4fc894bab6e419d1515f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Accordion(children=(VBox(children=(Checkbox(value=True, description='Get officers for each company', indent=Fa…" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ],
"source": [ "accordion_config" ] },
{ "cell_type": "markdown", "id": "addafb36", "metadata": {}, "source": [ "4. Select the depth of the network you would like to build:" ] },
{ "cell_type": "code", "execution_count": 9, "id": "ea0e8392", "metadata": {}, "outputs": [
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e1b9d44b6b5c4885865e137ce860e1c4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "BoundedIntText(value=1, max=20, min=1)" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bdf08191a94448b9b9e714b2e70d1f40", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(description='Build Network', style=ButtonStyle())" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e31d7367788d4e0d9f1e396c44223425", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='')" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "93193405b6b044a98c36b21a761fd65f", "version_major": 2, "version_minor": 0
}, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
"build_status = widgets.HTML(\n",
"    value=\"\",\n",
")\n",
"\n",
"build_map_button = widgets.Button(description='Build Map', disabled=True)\n",
"build_map_button.on_click(lambda bt: generate_map())\n",
"\n",
"def generate_network():\n",
"    \"\"\"Expand the network to the selected depth and unlock the Build Map button.\n",
"\n",
"    Runs inside ``output_box``. ``build_status`` reports progress, and the depth\n",
"    selector and Build Network button are re-enabled if the build raises, so\n",
"    the build can be retried.\n",
"    \"\"\"\n",
"    with output_box:\n",
"        depth_selector.disabled = True\n",
"        generate_network_button.disabled = True\n",
"        build_status.value = 'Building network...'\n",
"        try:\n",
"            network.perform_hop(depth_selector.value)\n",
"            network.run_map_preprocessing()\n",
"        except Exception:\n",
"            # Unlock the controls before re-raising so a failed build does not leave them stuck.\n",
"            depth_selector.disabled = False\n",
"            generate_network_button.disabled = False\n",
"            build_status.value = '\\u274c: Network build failed'\n",
"            raise\n",
"        build_status.value = '\\u2705: Network built'\n",
"        build_map_button.disabled = False\n",
"        generate_network_button.button_style = \"success\"\n",
"\n",
"\n",
"output_box = widgets.Output()\n",
"display(depth_selector, generate_network_button, build_status, output_box)" ] },
{ "cell_type": "markdown", "id": "03ffce05", "metadata": {}, "source": [ "5. Visualise network on a map:" ] },
{ "cell_type": "code", "execution_count": 10, "id": "6449cd96", "metadata": {}, "outputs": [
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e3ae2038efe4a8486ec3bcc35d6a3b9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Button(description='Build Map', style=ButtonStyle())" ] }, "metadata": {}, "output_type": "display_data" },
{ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3a2ab7ede61e48f6bcc95150e10cd500", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='')" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
"map_container = widgets.HTML(\n",
"    value=\"\",\n",
")\n",
"\n",
"# Built once when the cell runs, before any network exists; generate_map() later swaps in the real map.\n",
"map_data,path_table = mapview.build_map(network, clear_widget=False)\n",
"hbox_map = widgets.HBox([path_table])\n",
"vbox_map = widgets.VBox([map_data, hbox_map])\n",
"\n",
"accordion_map = widgets.Accordion(children=[vbox_map])\n",
"accordion_map.set_title(0, 'Map')\n",
"\n",
"tabs = ['Companies', 'Addresses', 'Officers', 'Company Details']\n",
"children = [widgets.Output() for tab in tabs]\n",
"tab = widgets.Tab()\n",
"tab.children = 
children\n",
"for i,title in enumerate(tabs):\n",
"    tab.set_title(i, title)\n",
"\n",
"accordion_data = widgets.Accordion(children=[tab])\n",
"accordion_data.set_title(0, 'Data')\n",
"\n",
"# NOTE(review): generate_map() fills this template with .format(filename=...), but its body\n",
"# is blank in this copy of the notebook - confirm the download-button markup is present.\n",
"html_buttons = '''\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"'''\n",
"\n",
"def generate_download_zip(company_text, network):\n",
"    \"\"\"Export the network tables to a folder named after the company ID and zip it.\n",
"\n",
"    Writes one CSV per table into the folder, asks ``network.save`` to write\n",
"    ``<company id>.json`` with the folder passed as its second argument, then\n",
"    archives the folder.\n",
"\n",
"    Returns the archive file name, ``<company id>.zip``.\n",
"    \"\"\"\n",
"    company_id = str(company_text.value)\n",
"    export_dir = Path(company_id)\n",
"    export_dir.mkdir(parents=True, exist_ok=True)\n",
"    # CSV base name -> records the CSV is built from.\n",
"    tables = {\n",
"        'companies': network.company_ids,\n",
"        'addresses': network.addresses,\n",
"        'officers': network.officer_ids,\n",
"        'company_details': network.companies,\n",
"    }\n",
"    for table_name, records in tables.items():\n",
"        pd.DataFrame(records).to_csv(export_dir / (table_name + '.csv'))\n",
"    network.save(company_id + '.json', './' + company_id + \"/\")\n",
"    shutil.make_archive(company_id, 'zip', company_id)\n",
"    return company_id + '.zip'\n",
"\n",
"def generate_map():\n",
"    \"\"\"Show the built network on the map, fill the data tabs and render the download button.\"\"\"\n",
"    map_data, path_table = mapview.build_map(network, clear_widget=False)\n",
"    vbox_map.children = [map_data, widgets.HBox([path_table])]\n",
"    accordion_map.selected_index = 0\n",
"    accordion_data.selected_index = 0\n",
"    build_map_button.disabled = True\n",
"    build_map_button.button_style = \"Success\"\n",
"    # One table per tab, in the same order as the tab titles.\n",
"    tab_tables = (network.company_ids, network.addresses, network.officer_ids, network.companies)\n",
"    for tab_output, records in zip(tab.children, tab_tables):\n",
"        with tab_output:\n",
"            display(pd.DataFrame(records))\n",
"    zip_filename = generate_download_zip(company_text, network)\n",
"    html_button = html_buttons.format(filename=zip_filename)\n",
"    with download_link:\n",
"        display(HTML(html_button))\n",
"\n",
"download_link = widgets.Output()\n",
"display(build_map_button, map_container)" ] },
{ "cell_type": "code", "execution_count": 11, "id": "684a116e", "metadata": {}, "outputs": [ { "data": {
"application/vnd.jupyter.widget-view+json": { "model_id": "9b914a00be10491f82169cf06e9fe013", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Accordion(children=(VBox(children=(Map(center=[50, 0], controls=(ZoomControl(options=['position', 'zoom_in_tex…" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accordion_map" ] }, { "cell_type": "code", "execution_count": 12, "id": "1e328d41", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1952860a8548480dad48dbd10b764791", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Accordion(children=(Tab(children=(Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': ' co…" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accordion_data" ] }, { "cell_type": "code", "execution_count": 13, "id": "9224dce5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e7dd9b6842f440b4bbb7d080bafefab0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output(outputs=({'output_type': 'display_data', 'data': {'text/plain': '