implemented voila dashboard and setup

This commit is contained in:
seangreaves
2022-12-31 13:51:42 +00:00
parent 3371acdbcc
commit 7f0e8be388
21 changed files with 3045 additions and 1263 deletions

50
.gitignore vendored Normal file
View File

@@ -0,0 +1,50 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Sphinx documentation
docs/_build/

File diff suppressed because it is too large Load Diff

BIN
assets/.DS_Store vendored

Binary file not shown.

Binary file not shown.

100
config/requirements.txt Normal file
View File

@@ -0,0 +1,100 @@
anyio==3.6.2
appnope==0.1.3
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
arrow==1.2.3
asttokens==2.2.1
attrs==22.2.0
Babel==2.11.0
backcall==0.2.0
beautifulsoup4==4.11.1
bleach==5.0.1
bqplot==0.12.36
branca==0.6.0
cffi==1.15.1
charset-normalizer==2.1.1
comm==0.1.2
debugpy==1.6.4
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==1.2.0
fastjsonschema==2.16.2
fqdn==1.5.1
idna==3.4
importlib-metadata==5.2.0
ipykernel==6.19.4
ipyleaflet==0.17.2
ipython==8.7.0
ipython-genutils==0.2.0
ipywidgets==8.0.4
isoduration==20.11.0
jedi==0.18.2
Jinja2==3.1.2
json5==0.9.10
jsonpointer==2.3
jsonschema==4.17.3
jupyter-events==0.5.0
jupyter-server==1.23.4
jupyter_client==7.4.1
jupyter_core==5.1.1
jupyter_server_terminals==0.4.3
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.5
jupyterlab_server==2.17.0
MarkupSafe==2.1.1
matplotlib-inline==0.1.6
mistune==2.0.4
nbclassic==0.4.8
nbclient==0.7.2
nbconvert==7.2.7
nbformat==5.7.1
nest-asyncio==1.5.6
notebook==6.5.2
notebook_shim==0.2.2
numpy==1.24.0
packaging==22.0
pandas==1.5.2
pandocfilters==1.5.0
parso==0.8.3
pexpect==4.8.0
pickleshare==0.7.5
platformdirs==2.6.0
prometheus-client==0.15.0
prompt-toolkit==3.0.36
psutil==5.9.4
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
Pygments==2.13.0
pyrsistent==0.19.2
python-dateutil==2.8.2
python-json-logger==2.0.4
pytz==2022.7
PyYAML==6.0
pyzmq==24.0.1
regex==2022.10.31
requests==2.28.1
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
Send2Trash==1.8.0
six==1.16.0
sniffio==1.3.0
soupsieve==2.3.2.post1
stack-data==0.6.2
terminado==0.17.1
tinycss2==1.2.1
tornado==6.2
traitlets==5.8.0
traittypes==0.2.1
uri-template==1.2.0
urllib3==1.26.13
voila==0.4.0
wcwidth==0.2.5
webcolors==1.12
webencodings==0.5.1
websocket-client==1.4.2
websockets==10.4
widgetsnbextension==4.0.5
xyzservices==2022.9.0
zipp==3.11.0

View File

@@ -0,0 +1,302 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b6926e35",
"metadata": {},
"source": [
"# sugartrail "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f17ebdd2",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import mapview, api, base\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbc5e202",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"network = base.Network()"
]
},
{
"cell_type": "markdown",
"id": "1704e377",
"metadata": {},
"source": [
"1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0632780b",
"metadata": {},
"outputs": [],
"source": [
"API_input = widgets.Text(\n",
" value='',\n",
" placeholder='Insert API Key',\n",
" disabled=False\n",
")\n",
"\n",
"auth_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"auth_button = widgets.Button(description='Authenticate',button_style='success')\n",
"auth_button.on_click(lambda bt: auth())\n",
"\n",
"def auth():\n",
" auth_button.disabled=True\n",
" API_input.disabled=True\n",
" api.basic_auth.username = API_input.value\n",
" if api.test():\n",
" auth_status.value = u'\\u2705: Login Successful'\n",
" company_text.disabled = False\n",
" init_button.disabled = False\n",
" else:\n",
" auth_button.disabled=False\n",
" API_input.disabled=False\n",
" auth_status.value = u'\\u274c: Invalid API key'\n",
"\n",
"display(API_input, auth_button, auth_status)"
]
},
{
"cell_type": "markdown",
"id": "2bd8c5be",
"metadata": {},
"source": [
"2. Insert the unique company registration number (CRN) for a company you would like to investigate:"
]
},
{
"cell_type": "markdown",
"id": "d5f9b6ad",
"metadata": {},
"source": [
"<!-- Insert a valid [Companies House Public Data API key](https://developer.company-information.service.gov.uk/get-started/) as `username` string value below. If you don't want to use the API and would prefer loading a pre-built network, uncomment and run the cell below and then run the final cell to build and load the map. -->"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "128106c5",
"metadata": {},
"outputs": [],
"source": [
"company_text = widgets.Text(\n",
" value='',\n",
" placeholder='Insert Company ID',\n",
" disabled=True\n",
")\n",
"\n",
"init_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"init_button = widgets.Button(description='Initialise',button_style='success', disabled=True)\n",
"init_button.on_click(lambda bt: init_network()) \n",
"\n",
"def init_network():\n",
"    # Lock this step's inputs while the company lookup runs.\n",
"    init_button.disabled = True\n",
"    company_text.disabled = True\n",
"    api.basic_auth.username = API_input.value\n",
"    response = api.get_company(str(company_text.value))\n",
"    if response:\n",
"        network.company_id = str(company_text.value)\n",
"        init_status.value = u'\\u2705: Initialisation Successful for ' + str(response['company_name'])\n",
"        depth_selector.disabled = False\n",
"        generate_network_button.disabled = False\n",
"    else:\n",
"        # Re-enable this step's own widgets (not the authentication ones) so another company number can be tried.\n",
"        init_button.disabled = False\n",
"        company_text.disabled = False\n",
"        init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + str(company_text.value) + ' found.'\n",
"\n",
"display(company_text, init_button, init_status)"
]
},
{
"cell_type": "markdown",
"id": "addafb36",
"metadata": {},
"source": [
"3. Select the depth of the network you would like to build:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea0e8392",
"metadata": {},
"outputs": [],
"source": [
"depth_selector = widgets.BoundedIntText(\n",
" value=1,\n",
" min=1,\n",
" max=5,\n",
" step=1,\n",
" disabled=True\n",
")\n",
"\n",
"generate_network_button = widgets.Button(description='Build Network',button_style='success', disabled=True)\n",
"generate_network_button.on_click(lambda bt: generate_network()) \n",
"\n",
"\n",
"build_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"def generate_network():\n",
" with output_box:\n",
" depth_selector.disabled = True\n",
" generate_network_button.disabled = True\n",
" network.perform_hop(depth_selector.value + 1)\n",
" network.run_map_preprocessing()\n",
" build_map_button.disabled = False\n",
" \n",
" \n",
"output_box = widgets.Output()\n",
"display(depth_selector, generate_network_button, build_status, output_box)"
]
},
{
"cell_type": "markdown",
"id": "03ffce05",
"metadata": {},
"source": [
"4. Visualise network on a map:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6449cd96",
"metadata": {},
"outputs": [],
"source": [
"build_map_button=widgets.Button(description='Build Map',button_style='success', disabled=True)\n",
"build_map_button.on_click(lambda bt: generate_map()) \n",
"\n",
"map_container = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"def generate_map():\n",
"    # Build the map for the populated network and swap it into the existing VBox.\n",
"    map_data, path_table = mapview.build_map(network, clear_widget=False)\n",
"    hbox = widgets.HBox([path_table])\n",
"    vbox.children = [map_data, hbox]\n",
"    # Setting selected_index expands the 'Map' panel. The former `accordi0n.open(0)` call used a\n",
"    # misspelt name, raised NameError and stopped the handler before the button was disabled.\n",
"    accordion.selected_index = 0\n",
"    build_map_button.disabled = True\n",
"\n",
"display(build_map_button, map_container)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5a07e3b",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"empty_network = base.Network()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "01dca0cf",
"metadata": {
"scrolled": true,
"tags": [
"7"
]
},
"outputs": [],
"source": [
"map_data,path_table = mapview.build_map(network, clear_widget=False) \n",
"hbox = widgets.HBox([path_table])\n",
"vbox = widgets.VBox([map_data, hbox])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "684a116e",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "08a9390c88cb49cfb5705f3a0b378ced",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Accordion(children=(VBox(children=(Map(bottom=87768.0, center=[51.27331450324598, -3.223454500000008], control…"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"accordion = widgets.Accordion(children=[vbox])\n",
"accordion.set_title(0, 'Map')\n",
"# accordion.set_title(1, 'Text')\n",
"accordion"
]
},
{
"cell_type": "markdown",
"id": "457bf4d0",
"metadata": {},
"source": [
"<!-- Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n",
"\n",
"Select a marker to display additional information: \n",
"- pop-up with the selected company's name and address\n",
"- table containing the most efficient paths from the origin to the selected company\n",
"- antpaths for each company in the network. Red antpath represents the path through all the historic addresses for the selected company. Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. -->"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,302 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b6926e35",
"metadata": {},
"source": [
"# sugartrail "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f17ebdd2",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import mapview, api, base\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbc5e202",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"network = base.Network()"
]
},
{
"cell_type": "markdown",
"id": "1704e377",
"metadata": {},
"source": [
"1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0632780b",
"metadata": {},
"outputs": [],
"source": [
"API_input = widgets.Text(\n",
" value='',\n",
" placeholder='Insert API Key',\n",
" disabled=False\n",
")\n",
"\n",
"auth_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"auth_button = widgets.Button(description='Authenticate',button_style='success')\n",
"auth_button.on_click(lambda bt: auth())\n",
"\n",
"def auth():\n",
" auth_button.disabled=True\n",
" API_input.disabled=True\n",
" api.basic_auth.username = API_input.value\n",
" if api.test():\n",
" auth_status.value = u'\\u2705: Login Successful'\n",
" company_text.disabled = False\n",
" init_button.disabled = False\n",
" else:\n",
" auth_button.disabled=False\n",
" API_input.disabled=False\n",
" auth_status.value = u'\\u274c: Invalid API key'\n",
"\n",
"display(API_input, auth_button, auth_status)"
]
},
{
"cell_type": "markdown",
"id": "2bd8c5be",
"metadata": {},
"source": [
"2. Insert the unique company registration number (CRN) for a company you would like to investigate:"
]
},
{
"cell_type": "markdown",
"id": "d5f9b6ad",
"metadata": {},
"source": [
"<!-- Insert a valid [Companies House Public Data API key](https://developer.company-information.service.gov.uk/get-started/) as `username` string value below. If you don't want to use the API and would prefer loading a pre-built network, uncomment and run the cell below and then run the final cell to build and load the map. -->"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "128106c5",
"metadata": {},
"outputs": [],
"source": [
"company_text = widgets.Text(\n",
" value='',\n",
" placeholder='Insert Company ID',\n",
" disabled=True\n",
")\n",
"\n",
"init_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"init_button = widgets.Button(description='Initialise',button_style='success', disabled=True)\n",
"init_button.on_click(lambda bt: init_network()) \n",
"\n",
"def init_network():\n",
"    # Lock this step's inputs while the company lookup runs.\n",
"    init_button.disabled = True\n",
"    company_text.disabled = True\n",
"    api.basic_auth.username = API_input.value\n",
"    response = api.get_company(str(company_text.value))\n",
"    if response:\n",
"        network.company_id = str(company_text.value)\n",
"        init_status.value = u'\\u2705: Initialisation Successful for ' + str(response['company_name'])\n",
"        depth_selector.disabled = False\n",
"        generate_network_button.disabled = False\n",
"    else:\n",
"        # Re-enable this step's own widgets (not the authentication ones) so another company number can be tried.\n",
"        init_button.disabled = False\n",
"        company_text.disabled = False\n",
"        init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + str(company_text.value) + ' found.'\n",
"\n",
"display(company_text, init_button, init_status)"
]
},
{
"cell_type": "markdown",
"id": "addafb36",
"metadata": {},
"source": [
"3. Select the depth of the network you would like to build:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea0e8392",
"metadata": {},
"outputs": [],
"source": [
"depth_selector = widgets.BoundedIntText(\n",
" value=1,\n",
" min=1,\n",
" max=5,\n",
" step=1,\n",
" disabled=True\n",
")\n",
"\n",
"generate_network_button = widgets.Button(description='Build Network',button_style='success', disabled=True)\n",
"generate_network_button.on_click(lambda bt: generate_network()) \n",
"\n",
"\n",
"build_status = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"def generate_network():\n",
" with output_box:\n",
" depth_selector.disabled = True\n",
" generate_network_button.disabled = True\n",
" network.perform_hop(depth_selector.value + 1)\n",
" network.run_map_preprocessing()\n",
" build_map_button.disabled = False\n",
" \n",
" \n",
"output_box = widgets.Output()\n",
"display(depth_selector, generate_network_button, build_status, output_box)"
]
},
{
"cell_type": "markdown",
"id": "03ffce05",
"metadata": {},
"source": [
"4. Visualise network on a map:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6449cd96",
"metadata": {},
"outputs": [],
"source": [
"build_map_button=widgets.Button(description='Build Map',button_style='success', disabled=True)\n",
"build_map_button.on_click(lambda bt: generate_map()) \n",
"\n",
"map_container = widgets.HTML(\n",
" value=\"\",\n",
")\n",
"\n",
"def generate_map():\n",
"    # Build the map for the populated network and swap it into the existing VBox.\n",
"    map_data, path_table = mapview.build_map(network, clear_widget=False)\n",
"    hbox = widgets.HBox([path_table])\n",
"    vbox.children = [map_data, hbox]\n",
"    # Setting selected_index expands the 'Map' panel. The former `accordi0n.open(0)` call used a\n",
"    # misspelt name, raised NameError and stopped the handler before the button was disabled.\n",
"    accordion.selected_index = 0\n",
"    build_map_button.disabled = True\n",
"\n",
"display(build_map_button, map_container)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5a07e3b",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"empty_network = base.Network()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "01dca0cf",
"metadata": {
"scrolled": true,
"tags": [
"7"
]
},
"outputs": [],
"source": [
"map_data,path_table = mapview.build_map(network, clear_widget=False) \n",
"hbox = widgets.HBox([path_table])\n",
"vbox = widgets.VBox([map_data, hbox])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "684a116e",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "08a9390c88cb49cfb5705f3a0b378ced",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Accordion(children=(VBox(children=(Map(bottom=87768.0, center=[51.27331450324598, -3.223454500000008], control…"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"accordion = widgets.Accordion(children=[vbox])\n",
"accordion.set_title(0, 'Map')\n",
"# accordion.set_title(1, 'Text')\n",
"accordion"
]
},
{
"cell_type": "markdown",
"id": "457bf4d0",
"metadata": {},
"source": [
"<!-- Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n",
"\n",
"Select a marker to display additional information: \n",
"- pop-up with the selected company's name and address\n",
"- table containing the most efficient paths from the origin to the selected company\n",
"- antpaths for each company in the network. Red antpath represents the path through all the historic addresses for the selected company. Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. -->"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -30,12 +30,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "81c37bf3",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import api, mapview, base\n",
"from ipywidgets import VBox, HBox\n",
"\n",
"api.basic_auth.username = \"\""
]
@@ -94,7 +95,7 @@
"id": "f73b17d8",
"metadata": {},
"source": [
"![title](assets/images/spy.png)"
"![title](../assets/images/spy.png)"
]
},
{
@@ -110,7 +111,7 @@
"id": "e21f3c98",
"metadata": {},
"source": [
"![title](assets/images/scrooge.png)"
"![title](../assets/images/scrooge.png)"
]
},
{
@@ -440,7 +441,6 @@
},
"outputs": [],
"source": [
"from ipywidgets import VBox, HBox\n",
"map_data, path_table = mapview.build_map(network) \n",
"hbox = HBox([path_table])\n",
"vbox = VBox([map_data, hbox])\n",
@@ -467,7 +467,7 @@
"id": "f6674e52",
"metadata": {},
"source": [
"<img src=\"assets/images/kingdom_table.png\" alt=\"Drawing\" style=\"width: 700px;\"/>\n"
"<img src=\"../assets/images/kingdom_table.png\" alt=\"Drawing\" style=\"width: 700px;\"/>\n"
]
},
{
@@ -505,7 +505,7 @@
"source": [
"import pickle\n",
"\n",
"with open('assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
" pickle.dump(network, handle)"
]
},
@@ -516,7 +516,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open('assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
" network = pickle.load(handle)"
]
}
@@ -537,7 +537,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.9.15"
}
},
"nbformat": 4,

View File

@@ -122,7 +122,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/western_crown.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/western_crown.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 470-482 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -174,7 +174,7 @@
"outputs": [],
"source": [
"# import pickle\n",
"# with open('assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
"# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
"# western_crown_network = pickle.load(handle)"
]
},
@@ -231,7 +231,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(western_crown_network.find_path('05548476'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
"pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
]
},
{
@@ -240,7 +240,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/537.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/537.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 537 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -265,7 +265,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/524.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/524.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 524 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -291,7 +291,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/470.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/470.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\">470-482 Oxford Street</figcaption>\n",
"</figure>"
]
@@ -311,7 +311,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/447.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/447.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\">447 Oxford Street</figcaption>\n",
"</figure>"
]
@@ -331,7 +331,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/407.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/407.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 407-409 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -351,7 +351,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/269.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/269.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 267-269 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -371,7 +371,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/263.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/263.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 263-265 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -400,7 +400,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/240.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/240.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 240-242 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -426,7 +426,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/158.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/158.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 158 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -447,7 +447,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/146.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/146.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 146-148 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -478,7 +478,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/142.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/142.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 142 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -499,7 +499,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/41.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/41.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 41 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -519,7 +519,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/37.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/37.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 37-39 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -556,7 +556,7 @@
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"assets/images/4.png\" style=\"width:100%\">\n",
"<img src=\"../assets/images/4.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 4 Oxford Street </figcaption>\n",
"</figure>"
]
@@ -607,7 +607,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.9.15"
}
},
"nbformat": 4,

View File

@@ -0,0 +1,440 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b2110da7",
"metadata": {},
"source": [
"*In this tutorial we will investigate addresses with a large number of companies registered via the API and Companies House Data Product download.*"
]
},
{
"cell_type": "markdown",
"id": "25528662",
"metadata": {},
"source": [
"### Busy Addresses and API Limits"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab9e8ee0",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import base, api, mapview\n",
"import pandas as pd\n",
"api.basic_auth.username = \"\""
]
},
{
"cell_type": "markdown",
"id": "00c6a5be",
"metadata": {},
"source": [
"When navigating Companies House there are times when we will run into some very popular addresses. For example, let's say we build a network from [this officer](https://find-and-update.company-information.service.gov.uk/officers/Nd2URspq4bvLy-hwzDZ0_p7FGJw/appointments):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "723f234a",
"metadata": {},
"outputs": [],
"source": [
"officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n",
"network = base.Network(officer_id=officer_id)\n",
"network.perform_hop(2)"
]
},
{
"cell_type": "markdown",
"id": "edad561e",
"metadata": {},
"source": [
"Within 2 hops we've got over 60 addresses (although many of them look like duplicate entries):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eea32631",
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ce897c0",
"metadata": {},
"outputs": [],
"source": [
"network.addresses['address'].unique()"
]
},
{
"cell_type": "markdown",
"id": "8c17fff5",
"metadata": {},
"source": [
"If we check out the `maxsize_entities` property of our Network class, we will see a dataframe containing all of the addresses and officers that have exceeded the maxsize limits imposed in the Hop class. In this case, we can see one of the addresses in the network has over 4800 companies based there."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b8d3c20",
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities"
]
},
{
"cell_type": "markdown",
"id": "5ad7b443",
"metadata": {},
"source": [
"Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `company_ids`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f94f731",
"metadata": {},
"outputs": [],
"source": [
"network.hop.companies_at_address_maxsize"
]
},
{
"cell_type": "markdown",
"id": "2d4edaf0",
"metadata": {},
"source": [
"If we check `company_ids` we'll notice it hasn't had 4800 companies added to it:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3ef12fe",
"metadata": {},
"outputs": [],
"source": [
"len(network.company_ids['company_id'].unique())"
]
},
{
"cell_type": "markdown",
"id": "d177f1b5",
"metadata": {},
"source": [
"Including limits is useful to avoid our databases getting clogged up with random companies. \n",
"But let's pause briefly to explore what kind of address would have thousands of companies registered there."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bb8bdf1",
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities['node'][0]"
]
},
{
"cell_type": "markdown",
"id": "e8644d6b",
"metadata": {},
"source": [
"![title](../assets/images/regent_storefront.jpeg)"
]
},
{
"cell_type": "markdown",
"id": "40354a28",
"metadata": {},
"source": [
"\"3rd Floor, 207, Regent Street\" is a \"virtual office\" run by a company called [Hold Everything](https://www.hold-everything.com/). Businesses can use this address for correspondence/registration for £24 a month:"
]
},
{
"cell_type": "markdown",
"id": "11b08c79",
"metadata": {},
"source": [
"![title](../assets/images/exclusive.png)"
]
},
{
"cell_type": "markdown",
"id": "2c9e85ed",
"metadata": {},
"source": [
"However, the large number of companies registered at a single address can lead to many instances of mistaken identity. Just because a company is registered at a virtual office does not mean it has any connection with other companies registered there:"
]
},
{
"cell_type": "markdown",
"id": "be5e4352",
"metadata": {},
"source": [
"![title](../assets/images/review.png)"
]
},
{
"cell_type": "markdown",
"id": "282ba8ea",
"metadata": {},
"source": [
"Numerous media outlets have reported on fraudulent companies that use virtual offices and incorporation services: \n",
"- Kemp House, 162 City Road | Capital Officer: [Mystery group took millions in furlough funds - Financial Times](https://www.ft.com/content/b3c70369-5170-47ca-b779-fc0898fd29e6)\n",
"- 20-22 Wenlock Road | Made Simple: [Court shuts down companies behind £9m truffle scam - Gov.uk](https://www.gov.uk/government/news/court-shuts-down-companies-behind-9m-truffle-scam)\n",
"- 2 Woodberry Down | A1 Company Services [How A Suburban North London House Is Connected To The Paul Manafort Indictment - Huffington Post](https://www.huffingtonpost.co.uk/entry/manfort-london-connection_uk_59f72f50e4b07fdc5fbf92c7)\n",
"- 29 Harley Street | Formations House [Offshore in central London: the curious case of 29 Harley Street - The Guardian](https://www.theguardian.com/business/2016/apr/19/offshore-central-london-curious-case-29-harley-street)\n",
"- 63-66 Hatton Garden | Valemont Properties Ltd [The Global Laundromat: how did it work and who benefited? - The Guardian](https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited)"
]
},
{
"cell_type": "markdown",
"id": "a85fdcfa",
"metadata": {},
"source": [
"If we wanted to get all companies listed at 207 Regent Street we can adjust our maxsize limits to `None` and attempt to perform a hop again:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb0c02d0",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network = base.Network(address='3rd Floor, 207 Regent Street London W1B 3HH England')\n",
"regent_street_network.hop.companies_at_address_maxsize = None\n",
"regent_street_network.hop.officers_at_address_maxsize = None\n",
"regent_street_network.perform_hop(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3dc0f165",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.company_ids"
]
},
{
"cell_type": "markdown",
"id": "cff1061e",
"metadata": {},
"source": [
"Such large networks can still be interesting to analyse. For instance if we perform another hop this will get all the officers for every company at the address. This will take several hours to build as we have lots of companies to analyse, however if we want to save time we could just uncomment and load a pre-made network below: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef262359",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.perform_hop(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38937142",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
" regent_street_network = pickle.load(handle)"
]
},
{
"cell_type": "markdown",
"id": "d6e330ee",
"metadata": {},
"source": [
"Analysing the most frequently occurring officers running businesses from 207 Regent Street returns some very busy officers and incorporation agents:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e97fa3b",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.officer_ids['name'].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "d6a22e40",
"metadata": {},
"source": [
"A quick news lookup on two of the officers in the top 5, J. Beardsley of Helve TCS Limited and S. Poppleton, reveals these names to be connected to several known instances of fraud:\n",
"- [Fraudster duo jailed for their part in defrauding millions of pounds from over 100 victims - Crown Prosecution Service](https://www.cps.gov.uk/cps/news/fraudster-duo-jailed-their-part-defrauding-millions-pounds-over-100-victims)\n",
"- [Print farming companies struck off - Printweek](https://www.printweek.com/news/article/print-farming-companies-struck-off)\n",
"- [Rogue book publishers slammed shut by the courts - Gov.uk](https://www.gov.uk/government/news/rogue-book-publishers-slammed-shut-by-the-courts)"
]
},
{
"cell_type": "markdown",
"id": "f0699a27",
"metadata": {},
"source": [
"### Busier Addresses and Downloaded Data"
]
},
{
"cell_type": "markdown",
"id": "944525cb",
"metadata": {},
"source": [
"There are situations where some addresses have thousands or even tens of thousands of companies registered. Companies House provides two methods for getting company data, API and data product. We used the API to get the information above which returns all active and dissolved companies registered to the address. We get the same result when we attempt to perform an advanced company search using this address through the website:"
]
},
{
"cell_type": "markdown",
"id": "c307994f",
"metadata": {},
"source": [
"![title](../assets/images/regent.png)"
]
},
{
"cell_type": "markdown",
"id": "517e6aaa",
"metadata": {},
"source": [
"Unfortunately the API is limited to returning a maximum of 5000 results. This is fine in our case with 207 Regent Street because we're just under the limit. However, there are much bigger fish out there, for instance '75 Shelton Street':"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f40ee11",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.perform_hop(1)\n",
"shelton_street_network.maxsize_entities"
]
},
{
"cell_type": "markdown",
"id": "6f1abb52",
"metadata": {},
"source": [
"We can already see it's over the 5000 limit for the API. If we check online we can see the number is huge: "
]
},
{
"cell_type": "markdown",
"id": "03b64f03",
"metadata": {},
"source": [
"![title](../assets/images/shelton.png)"
]
},
{
"cell_type": "markdown",
"id": "f9fda7a6",
"metadata": {},
"source": [
"This is where the data product comes in. We can download it in one go and use it to get all of the \"active\" companies. To use the data product:\n",
"1. Download it from [here](http://download.companieshouse.gov.uk/en_output.html) (might take some time as it's a pretty large file, ~430MB)\n",
"2. Move it to local directory `assets/company_data/` and unzip the file \n",
"3. Load into a dataframe which we can pass to our network class\n",
"\n",
"It might take a minute to load. Now adjust the file string below and attempt to load it into `company_data`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d9d0080",
"metadata": {},
"outputs": [],
"source": [
"company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
]
},
{
"cell_type": "markdown",
"id": "2273cf39",
"metadata": {},
"source": [
"Now let's try to get every company at the very overcrowded 71-75 Shelton Street address:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e273ce0",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.hop.companies_at_address_maxsize = None\n",
"shelton_street_network.hop.officers_at_address_maxsize = None\n",
"shelton_street_network.get_officers_at_address = False\n",
"shelton_street_network.perform_hop(1, company_data= company_data)"
]
},
{
"cell_type": "markdown",
"id": "820a908d",
"metadata": {},
"source": [
"If we check `company_ids` we have over 70000 companies that we could build a network from if we had lots of time on our hands:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12acb915",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network.company_ids"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -37,7 +37,7 @@
"# # network build from Domain Foundation, company_id = \"11951034\"\n",
"# import pickle\n",
"\n",
"# with open('assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
"# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
"# network = pickle.load(handle)"
]
},
@@ -121,7 +121,7 @@
"execution_count": null,
"id": "01dca0cf",
"metadata": {
"scrolled": true,
"scrolled": false,
"tags": [
"7"
]

View File

@@ -0,0 +1,545 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0639ca05",
"metadata": {},
"source": [
"*In this tutorial we will walk through the capabilities of the tool in depth.*"
]
},
{
"cell_type": "markdown",
"id": "538c9eb1",
"metadata": {},
"source": [
"### Introduction \n",
"\n",
"'Sugartrail' was developed to make it easier and faster for researchers to explore connections between companies, persons and addresses within [Companies House](https://www.gov.uk/government/organisations/companies-house). Researchers can build networks of connected companies, persons and addresses based on a defined set of connectivity criteria and then visualise these connections through an [OpenStreetMaps interface](https://ipyleaflet.readthedocs.io/en/latest/index.html)."
]
},
{
"cell_type": "markdown",
"id": "eee8d524",
"metadata": {},
"source": [
"### Prerequisites\n",
"\n",
"Sugartrail uses the [Companies House Public Data API](https://developer-specs.company-information.service.gov.uk/companies-house-public-data-api/reference) to gather data on connected companies, persons and addresses. To access this API you will need a key which you can acquire by registering a [user account](https://developer.company-information.service.gov.uk/get-started/). Once you've acquired the key, insert it below as the string value of `api.basic_auth.username`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81c37bf3",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import api, mapview, base\n",
"from ipywidgets import VBox, HBox\n",
"\n",
"api.basic_auth.username = \"\""
]
},
{
"cell_type": "markdown",
"id": "ad4599dc",
"metadata": {},
"source": [
"Let's make a test request to validate everything works by attempting to get all the officers who work at [this company](https://find-and-update.company-information.service.gov.uk/company/12411673). "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51a1dd4f",
"metadata": {},
"outputs": [],
"source": [
"company_id = \"12411673\"\n",
"api.get_company_officers(company_id)"
]
},
{
"cell_type": "markdown",
"id": "29d8dd26",
"metadata": {},
"source": [
"### Initialising Networks \n",
"\n",
"To create a network we start from a single company, person or address. Networks are built and stored with the `Network` class. Let's go ahead and create a new network:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63bc00fa",
"metadata": {},
"outputs": [],
"source": [
"network = base.Network()"
]
},
{
"cell_type": "markdown",
"id": "aeedf139",
"metadata": {},
"source": [
"`Network` accepts either a company ID, officer ID or address string as the initial node. For example, [this company](https://find-and-update.company-information.service.gov.uk/company/12411673): `company_id` = \"12411673\"\n",
"\n",
"If we wanted to search by address, then `address` = \"513 Tong Street, Flat 5, Bradford, England, BD4 6NA\""
]
},
{
"cell_type": "markdown",
"id": "f73b17d8",
"metadata": {},
"source": [
"![title](../assets/images/spy.png)"
]
},
{
"cell_type": "markdown",
"id": "b3caccb6",
"metadata": {},
"source": [
"For [this officer](https://find-and-update.company-information.service.gov.uk/officers/6WODVBRaegvY3UvEhcQxg0OsPkc/appointments), `officer_id` = \"6WODVBRaegvY3UvEhcQxg0OsPkc\""
]
},
{
"cell_type": "markdown",
"id": "e21f3c98",
"metadata": {},
"source": [
"![title](../assets/images/scrooge.png)"
]
},
{
"cell_type": "markdown",
"id": "a6198a80",
"metadata": {},
"source": [
"Let's build the network from `company_id`: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31eea99d",
"metadata": {},
"outputs": [],
"source": [
"network.company_id=\"11004735\""
]
},
{
"cell_type": "markdown",
"id": "7bd5060d",
"metadata": {},
"source": [
"We could also just initialise the network by passing `company_id` as an input: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c70f41f",
"metadata": {},
"outputs": [],
"source": [
"network = base.Network(company_id=\"11004735\")"
]
},
{
"cell_type": "markdown",
"id": "cd0f2a9e",
"metadata": {},
"source": [
"Data about companies, persons and addresses are stored in several attributes within the `Network` class. If we check the `company_ids` property, we will find the entry we just created:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e12f5461",
"metadata": {},
"outputs": [],
"source": [
"network.company_ids"
]
},
{
"cell_type": "markdown",
"id": "91c14cbb",
"metadata": {},
"source": [
"Each company is represented by its unique ID (`company_id`), number of hops from the origin company (`n`) and the company, address or person it connects to. As we've only saved the origin company so far, there isn't any information on links or connected nodes. There are also attributes for storing officer IDs (`officer_ids`) and addresses (`addresses`), although they have no information in them yet:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33ed61e2",
"metadata": {},
"outputs": [],
"source": [
"network.officer_ids"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5a52e6a",
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
]
},
{
"cell_type": "markdown",
"id": "72f30427",
"metadata": {},
"source": [
"### Building Networks"
]
},
{
"cell_type": "markdown",
"id": "862f00ef",
"metadata": {},
"source": [
"We can now build the network by performing hops that will find new company IDs, officer IDs and addresses connected to the entities already stored within the network. \n",
"\n",
"There are a finite number of ways that officers, companies and addresses can be connected within Companies House:\n",
"\n",
"#### Companies \n",
"\n",
"1. Companies → Officers: companies have officers \n",
"2. Companies → Addresses: companies have a history of registered addresses \n",
"3. Companies → Addresses: companies have correspondence addresses for their persons of significant control (psc)\n",
"\n",
"#### Officers \n",
"\n",
"4. Officers → Companies: officers have appointments (companies they have a role in) \n",
"5. Officers → Addresses: officers have correspondence addresses\n",
"6. Officers → Officers: officers may have duplicate entries within Companies House; other officers using the same name and birth date (but different values for `officer_id`)\n",
"\n",
"#### Addresses \n",
"\n",
"7. Addresses → Officers: addresses are used as officer correspondence addresses \n",
"8. Addresses → Companies: addresses are used as company correspondence addresses \n",
"\n",
"To build the network we can use any combination of these connectivity criteria. The above connections are implemented as methods that get called every time we perform a hop: \n",
"\n",
"1. get_company_officers\n",
"2. get_company_address_history\n",
"3. get_psc_correspondance_address\n",
"4. get_officer_appointments\n",
"5. get_officer_correspondance_address \n",
"6. get_officer_duplicates \n",
"7. get_officers_at_address\n",
"8. get_companies_at_address\n",
"\n",
"We can toggle each of these methods via boolean properties of the `Hop` subclass:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32643a9c",
"metadata": {},
"outputs": [],
"source": [
"network.hop.__dict__"
]
},
{
"cell_type": "markdown",
"id": "1802bb34",
"metadata": {},
"source": [
"We can see the `Hop` subclass contains all of the connections mentioned above set to `True` by default, therefore every time we perform a hop, the network will use these methods to get data.\n",
"\n",
"We also notice that there are some properties setting a \"maxsize\" limit. These properties ensure that if the number of results returned by the method exceeds this limit then the results will not be stored within the `Network` class properties. This limit is quite important when building networks as some of these methods can return 1000s of results and if we're not interested in these results they can make it difficult to visualise meaningful connections within the network (see Tutorial 3 for more on this). \n",
"\n",
"Let's go ahead and perform one hop using these default settings and see what addresses, companies and officers are added:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "167cc25c",
"metadata": {},
"outputs": [],
"source": [
"network.perform_hop(1)"
]
},
{
"cell_type": "markdown",
"id": "2486aa17",
"metadata": {},
"source": [
"Let's now check out `company_ids`, `officer_ids` and `addresses` to see what new entries have been added. Nothing new in `company_ids` but this is expected as none of the API methods above connect companies with companies in one hop:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6ce4047",
"metadata": {},
"outputs": [],
"source": [
"network.company_ids"
]
},
{
"cell_type": "markdown",
"id": "eb5cb2f6",
"metadata": {},
"source": [
"We can see we now have an officer below in `officer_ids` and some of the other properties in the table now have values other than None. `node_type` describes the type of node the entry is connected to (Company, Person or Address), `node_id` provides the unique id for the node (`company_id`, `officer_id` or `address`) and `link_type` describes the relationship between the entry and the node."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "947c4cf1",
"metadata": {},
"outputs": [],
"source": [
"network.officer_ids"
]
},
{
"cell_type": "markdown",
"id": "a8cf6fa0",
"metadata": {},
"source": [
"We can interpret the table above as:\n",
"\n",
"There is an officer with ID=`Nd2URspq4bvLy-hwzDZ0_p7FGJw` who is an officer to a company with ID=`11004735`. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7083402a",
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
]
},
{
"cell_type": "markdown",
"id": "264de2dd",
"metadata": {},
"source": [
"We can see from the table above that:\n",
"\n",
"`3rd Floor 13 Charles Ii Street London SW1Y 4QU England` is an address that used to be home to a company (with ID=`11004735`):"
]
},
{
"cell_type": "markdown",
"id": "b4828d92",
"metadata": {},
"source": [
"For reproducibility, each time we perform a hop, the methods and limit configs are stored in `hop_history`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bb5f542",
"metadata": {},
"outputs": [],
"source": [
"network.hop_history"
]
},
{
"cell_type": "markdown",
"id": "ac1dab27",
"metadata": {},
"source": [
"Let's perform another two hops: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b0baba",
"metadata": {},
"outputs": [],
"source": [
"network.perform_hop(2)"
]
},
{
"cell_type": "markdown",
"id": "cec66fcc",
"metadata": {},
"source": [
"Now we can go ahead and visualise this in a map. To do this we need to get a bit more info that isn't present, namely the coordinates for all the addresses mentioned and the company names for each company. We can get this information via `run_map_preprocessing()`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3be52255",
"metadata": {},
"outputs": [],
"source": [
"network.run_map_preprocessing()"
]
},
{
"cell_type": "markdown",
"id": "dfa1b90c",
"metadata": {},
"source": [
"To see the information added, we can check out `address_history` and `companies` properties of our class:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b800202c",
"metadata": {},
"outputs": [],
"source": [
"network.address_history"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37013a7e",
"metadata": {},
"outputs": [],
"source": [
"network.companies "
]
},
{
"cell_type": "markdown",
"id": "3e3b597d",
"metadata": {},
"source": [
"We can now visualise all the companies in the network with a UK address through OpenStreetMaps:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7256c5f9",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"map_data, path_table = mapview.build_map(network) \n",
"hbox = HBox([path_table])\n",
"vbox = VBox([map_data, hbox])\n",
"vbox"
]
},
{
"cell_type": "markdown",
"id": "7e225045",
"metadata": {},
"source": [
"Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n",
"\n",
"Select a marker to display additional information: \n",
"- pop-up with the selected company's name and address\n",
"- table containing the most efficient paths from the origin to the selected company\n",
"- antpaths for each company in the network. Red antpath represents the path through all the historic addresses for the selected company. Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. \n",
"\n",
"To read paths from the table we start from the bottom of the table where we find one or several rows containing our selected company (`Node`) but with differing values for `Node Index`, `Node Type` and `Link`. If we encounter multiple rows containing our selected node, this tells us there are multiple paths of equal length from the origin to the selected node. For example, consider the following table: "
]
},
{
"cell_type": "markdown",
"id": "f6674e52",
"metadata": {},
"source": [
"<img src=\"../assets/images/kingdom_table.png\" alt=\"Drawing\" style=\"width: 700px;\"/>\n"
]
},
{
"cell_type": "markdown",
"id": "fd5d9a0d",
"metadata": {},
"source": [
"Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n",
"\n",
"Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). "
]
},
{
"cell_type": "markdown",
"id": "4a6662be",
"metadata": {},
"source": [
"### Network Persistence"
]
},
{
"cell_type": "markdown",
"id": "a68e26ca",
"metadata": {},
"source": [
"The network object can be saved with 'pickle' and reloaded when needed:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee8d8c24",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
" pickle.dump(network, handle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e7c5578",
"metadata": {},
"outputs": [],
"source": [
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
" network = pickle.load(handle)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,615 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d484fa3a",
"metadata": {},
"source": [
"*In this tutorial we will explore how many of Oxford Street's souvenir and candy shops are connected.*"
]
},
{
"cell_type": "markdown",
"id": "bb98746a",
"metadata": {},
"source": [
"In a [recent article](https://www.ft.com/candy) in the Financial Times, journalists mapped a number of candy shops on Oxford Street and noted that the company officers appear to form a loose network \"with some sharing residential or business addresses, or taking ownership of a business for months at a time before ceding to another shareholder\". Using the addresses and companies mentioned in the article, I compiled the following relevant records from Companies House:\n",
"\n",
"- CITY SOUVENIRS LTD (539 Oxford Street): 08658716\n",
"- London Hot Accessories Limited (537 Oxford Street): 10116914\n",
"- AMERICAN SWEET DREAMS LIMITED (524 Oxford Street): 13938312\n",
"- Western Crown Limited (470-482 Oxford Street): 13455377\n",
"- CANDYLICIOUS WHOLESALE LIMITED (470-472 Oxford Street): 14091125\n",
"- FREAKNAUGHTY LTD (407 Oxford Street): 11730327\n",
"- NASTY BANG LTD (324 Oxford Street): 14223273\n",
"- Quality Products and Merchandise Ltd (321-323 Oxford Street): 14518117\n",
"- Kingdom of Sweets (270 Oxford Street): 11004735\n",
"- LND Accesorize Limited (271 Oxford Street): 11601607\n",
"- Candystreet (146-148 Oxford Street): 12415826\n",
"- E & A Accessories Limited (35 Oxford Street): 14261732\n",
"- Breeze Vape Limited (33 Oxford Street): 14050986\n",
"- Drip Vape (33 Oxford Street): 14055609\n",
"- Gift 4 You (4 Oxford Street): 11439227\n",
"\n",
"The following companies were also mentioned however I couldn't find a relevant record in Companies House:\n",
"\n",
"- London Dream (476 Oxford Street)\n",
"- Welcome London (399-403 Oxford Street)\n",
"- American Candy Shop (385-389 Oxford Street)\n",
"- Candy World (363-367 Oxford Street)\n",
"- Tobacco & Cigarettes Sold Here (273 Oxford Street)\n",
"- Unique Gifts (159 Oxford Street)\n",
"- Toys and Gifts (142-144 Oxford Street)\n",
"- American Candy World (119-121 Oxford Street)\n",
"- American Sweets & Souvenirs (37-39 Oxford Street)\n",
"\n",
"In this tutorial we will build a large network of companies that connect to a single company on Oxford Street. We can use the following dictionary of companies for reference: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e43ba230",
"metadata": {},
"outputs": [],
"source": [
"candy_shops = [{\"company_name\": \"City Souvenirs\", \"address\": \"539 Oxford Street\", \"company_id\": \"08658716\"},\n",
" {\"company_name\": \"London Hot Accessories\", \"address\": \"537 Oxford Street\", \"company_id\": \"10116914\"},\n",
" {\"company_name\": \"American Sweet Dreams\", \"address\": \"524 Oxford Street\", \"company_id\": \"13938312\"},\n",
" {\"company_name\": \"London Dream\", \"address\": \"476 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"Western Crown\", \"address\": \"470-482 Oxford Street\", \"company_id\": \"13455377\"},\n",
" {\"company_name\": \"Candylicious Wholesale\", \"address\": \"470-482 Oxford Street\", \"company_id\": \"14091125\"},\n",
" {\"company_name\": \"Freakynaughty\", \"address\": \"407 Oxford Street\", \"company_id\": \"11730327\"},\n",
" {\"company_name\": \"Welcome London\", \"address\": \"399-403 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"American Candy Shop\", \"address\": \"385-389 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"Candy World\", \"address\": \"363-367 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"Nasty Bang\", \"address\": \"324 Oxford Street\", \"company_id\": \"14223273\"},\n",
" {\"company_name\": \"Quality Products and Merchandise\", \"address\": \"324 Oxford Street\", \"company_id\": \"14223273\"},\n",
" {\"company_name\": \"Tobacco & Cigarettes Sold Here\", \"address\": \"273 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"LND Accesorize\", \"address\": \"271 Oxford Street\", \"company_id\": \"11601607\"},\n",
" {\"company_name\": \"Unique Gifts\", \"address\": \"159 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"Toys and Gifts\", \"address\": \"142-144 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"Candystreet\", \"address\": \"146-148 Oxford Street\", \"company_id\": \"12415826\"},\n",
" {\"company_name\": \"American Candy World\", \"address\": \"119-121 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"American Sweets & Souvenirs\", \"address\": \"37-39 Oxford Street\", \"company_id\": \"\"},\n",
" {\"company_name\": \"E & A Accessories Limited\", \"address\": \"35 Oxford Street\", \"company_id\": \"14261732\"},\n",
" {\"company_name\": \"Breeze Vape Limited\", \"address\": \"33 Oxford Street\", \"company_id\": \"14050986\"},\n",
" {\"company_name\": \"Drip Vape\", \"address\": \"33 Oxford Street\", \"company_id\": \"14055609\"},\n",
" {\"company_name\": \"Gift 4 You\", \"address\": \"4 Oxford Street\", \"company_id\": \"11439227\"}]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c8ebc89",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import base, api, mapview\n",
"import pandas as pd\n",
"from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea"
]
},
{
"cell_type": "markdown",
"id": "26dfff93",
"metadata": {},
"source": [
"Add Companies House API key:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a5377a3",
"metadata": {},
"outputs": [],
"source": [
"api.basic_auth.username = \"\""
]
},
{
"cell_type": "markdown",
"id": "ac9946b1",
"metadata": {},
"source": [
"Let's investigate \"Western Crown\" which has an id of \"13455377\""
]
},
{
"cell_type": "markdown",
"id": "a8e5dbe1",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/western_crown.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 470-482 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79c2e3ab",
"metadata": {},
"outputs": [],
"source": [
"origin_company_id=\"13455377\"\n",
"western_crown_network = base.Network(company_id=origin_company_id)"
]
},
{
"cell_type": "markdown",
"id": "15c23378",
"metadata": {},
"source": [
"Let's impose some limits on the results. As we're doing a deeper search we want to avoid accumulating lots of irrelevant data connected to incorporation agents and virtual offices. By setting the following limits, the network will not include results that exceed these limits:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e83fd13f",
"metadata": {},
"outputs": [],
"source": [
"western_crown_network.hop.companies_at_address_maxsize = 50\n",
"western_crown_network.hop.officers_at_address_maxsize = 50\n",
"western_crown_network.hop.officer_appointments_maxsize = 50"
]
},
{
"cell_type": "markdown",
"id": "866bc18e",
"metadata": {},
"source": [
"Let's go big and perform 6 hops. It's likely to take some time (over an hour) to gather all the data. If you don't want to wait, you can also uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df617fda",
"metadata": {},
"outputs": [],
"source": [
"# import pickle\n",
"# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
"# western_crown_network = pickle.load(handle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "477823cf",
"metadata": {},
"outputs": [],
"source": [
"western_crown_network.perform_hop(6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a52276d7",
"metadata": {},
"outputs": [],
"source": [
"western_crown_network.run_map_preprocessing()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bdde00f",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# generate map\n",
"map_data,path_table = mapview.build_map(western_crown_network) \n",
"hbox = HBox([path_table])\n",
"vbox = VBox([map_data, hbox])\n",
"vbox"
]
},
{
"cell_type": "markdown",
"id": "6d3090cc",
"metadata": {},
"source": [
"### Oxford Street Connections\n",
"\n",
"From the data we've gathered, there are many companies and addresses that connect with the original company the network was built from (Western Crown Limited). To print the connections we can pass the company ID to `find_path`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6079643b",
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
]
},
{
"cell_type": "markdown",
"id": "439ba049",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/537.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 537 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "e641bf13",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [BEST OF LONDON LIMITED](https://find-and-update.company-information.service.gov.uk/company/10895963) (10895963)\n",
"\n",
"*Dissolved Companies*\n",
"- [LONDON HOT ACCESSORIES LIMITED](https://find-and-update.company-information.service.gov.uk/company/10116914) (10116914)\n",
"- [TOURISTS WORLD LTD.](https://find-and-update.company-information.service.gov.uk/company/10643744) (10643744)\n",
"- [GIFTS FOR TOURIST LIMITED](https://find-and-update.company-information.service.gov.uk/company/10910649) (10910649)"
]
},
{
"cell_type": "markdown",
"id": "67b89126",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/524.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 524 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "145f6470",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [CANDY TOWN LTD](https://find-and-update.company-information.service.gov.uk/company/11464159) (11464159)\n",
"- [ESPANZA LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474248) (11474248)\n",
"\n",
"*Dissolved Companies*\n",
"- [MARGIN FREE SUPER MARKET LIMITED](https://find-and-update.company-information.service.gov.uk/company/10540083) (10540083)\n",
"- [COOL MIX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11031538) (11031538)\n",
"- [ROCK GIFTS LTD](https://find-and-update.company-information.service.gov.uk/company/11588633) (11588633)"
]
},
{
"cell_type": "markdown",
"id": "004ff136",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/470.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\">470-482 Oxford Street</figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "baf21c69",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [WESTERN CROWN LIMITED](https://find-and-update.company-information.service.gov.uk/company/13455377) (13455377)"
]
},
{
"cell_type": "markdown",
"id": "2143ce03",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/447.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\">447 Oxford Street</figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "7025e057",
"metadata": {},
"source": [
"*Dissolved Companies*\n",
"- [PLANET SOUVENIRS (UK) LIMITED](https://find-and-update.company-information.service.gov.uk/company/07570906) (07570906)"
]
},
{
"cell_type": "markdown",
"id": "1b74fcca",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/407.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 407-409 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "b2b2771d",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [WESTERN CROWN LIMITED](https://find-and-update.company-information.service.gov.uk/company/13455377) (13455377)"
]
},
{
"cell_type": "markdown",
"id": "a3a6e274",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/269.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 267-269 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "5c007277",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [MOODY MOON LIMITED](https://find-and-update.company-information.service.gov.uk/company/13287820) (13287820)"
]
},
{
"cell_type": "markdown",
"id": "54301d43",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/263.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 263-265 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "4a290e19",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [CEEKEY LONDON LTD](https://find-and-update.company-information.service.gov.uk/company/11647876) (11647876)\n",
"\n",
"*Liquidated Companies*\n",
"- [RUSTIC RAY LTD](https://find-and-update.company-information.service.gov.uk/company/11758349) (11758349)\n",
"\n",
"*Dissolved Companies*\n",
"- [LILLY MAX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474310) (11474310)\n",
"- [GIFTNET LTD](https://find-and-update.company-information.service.gov.uk/company/11593230) (11593230)\n",
"- [BUMPZ LTD](https://find-and-update.company-information.service.gov.uk/company/10941293) (10941293)\n",
"- [VENGAT GIFT LTD](https://find-and-update.company-information.service.gov.uk/company/11647421) (11647421)"
]
},
{
"cell_type": "markdown",
"id": "a5883ee7",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/240.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 240-242 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "99bec6e0",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [GIFT 4 YOU LIMITED](https://find-and-update.company-information.service.gov.uk/company/11439227) (11439227)\n",
"- [WEST END MANAGEMENT LIMITED](https://find-and-update.company-information.service.gov.uk/company/11467385) (11467385)\n",
"\n",
"*Dissolved Companies*\n",
"- [TOURISTS WORLD LTD.](https://find-and-update.company-information.service.gov.uk/company/10643744) (10643744)\n",
"- [GIFTS FOR TOURIST LIMITED](https://find-and-update.company-information.service.gov.uk/company/10910649) (10910649)\n",
"- [LILLY MAX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474310) (11474310)"
]
},
{
"cell_type": "markdown",
"id": "af81028b",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/158.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 158 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "0dd6dea9",
"metadata": {},
"source": [
"*Dissolved Companies*\n",
"\n",
"- [UNIQUE GIFTS (LONDON) LTD](https://find-and-update.company-information.service.gov.uk/company/07060273) (07060273)"
]
},
{
"cell_type": "markdown",
"id": "6c3bf19e",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/146.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 146-148 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "914616c5",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [CANDYSTREET LTD](https://find-and-update.company-information.service.gov.uk/company/12415826) (12415826)\n",
"- [LUMS SWEETS LTD](https://find-and-update.company-information.service.gov.uk/company/11864536) (11864536)\n",
"- [SPARK LABEL LTD](https://find-and-update.company-information.service.gov.uk/company/13865359) (13865359)\n",
"- [SEEN CAPTURE LTD](https://find-and-update.company-information.service.gov.uk/company/11468719) (11468719)\n",
"- [PINEBIRD LTD](https://find-and-update.company-information.service.gov.uk/company/11869360) (11869360)\n",
"\n",
"*Dissolved Companies*\n",
"- [WESTERN CANDIES LTD](https://find-and-update.company-information.service.gov.uk/company/12005109) (12005109)\n",
"- [GIFTS OF GLORY LTD](https://find-and-update.company-information.service.gov.uk/company/12268339) (12268339)\n",
"- [CANDY CANE LTD](https://find-and-update.company-information.service.gov.uk/company/12005370) (12005370)\n",
"- [XEE ASSET MANAGEMENT LTD](https://find-and-update.company-information.service.gov.uk/company/10734212) (10734212)\n",
"- [GRAND STORE LTD](https://find-and-update.company-information.service.gov.uk/company/11843228) (11843228)"
]
},
{
"cell_type": "markdown",
"id": "fa1727b0",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/142.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 142 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "0dc6b948",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [GRAND EMPIRE LIMITED](https://find-and-update.company-information.service.gov.uk/company/13376158) (13376158)\n",
"- [ASUS BLUE LIMITED](https://find-and-update.company-information.service.gov.uk/company/13795800) (13795800)"
]
},
{
"cell_type": "markdown",
"id": "047afd96",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/41.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 41 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "4719929d",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [FANCY SOUVENIRS LIMITED](https://find-and-update.company-information.service.gov.uk/company/05548476) (05548476)"
]
},
{
"cell_type": "markdown",
"id": "5bd0e6f3",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/37.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 37-39 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "4f51a748",
"metadata": {},
"source": [
"*Active Companies*\n",
"\n",
"- [CHOCO-FIVES LTD](https://find-and-update.company-information.service.gov.uk/company/13967497) (13967497)\n",
"- [FANCYGREEN LTD](https://find-and-update.company-information.service.gov.uk/company/14210992) (14210992)\n",
"- [FORTLEE LTD](https://find-and-update.company-information.service.gov.uk/company/10782536) (10782536)\n",
"- [CEEKEY LONDON LTD](https://find-and-update.company-information.service.gov.uk/company/11647876) (11647876)\n",
"- [CHERRYTREE FOUNDATION](https://find-and-update.company-information.service.gov.uk/company/08632458) (08632458)\n",
"- [CHOCO-LOT LTD](https://find-and-update.company-information.service.gov.uk/company/13964334) (13964334)\n",
"- [FABIAN BELL LTD](https://find-and-update.company-information.service.gov.uk/company/11855234) (11855234)\n",
"- [SEEN CAPTURE LTD](https://find-and-update.company-information.service.gov.uk/company/11468719) (11468719)\n",
"- [ASUSGIFTS LIMITED](https://find-and-update.company-information.service.gov.uk/company/13857378) (13857378)\n",
"- [BRITCO GIFTS LIMITED](https://find-and-update.company-information.service.gov.uk/company/14472151) (14472151)\n",
"- [AH MONEY EXCHANGE LTD](https://find-and-update.company-information.service.gov.uk/company/10231441) (10231441)\n",
"\n",
"*Dissolved Companies*\n",
"- [GIFT PUNCH LIMITED](https://find-and-update.company-information.service.gov.uk/company/12387042) (12387042)\n",
"- [GIFTNET LTD](https://find-and-update.company-information.service.gov.uk/company/11593230) (11593230)\n",
"- [JUICE COLLECTIVE LTD](https://find-and-update.company-information.service.gov.uk/company/08282993) (08282993)\n",
"- [WINHAND LTD](https://find-and-update.company-information.service.gov.uk/company/11016980) (11016980)"
]
},
{
"cell_type": "markdown",
"id": "a3f9a2d7",
"metadata": {},
"source": [
"<figure>\n",
"<img src=\"../assets/images/4.png\" style=\"width:100%\">\n",
"<figcaption align = \"center\"> 4 Oxford Street </figcaption>\n",
"</figure>"
]
},
{
"cell_type": "markdown",
"id": "bef660a5",
"metadata": {},
"source": [
"*Active Companies*\n",
"- [GIFT 4 YOU LIMITED](https://find-and-update.company-information.service.gov.uk/company/11439227) (11439227)"
]
},
{
"cell_type": "markdown",
"id": "957f4c3e",
"metadata": {},
"source": [
"### Beyond Oxford Street\n",
"\n",
"From analysing the 'generate map' cell, we will notice many other companies connected to the original company located beyond Oxford Street. Further research could involve:\n",
"- building networks from some of the other companies registered to Oxford Street addresses\n",
"- building networks deeper than 6 hops to explore a wider range of connections\n",
"- analysing connections in greater detail\n",
"- analysing documents from Companies House linked to companies in the network\n",
"- identifying other connected companies of interest beyond Oxford Street\n",
"- developing statistics that communicate the scale of these networks and connectivity within the UK\n",
"- analysing connections outside the UK \n",
"- running a news search on entities within the network to see if companies are connected to any newsworthy entities\n",
"- analysing hotspots for registering new companies over time to see if there are emerging popular locations, in other words where is the new Oxford Street?\n",
"- analysing other types of companies connected to souvenir and candy shops (money exchanges, security firms etc.)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,440 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b2110da7",
"metadata": {},
"source": [
"*In this tutorial we will investigate addresses with a large number of companies registered via the API and Companies House Data Product download.*"
]
},
{
"cell_type": "markdown",
"id": "25528662",
"metadata": {},
"source": [
"### Busy Addresses and API Limits"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab9e8ee0",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import base, api, mapview\n",
"import pandas as pd\n",
"api.basic_auth.username = \"\""
]
},
{
"cell_type": "markdown",
"id": "00c6a5be",
"metadata": {},
"source": [
"When navigating Companies House, there are times when we will run into some very popular addresses. For example, let's say we build a network from [this officer](https://find-and-update.company-information.service.gov.uk/officers/Nd2URspq4bvLy-hwzDZ0_p7FGJw/appointments):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "723f234a",
"metadata": {},
"outputs": [],
"source": [
"officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n",
"network = base.Network(officer_id=officer_id)\n",
"network.perform_hop(2)"
]
},
{
"cell_type": "markdown",
"id": "edad561e",
"metadata": {},
"source": [
"Within 2 hops we've got over 60 addresses (although many of them look like duplicate entries):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eea32631",
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ce897c0",
"metadata": {},
"outputs": [],
"source": [
"network.addresses['address'].unique()"
]
},
{
"cell_type": "markdown",
"id": "8c17fff5",
"metadata": {},
"source": [
"If we check out the `maxsize_entities` property of our Network class, we will see a dataframe containing all of the addresses and officers that have exceeded the maxsize limits imposed in the Hop class. In this case, we can see one of the addresses in the network has over 4800 companies based there."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b8d3c20",
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities"
]
},
{
"cell_type": "markdown",
"id": "5ad7b443",
"metadata": {},
"source": [
"Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f94f731",
"metadata": {},
"outputs": [],
"source": [
"network.hop.companies_at_address_maxsize"
]
},
{
"cell_type": "markdown",
"id": "2d4edaf0",
"metadata": {},
"source": [
"If we check `company_ids` we'll notice it hasn't had 4800 companies added to it:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3ef12fe",
"metadata": {},
"outputs": [],
"source": [
"len(network.company_ids['company_id'].unique())"
]
},
{
"cell_type": "markdown",
"id": "d177f1b5",
"metadata": {},
"source": [
"Including limits is useful to avoid our databases getting clogged up with random companies. \n",
"But let's pause briefly to explore: what kind of address would have thousands of companies registered there?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bb8bdf1",
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities['node'][0]"
]
},
{
"cell_type": "markdown",
"id": "e8644d6b",
"metadata": {},
"source": [
"![title](../assets/images/regent_storefront.jpeg)"
]
},
{
"cell_type": "markdown",
"id": "40354a28",
"metadata": {},
"source": [
"\"3rd Floor, 207, Regent Street\" is a \"virtual office\" run by a company called [Hold Everything](https://www.hold-everything.com/). Businesses can use this address for correspondence/registration for £24 a month:"
]
},
{
"cell_type": "markdown",
"id": "11b08c79",
"metadata": {},
"source": [
"![title](../assets/images/exclusive.png)"
]
},
{
"cell_type": "markdown",
"id": "2c9e85ed",
"metadata": {},
"source": [
"However, the large number of companies registered at a single address can lead to many instances of mistaken identity. Just because a company is registered at a virtual office does not mean it has any connection with other companies registered there:"
]
},
{
"cell_type": "markdown",
"id": "be5e4352",
"metadata": {},
"source": [
"![title](../assets/images/review.png)"
]
},
{
"cell_type": "markdown",
"id": "282ba8ea",
"metadata": {},
"source": [
"Numerous media outlets have reported on fraudulent companies that use virtual offices and incorporation services: \n",
"- Kemp House, 162 City Road | Capital Officer: [Mystery group took millions in furlough funds - Financial Times](https://www.ft.com/content/b3c70369-5170-47ca-b779-fc0898fd29e6)\n",
"- 20-22 Wenlock Road | Made Simple: [Court shuts down companies behind £9m truffle scam - Gov.uk](https://www.gov.uk/government/news/court-shuts-down-companies-behind-9m-truffle-scam)\n",
"- 2 Woodberry Down | A1 Company Services: [How A Suburban North London House Is Connected To The Paul Manafort Indictment - Huffington Post](https://www.huffingtonpost.co.uk/entry/manfort-london-connection_uk_59f72f50e4b07fdc5fbf92c7)\n",
"- 29 Harley Street | Formations House: [Offshore in central London: the curious case of 29 Harley Street - The Guardian](https://www.theguardian.com/business/2016/apr/19/offshore-central-london-curious-case-29-harley-street)\n",
"- 63-66 Hatton Garden | Valemont Properties Ltd: [The Global Laundromat: how did it work and who benefited? - The Guardian](https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited)"
]
},
{
"cell_type": "markdown",
"id": "a85fdcfa",
"metadata": {},
"source": [
"If we wanted to get all companies listed at 207 Regent Street we can adjust our maxsize limits to `None` and attempt to perform a hop again:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb0c02d0",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network = base.Network(address='3rd Floor, 207 Regent Street London W1B 3HH England')\n",
"regent_street_network.hop.companies_at_address_maxsize = None\n",
"regent_street_network.hop.officers_at_address_maxsize = None\n",
"regent_street_network.perform_hop(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3dc0f165",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.company_ids"
]
},
{
"cell_type": "markdown",
"id": "cff1061e",
"metadata": {},
"source": [
"Such large networks can still be interesting to analyse. For instance, if we perform another hop, this will get all the officers for every company at the address. This will take several hours to build as we have lots of companies to analyse; however, if we want to save time, we could just load a pre-made network using the cell below: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef262359",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.perform_hop(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38937142",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
" regent_street_network = pickle.load(handle)"
]
},
{
"cell_type": "markdown",
"id": "d6e330ee",
"metadata": {},
"source": [
"Analysing the most frequently occurring officers running businesses from 207 Regent Street returns some very busy officers and incorporation agents:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e97fa3b",
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.officer_ids['name'].value_counts()"
]
},
{
"cell_type": "markdown",
"id": "d6a22e40",
"metadata": {},
"source": [
"A quick news lookup on two of the officers in the top 5, J. Beardsley of Helve TCS Limited and S. Poppleton, reveals these names to be connected to several known instances of fraud:\n",
"- [Fraudster duo jailed for their part in defrauding millions of pounds from over 100 victims - Crown Prosecution Service](https://www.cps.gov.uk/cps/news/fraudster-duo-jailed-their-part-defrauding-millions-pounds-over-100-victims)\n",
"- [Print farming companies struck off - Printweek](https://www.printweek.com/news/article/print-farming-companies-struck-off)\n",
"- [Rogue book publishers slammed shut by the courts - Gov.uk](https://www.gov.uk/government/news/rogue-book-publishers-slammed-shut-by-the-courts)"
]
},
{
"cell_type": "markdown",
"id": "f0699a27",
"metadata": {},
"source": [
"### Busier Addresses and Downloaded Data"
]
},
{
"cell_type": "markdown",
"id": "944525cb",
"metadata": {},
"source": [
"There are situations where some addresses have thousands or even tens of thousands of companies registered. Companies House provides two methods for getting company data: the API and the data product. We used the API to get the information above, which returns all active and dissolved companies registered to the address. We get the same result when we attempt to perform an advanced company search using this address through the website:"
]
},
{
"cell_type": "markdown",
"id": "c307994f",
"metadata": {},
"source": [
"![title](../assets/images/regent.png)"
]
},
{
"cell_type": "markdown",
"id": "517e6aaa",
"metadata": {},
"source": [
"Unfortunately, the API is limited to returning a maximum of 5000 results. This is fine in our case with 207 Regent Street because we're just under the limit. However, there are much bigger fish out there, for instance '71-75 Shelton Street':"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f40ee11",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.perform_hop(1)\n",
"shelton_street_network.maxsize_entities"
]
},
{
"cell_type": "markdown",
"id": "6f1abb52",
"metadata": {},
"source": [
"We can already see it's over the API's limit of 5000 results. If we check online, we can see the number is huge: "
]
},
{
"cell_type": "markdown",
"id": "03b64f03",
"metadata": {},
"source": [
"![title](../assets/images/shelton.png)"
]
},
{
"cell_type": "markdown",
"id": "f9fda7a6",
"metadata": {},
"source": [
"This is where the data product comes in. We can download it in one go and use it to get all of the \"active\" companies. To use the data product:\n",
"1. Download it from [here](http://download.companieshouse.gov.uk/en_output.html) (this might take some time as it's a pretty large file, ~430MB)\n",
"2. Move it to local directory `assets/company_data/` and unzip the file \n",
"3. Load into a dataframe which we can pass to our network class\n",
"\n",
"It might take a minute to load. Now adjust the file path below and attempt to load it into `company_data`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d9d0080",
"metadata": {},
"outputs": [],
"source": [
"company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
]
},
{
"cell_type": "markdown",
"id": "2273cf39",
"metadata": {},
"source": [
"Now let's try to get every company at the very overcrowded 71-75 Shelton Street address:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e273ce0",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.hop.companies_at_address_maxsize = None\n",
"shelton_street_network.hop.officers_at_address_maxsize = None\n",
"shelton_street_network.get_officers_at_address = False\n",
"shelton_street_network.perform_hop(1, company_data= company_data)"
]
},
{
"cell_type": "markdown",
"id": "820a908d",
"metadata": {},
"source": [
"If we check `company_ids` we have over 70000 companies that we could build a network from if we had lots of time on our hands:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12acb915",
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network.company_ids"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

172
notebooks/quickstart.ipynb Normal file
View File

@@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b6926e35",
"metadata": {},
"source": [
"*Quickstart hands-on exercise. For an in-depth intro, check out Tutorial 1:*"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f17ebdd2",
"metadata": {},
"outputs": [],
"source": [
"from sugartrail import mapview, api, base\n",
"from ipywidgets import VBox, HBox"
]
},
{
"cell_type": "markdown",
"id": "d5f9b6ad",
"metadata": {},
"source": [
"Insert a valid [Companies House Public Data API key](https://developer.company-information.service.gov.uk/get-started/) as `username` string value below. If you don't want to use the API and would prefer loading a pre-built network, uncomment and run the cell below and then run the final cell to build and load the map. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a9639e6",
"metadata": {},
"outputs": [],
"source": [
"# # network build from Domain Foundation, company_id = \"11951034\"\n",
"# import pickle\n",
"\n",
"# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
"# network = pickle.load(handle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89b0082a",
"metadata": {},
"outputs": [],
"source": [
"api.basic_auth.username = \"\""
]
},
{
"cell_type": "markdown",
"id": "63220f29",
"metadata": {},
"source": [
"Enter the company number (as string) for a company you would like to explore. Example value is provided: "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8aca6a54",
"metadata": {},
"outputs": [],
"source": [
"company_id = \"11951034\"\n",
"network = base.Network(company_id=company_id)"
]
},
{
"cell_type": "markdown",
"id": "7de31e72",
"metadata": {
"tags": [
"5"
]
},
"source": [
"Perform `n` number of hops (3 or less at first is advised to keep the network manageable in size):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d80be86d",
"metadata": {
"tags": [
"6"
]
},
"outputs": [],
"source": [
"n = 3\n",
"network = base.Network(company_id=company_id)\n",
"network.perform_hop(n)"
]
},
{
"cell_type": "markdown",
"id": "4481c80d",
"metadata": {},
"source": [
"Now let's visualise the connections:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "022f026e",
"metadata": {},
"outputs": [],
"source": [
"network.run_map_preprocessing()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01dca0cf",
"metadata": {
"scrolled": false,
"tags": [
"7"
]
},
"outputs": [],
"source": [
"map_data,path_table = mapview.build_map(network) \n",
"hbox = HBox([path_table])\n",
"vbox = VBox([map_data, hbox])\n",
"vbox"
]
},
{
"cell_type": "markdown",
"id": "457bf4d0",
"metadata": {},
"source": [
"Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n",
"\n",
"Select a marker to display additional information: \n",
"- pop-up with the selected company's name and address\n",
"- table containing the most efficient paths from the origin to the selected company\n",
"- antpaths for each company in the network. Red antpath represents the path through all the historic addresses for the selected company. Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

11
setup.py Normal file
View File

@@ -0,0 +1,11 @@
from pathlib import Path

from setuptools import setup, find_packages

# Resolve the requirements file relative to this script so that installation
# does not depend on the directory the build is invoked from.
_REQUIREMENTS_FILE = Path(__file__).resolve().parent / "config" / "requirements.txt"


def _read_requirements(path):
    """Return the requirement specifiers listed in *path*, one per line.

    Blank lines and lines starting with ``#`` are skipped. Each remaining line
    is kept whole, so a specifier containing spaces stays a single entry.
    """
    with open(path, encoding="utf-8") as requirement_file:
        stripped_lines = (line.strip() for line in requirement_file)
        return [line for line in stripped_lines if line and not line.startswith("#")]


requirements = _read_requirements(_REQUIREMENTS_FILE)

setup(
    name="sugartrail",
    version="1.0.0",
    install_requires=requirements,
    # Notebooks, the dashboard and static assets are not importable packages.
    packages=find_packages(exclude=["notebooks", "dashboard", "assets"]),
)

View File

@@ -8,6 +8,14 @@ password = ""
size = "5000"
basic_auth = requests.auth.HTTPBasicAuth(username, password)
def test():
    """Check whether the configured API credentials are accepted.

    Sends one authenticated GET request to the Companies House advanced
    company search endpoint using the module-level ``basic_auth``.

    Returns:
        bool: True if the API responded with HTTP 200; False for any other
        status code or if the request could not be completed.
    """
    url = "https://api.company-information.service.gov.uk/advanced-search/companies"
    try:
        # Bound the wait so an unreachable API cannot hang the caller forever.
        response = requests.get(url, auth=basic_auth, timeout=10)
    except requests.exceptions.RequestException:
        # A failed request means the API is not usable right now; report that
        # through the boolean result instead of raising from a yes/no probe.
        return False
    return response.status_code == 200
def make_request(url, input, input_type, response_type):
time.sleep(0.5)
try:
@@ -17,13 +25,17 @@ def make_request(url, input, input_type, response_type):
if response.status_code == 200:
return response.json()
except requests.exceptions.RequestException as err:
print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
# print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.HTTPError as errh:
print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
# print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.ConnectionError as errc:
print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
# print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.Timeout as errt:
print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
# print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
def get_company_officers(company_id):
url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers"

View File

@@ -208,18 +208,18 @@ class Network:
for i,address in enumerate(selected_addresses):
self.hop.search_address(self, address, company_data)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop+1))
print("Hop number: " + str(hop))
print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.")
for j,company in enumerate(selected_companies):
self.hop.search_company_id(self,company)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop+1))
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.")
for k,officer in enumerate(selected_officers):
self.hop.search_officer_id(self,officer)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop+1))
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
@@ -230,14 +230,14 @@ class Network:
self.get_company_address_history = True
self.get_psc_correspondance_address = True
self.get_officer_appointments = True
self.officer_appointments_maxsize = 2000
self.officer_appointments_maxsize = 50
self.get_officer_correspondance_address = True
self.get_officer_duplicates = True
self.officer_duplicates_maxsize = None
self.get_officers_at_address = True
self.officers_at_address_maxsize = 1000
self.officers_at_address_maxsize = 50
self.get_companies_at_address = True
self.companies_at_address_maxsize = 500
self.companies_at_address_maxsize = 50
def search_company_id(self, network, company_id):
officers = []

View File

@@ -4,8 +4,10 @@ import pandas as pd
from datetime import datetime
import functools
from string import ascii_lowercase as alc
import math
def build_map(network):
def build_map(network, clear_widget=True):
    """Build the map view for *network*.

    When ``clear_widget`` is truthy, ``Widget.close_all()`` is called before
    the map data is loaded. Returns the pair of values produced by
    ``load_map_data(network)``.
    """
    if clear_widget:
        Widget.close_all()
    map_view, table = load_map_data(network)
    return map_view, table
@@ -14,6 +16,9 @@ def get_address_path(network, company_id):
company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
address_path = []
for index, row in company_address_history.iterrows():
if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])):
pass
else:
address_path.insert(0,[row['lat'], row['lon']])
return address_path
@@ -24,11 +29,17 @@ def locations_from_origin_path(path, network):
last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
lat = last_company_address_row['lat'].item()
lon = last_company_address_row['lon'].item()
if math.isnan(float(lat)):
pass
else:
locations.append([float(lat),float(lon)])
elif node['type'] == 'Address':
address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
lat = address_row['lat'].item()
lon = address_row['lon'].item()
if math.isnan(float(lat)) or math.isnan(float(lon)):
pass
else:
locations.append([float(lat),float(lon)])
return locations