diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d8dd28 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +# Sphinx documentation +docs/_build/ diff --git a/Tutorial 3 - Virtual Offices.ipynb b/Tutorial 3 - Virtual Offices.ipynb deleted file mode 100644 index 80a1021..0000000 --- a/Tutorial 3 - Virtual Offices.ipynb +++ /dev/null @@ -1,1218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "b2110da7", - "metadata": {}, - "source": [ - "*In this tutorial we will investigate addresses with a large number of companies registered via the API and Companies House Data Product download.*" - ] - }, - { - "cell_type": "markdown", - "id": "25528662", - "metadata": {}, - "source": [ - "### Busy Addresses and API Limits" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "ab9e8ee0", - "metadata": {}, - "outputs": [], - "source": [ - "from sugartrail import base, api, mapview\n", - "import pandas as pd\n", - "api.basic_auth.username = \"\"" - ] - }, - { - "cell_type": "markdown", - "id": "00c6a5be", - "metadata": {}, - "source": [ - "When navigating Companies House there are times that we will run into some very popular addresses. 
For example lets say build a network from [this officer](https://find-and-update.company-information.service.gov.uk/officers/Nd2URspq4bvLy-hwzDZ0_p7FGJw/appointments):" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "723f234a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hop number: 2\n", - "Processed 12/12 addresses.\n", - "Processed 30/30 companies.\n" - ] - } - ], - "source": [ - "officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n", - "network = base.Network(officer_id=officer_id)\n", - "network.perform_hop(2)" - ] - }, - { - "cell_type": "markdown", - "id": "edad561e", - "metadata": {}, - "source": [ - "Within 2 hops we've got over 60 addresses (although many of them look like duplicate entries):" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "eea32631", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
addressnlink_typenode_typenodelatlon
03rd Floor 13 Charles Ii Street London SW1Y 4QU...1Appointment AddressPersonNd2URspq4bvLy-hwzDZ0_p7FGJwNaNNaN
113 Charles Ii Street 3rd Floor London SW1Y 4QU...1Appointment AddressPersonNd2URspq4bvLy-hwzDZ0_p7FGJwNaNNaN
28 Segedunum Business Centre Station Road Walls...1Appointment AddressPersonNd2URspq4bvLy-hwzDZ0_p7FGJwNaNNaN
3220 Wards Road Ilford IG2 7DY England1Appointment AddressPersonNd2URspq4bvLy-hwzDZ0_p7FGJwNaNNaN
48 Segedunum Business Centre Station Road Walls...1Appointment AddressPersonNd2URspq4bvLy-hwzDZ0_p7FGJwNaNNaN
........................
1757 Segedunum Business Centre Station Road Walls...2Historic AddressCompany08785140NaNNaN
17631a Station Road Whitley Bay Tyne and Wear NE2...2Historic AddressCompany08785140NaNNaN
1773rd Floor, 207 Regent Street London W1B 3HH2Historic AddressCompany08785140NaNNaN
178Overseas House 66-68 High Road Bushey Heath Bu...2Historic AddressCompany08785140NaNNaN
179Conveyit House 28 Coity Road Bridgend Mid Gla...2Historic AddressCompany08785140NaNNaN
\n", - "

180 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " address n \\\n", - "0 3rd Floor 13 Charles Ii Street London SW1Y 4QU... 1 \n", - "1 13 Charles Ii Street 3rd Floor London SW1Y 4QU... 1 \n", - "2 8 Segedunum Business Centre Station Road Walls... 1 \n", - "3 220 Wards Road Ilford IG2 7DY England 1 \n", - "4 8 Segedunum Business Centre Station Road Walls... 1 \n", - ".. ... .. \n", - "175 7 Segedunum Business Centre Station Road Walls... 2 \n", - "176 31a Station Road Whitley Bay Tyne and Wear NE2... 2 \n", - "177 3rd Floor, 207 Regent Street London W1B 3HH 2 \n", - "178 Overseas House 66-68 High Road Bushey Heath Bu... 2 \n", - "179 Conveyit House 28 Coity Road Bridgend Mid Gla... 2 \n", - "\n", - " link_type node_type node lat lon \n", - "0 Appointment Address Person Nd2URspq4bvLy-hwzDZ0_p7FGJw NaN NaN \n", - "1 Appointment Address Person Nd2URspq4bvLy-hwzDZ0_p7FGJw NaN NaN \n", - "2 Appointment Address Person Nd2URspq4bvLy-hwzDZ0_p7FGJw NaN NaN \n", - "3 Appointment Address Person Nd2URspq4bvLy-hwzDZ0_p7FGJw NaN NaN \n", - "4 Appointment Address Person Nd2URspq4bvLy-hwzDZ0_p7FGJw NaN NaN \n", - ".. ... ... ... ... ... 
\n", - "175 Historic Address Company 08785140 NaN NaN \n", - "176 Historic Address Company 08785140 NaN NaN \n", - "177 Historic Address Company 08785140 NaN NaN \n", - "178 Historic Address Company 08785140 NaN NaN \n", - "179 Historic Address Company 08785140 NaN NaN \n", - "\n", - "[180 rows x 7 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "network.addresses" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7ce897c0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['3rd Floor 13 Charles Ii Street London SW1Y 4QU England',\n", - " '13 Charles Ii Street 3rd Floor London SW1Y 4QU England',\n", - " '8 Segedunum Business Centre Station Road Wallsend NE28 6HQ United Kingdom',\n", - " '220 Wards Road Ilford IG2 7DY England',\n", - " '8 Segedunum Business Centre Station Road Wallsend NE28 6HQ England',\n", - " 'Burnards Accountants 8 Segedunum Business Centre Wallsend NE28 6HQ England',\n", - " '220 Consultancy House Ilford IG2 7DY England',\n", - " '220 Consultancy House Ilford IG2 7DY United Kingdom',\n", - " 'Burnard Accountants, 8 Bankside Building Segedunum Business Centre Wallsend NE28 6HQ United Kingdom',\n", - " '8 Bankside Bldg Segedunum Business Centre Station Road Wallsend NE28 6HQ England',\n", - " '3rd Floor, 207 Regent Street London W1B 3HH England',\n", - " '73 Ingelow Road London SW8 3PE England',\n", - " 'Burnard Accountants 8 Bankside Building Segedunum Business Centre Wallsend NE28 6HQ United Kingdom',\n", - " '220 Consultancy House Wards Road Ilford IG2 7DY',\n", - " 'Burnard Accountants 8 Bankside Building Segedunum Business Centre Station Road Wallsend Tyne & Wear NE28 6HQ United Kingdom',\n", - " '220 Consultancy House Ilford IG2 7DY',\n", - " 'Lower Ground Floor One George Yard London EC3V 9DF United Kingdom',\n", - " '41 Chalton Street London NW1 1JD United Kingdom',\n", - " '220 Consultancy House Wards Road Ilford Essex 
IG2 7DY United Kingdom',\n", - " 'Burnards Accountant 8a Segedunum Way Wallsend Tyne and Wear NE28 8JN United Kingdom',\n", - " '41 Chalton Street London NW1 1JD',\n", - " '8 Bankside Building Segedunum Business Centre Station Road Wallsend NE28 6HQ United Kingdom',\n", - " '220 Consultancy House Wards Road Ilford Essex IG2 7DY England',\n", - " 'Burnards Accountant 8 Segedunum Way Wallsend Tyne and Wear NE28 8JN United Kingdom',\n", - " '8 Bankside Bulding Segedunum Business Centre Wallsend NE28 6HQ United Kingdom',\n", - " 'Lower Ground Floor One George Yard London EC3V 9DF',\n", - " '8 Segedunum Business Centre Station Rd, Wallsend NE28 6HQ United Kingdom',\n", - " '8a Segedunum Way Wallsend Tyne and Wear NE28 8JN United Kingdom',\n", - " '31a Station Road Whitley Bay NE26 2QZ England',\n", - " '25 Dulverton Court Adderstone Crescent Newcastle upon Tyne NE2 2HS United Kingdom',\n", - " '3rd Floor 207 Regent Street London W1B 3HH United Kingdom',\n", - " 'Universal Company Incorporations Ltd Office 3, Second Floor North Merrion Avenue HA7 4RY United Kingdom',\n", - " '8 Bankside Buildng Segedunum Business Centre Station Road Wallsend NE28 6HQ United Kingdom',\n", - " '207 Regent Street London W1B 3HH United Kingdom',\n", - " 'Consultancy House Ilford IG2 7DY United Kingdom',\n", - " 'Burnards Accountants 8 Segedunum Business Centre Station Road Wallsend NE28 6HQ England',\n", - " '8 Segedunum Business Centre Burnards Accountants Station Road Wallsend NE28 6HQ England',\n", - " 'Overseas House 66-68 Bushey WD23 1GG England',\n", - " 'Peine House Hind Hill Street Heywood OL10 1JZ England',\n", - " 'Conveyit House 28 Coity Road Bridgend CF31 1LR',\n", - " '3 rd Floor 207 Regent Street London W1B 3HH',\n", - " '3 Rd Floor 207 Regent Street London W1B 3HH England',\n", - " 'Overseas House 66-68 High Road Bushey Heath Bushey WD23 1GG',\n", - " ' Conveyit House 28 Coity Road Bridgend Mid Glamorgan CF31 1LR',\n", - " ' 10-12 Dunraven Place Bridgend Mid Glamorgan CF31 1JD 
United Kingdom',\n", - " '3 rd Floor 207 Regent Street London W1B 3HH England',\n", - " ' Conveyit House 28 Coity Road Bridgend Mid Glamorgan CF31 1LR United Kingdom',\n", - " 'Conveyit House 28 Coity Road Bridgend CF31 1LR United Kingdom',\n", - " ' Conveyit House 28 Coity Road Bridgend Mid Glamorgan CF31 1LR Wales',\n", - " '8 Segendunum Business Centre Station Road Wallsend NE28 6HQ United Kingdom',\n", - " 'Burnard Accountants, 8 Bankside Building Segedunum Business Centre Station Road Wallsend Tyne & Wear NE28 6HQ United Kingdom',\n", - " '8 Bankside Bldg Segedunum Business Centre C/O Burnard Accountants Wallsend NE28 6HQ England',\n", - " '207 Regent Street London W1B 3HH Great Britain',\n", - " '8 Bankside Bldg Segedunum Business Centre C/O Burnard Accountants Station Road Wallsend Tyne & Wear NE28 6HQ England',\n", - " '7 Segedunum Business Centre Station Road Wallsend NE28 6HQ United Kingdom',\n", - " '31a Station Road Whitley Bay Tyne and Wear NE26 2QZ',\n", - " 'Third Floor 207 Regent Street London W1B 3HH United Kingdom',\n", - " '5 Temple Square Temple Street Liverpool L2 5RH',\n", - " 'Conveyit House 28 Bridgend CF31 1LR United Kingdom',\n", - " 'C/O BDO LLP Liverpool L2 5RH',\n", - " 'C/O Bdo Llp 2nd Floor 2 City Place Beehive Ring Road Gatwick RH6 0PA',\n", - " '31a Station Road Whitley Bay Tyne and Wear NE26 2QZ England',\n", - " '3rd Floor, 207 Regent Street London W1B 3HH',\n", - " '73 High Street High Street Kingston Upon Thames KT1 4DA England',\n", - " '8 Segedunum Business Centre Wallsend NE28 6HQ England',\n", - " '2nd Floor, 2 City Place Beehive Ring Road Gatwick RH6 0PA'],\n", - " dtype=object)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "network.addresses['address'].unique()" - ] - }, - { - "cell_type": "markdown", - "id": "8c17fff5", - "metadata": {}, - "source": [ - "If we check out the `maxsize_entities` property of our Network class, we will see a dataframe containing 
all of the addresses and officers that have exceeded the maxsize limits imposed in the Hop class. In this case, we can see one of the addresses in the network has over 4800 companies based there." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8b8d3c20", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nodetypemaxsize_typesize
03rd Floor, 207 Regent Street London W1B 3HH En...AddressCompanies4814
\n", - "
" - ], - "text/plain": [ - " node type maxsize_type \\\n", - "0 3rd Floor, 207 Regent Street London W1B 3HH En... Address Companies \n", - "\n", - " size \n", - "0 4814 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "network.maxsize_entities" - ] - }, - { - "cell_type": "markdown", - "id": "5ad7b443", - "metadata": {}, - "source": [ - "Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4f94f731", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "500" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "network.hop.companies_at_address_maxsize" - ] - }, - { - "cell_type": "markdown", - "id": "2d4edaf0", - "metadata": {}, - "source": [ - "If we check `companies_id` we'll notice it hasn't had 4800 companies added to it:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e3ef12fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "314" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(network.company_ids['company_id'].unique())" - ] - }, - { - "cell_type": "markdown", - "id": "d177f1b5", - "metadata": {}, - "source": [ - "Including limits is useful to avoid our databases getting clogged up with random companies. \n", - "Although lets pause to briefly explore what address would have thousands of companies registered there?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8bb8bdf1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'3rd Floor, 207 Regent Street London W1B 3HH England'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "network.maxsize_entities['node'][0]" - ] - }, - { - "cell_type": "markdown", - "id": "e8644d6b", - "metadata": {}, - "source": [ - "![title](assets/images/regent_storefront.jpeg)" - ] - }, - { - "cell_type": "markdown", - "id": "40354a28", - "metadata": {}, - "source": [ - "\"3rd Floor, 207, Regent Street\" is a \"virtual office\" run by a company called [Hold Everything](https://www.hold-everything.com/). Businesses can use this address for correspondance/registration for £24 a month:" - ] - }, - { - "cell_type": "markdown", - "id": "11b08c79", - "metadata": {}, - "source": [ - "![title](assets/images/exclusive.png)" - ] - }, - { - "cell_type": "markdown", - "id": "2c9e85ed", - "metadata": {}, - "source": [ - "However the large number of companies registered at a single address can lead to many instances of mistaken identity. 
Just because a company is registered at a virtual office does not mean it has any connection with other companies registered there.:" - ] - }, - { - "cell_type": "markdown", - "id": "be5e4352", - "metadata": {}, - "source": [ - "![title](assets/images/review.png)" - ] - }, - { - "cell_type": "markdown", - "id": "282ba8ea", - "metadata": {}, - "source": [ - "Numerous media outlets have reported on fraudulent companies that use virtual offices and incorporation services: \n", - "- Kemp House, 162 City Road | Capital Officer: [Mystery group took millions in furlough funds - Financial Times](https://www.ft.com/content/b3c70369-5170-47ca-b779-fc0898fd29e6)\n", - "- 20-22 Wenlock Road | Made Simple: [Court shuts down companies behind £9m truffle scam - Gov.uk](https://www.gov.uk/government/news/court-shuts-down-companies-behind-9m-truffle-scam)\n", - "- 2 Woodberry Down | A1 Company Services [How A Suburban North London House Is Connected To The Paul Manafort Indictment - Huffington Post](https://www.huffingtonpost.co.uk/entry/manfort-london-connection_uk_59f72f50e4b07fdc5fbf92c7)\n", - "- 29 Harley Street | Formations House [Offshore in central London: the curious case of 29 Harley Street - The Guardian](https://www.theguardian.com/business/2016/apr/19/offshore-central-london-curious-case-29-harley-street)\n", - "- 63-66 Hatton Garden | Valemont Properties Ltd [The Global Laundromat: how did it work and who benefited? 
- The Guardian](https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited)" - ] - }, - { - "cell_type": "markdown", - "id": "a85fdcfa", - "metadata": {}, - "source": [ - "If we wanted to get all companies listed at 207 Regent Street we can adjust our maxsize limits to `None` and attempt to perform a hop again:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "eb0c02d0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hop number: 1\n", - "Processed 1/1 addresses.\n" - ] - } - ], - "source": [ - "regent_street_network = base.Network(address='3rd Floor, 207 Regent Street London W1B 3HH England')\n", - "regent_street_network.hop.companies_at_address_maxsize = None\n", - "regent_street_network.hop.officers_at_address_maxsize = None\n", - "regent_street_network.perform_hop(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3dc0f165", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_idnlink_typenode_typenode
0078656981Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
1059869061Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
2079285161Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
3079574531Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
4079769841Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
..................
4809111197731Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
4810112776281Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
4811126143451Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
4812145279281Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
4813069260761Company at AddressAddress3rd Floor, 207 Regent Street London W1B 3HH En...
\n", - "

4814 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " company_id n link_type node_type \\\n", - "0 07865698 1 Company at Address Address \n", - "1 05986906 1 Company at Address Address \n", - "2 07928516 1 Company at Address Address \n", - "3 07957453 1 Company at Address Address \n", - "4 07976984 1 Company at Address Address \n", - "... ... .. ... ... \n", - "4809 11119773 1 Company at Address Address \n", - "4810 11277628 1 Company at Address Address \n", - "4811 12614345 1 Company at Address Address \n", - "4812 14527928 1 Company at Address Address \n", - "4813 06926076 1 Company at Address Address \n", - "\n", - " node \n", - "0 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "1 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "2 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "3 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "4 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "... ... \n", - "4809 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "4810 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "4811 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "4812 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "4813 3rd Floor, 207 Regent Street London W1B 3HH En... \n", - "\n", - "[4814 rows x 5 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regent_street_network.company_ids" - ] - }, - { - "cell_type": "markdown", - "id": "cff1061e", - "metadata": {}, - "source": [ - "Such large networks can still be interesting to analyse. For instance if we perform another hop this will get all the officers for every company at the address. 
This will take several hours to build as we have lots of companies to analyse, however if we want to save time we could just uncomment and load a pre-made network below: " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ef262359", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hop number: 1\n", - "Processed 0/0 addresses.\n", - "Processed 4814/4814 companies.\n", - "Processed 12/12 officers.\n" - ] - } - ], - "source": [ - "regent_street_network.perform_hop(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "38937142", - "metadata": {}, - "outputs": [], - "source": [ - "# import pickle\n", - "# with open('assets/networks/regent_street_network.pickle', 'rb') as handle:\n", - "# regent_street_network = pickle.load(handle)" - ] - }, - { - "cell_type": "markdown", - "id": "d6e330ee", - "metadata": {}, - "source": [ - "Analysing the most frequently occuring officers running businesses from 207 Regent Street returns some very busy officers and incorporation agents:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4e97fa3b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SECRETARIES LIMITED CT 177\n", - "Stuart Ralph POPPLETON 61\n", - "SECRETARIES LTD. CT 45\n", - "Stuart POPPLETON 42\n", - "TCS LIMITED HELVE 35\n", - " ... \n", - "Slawomir Zbigniew GROCHULSKI 1\n", - "Adam Kapitan BERGMAN 1\n", - "Colum Michael MAKIN 1\n", - "Peter Adam ROSE 1\n", - "Kamel HAMACHE 1\n", - "Name: name, Length: 6636, dtype: int64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "regent_street_network.officer_ids['name'].value_counts()" - ] - }, - { - "cell_type": "markdown", - "id": "d6a22e40", - "metadata": {}, - "source": [ - "A quick news lookup on two of the officers in the top 5, J. Beardsley of Helve TCS Limited and S. 
Poppleton reveal these names to be connected to several known instances of fraud:\n", - "- [Fraudster duo jailed for their part in defrauding millions of pounds from over 100 victims - Crown Prosecution Service](https://www.cps.gov.uk/cps/news/fraudster-duo-jailed-their-part-defrauding-millions-pounds-over-100-victims)\n", - "- [Print farming companies struck off - Printweek](https://www.printweek.com/news/article/print-farming-companies-struck-off)\n", - "- [Rogue book publishers slammed shut by the courts - Gov.uk](https://www.gov.uk/government/news/rogue-book-publishers-slammed-shut-by-the-courts)" - ] - }, - { - "cell_type": "markdown", - "id": "f0699a27", - "metadata": {}, - "source": [ - "### Busier Addresses and Downloaded Data" - ] - }, - { - "cell_type": "markdown", - "id": "944525cb", - "metadata": {}, - "source": [ - "There are situations where some addresses have thousands or even tens of thousands of companies registered. Companies House provides two methods for getting company data, API and data product. We used the API to get the information above which returns all active and dissolved companies registered to the address. We get the same result when we attempt to perform an advanced company search using this address through the website:" - ] - }, - { - "cell_type": "markdown", - "id": "c307994f", - "metadata": {}, - "source": [ - "![title](assets/images/regent.png)" - ] - }, - { - "cell_type": "markdown", - "id": "517e6aaa", - "metadata": {}, - "source": [ - "Unfortunately the API is limited to returing 5000 result max. This is fine in our case with 207 Regent Street because we're just under the limit. However there are much bigger fish out there for instance, '75 Shelton Street':" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1f40ee11", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hop number: 1\n", - "Processed 1/1 addresses.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nodetypemaxsize_typesize
071-75, Shelton Street, Covent Garden, London, ...AddressCompanies5000
\n", - "
" - ], - "text/plain": [ - " node type maxsize_type \\\n", - "0 71-75, Shelton Street, Covent Garden, London, ... Address Companies \n", - "\n", - " size \n", - "0 5000 " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", - "shelton_street_network.perform_hop(1)\n", - "shelton_street_network.maxsize_entities" - ] - }, - { - "cell_type": "markdown", - "id": "6f1abb52", - "metadata": {}, - "source": [ - "We can already see its over 5000 limit for the API. If we check online we can see the number is huge: " - ] - }, - { - "cell_type": "markdown", - "id": "03b64f03", - "metadata": {}, - "source": [ - "![title](assets/images/shelton.png)" - ] - }, - { - "cell_type": "markdown", - "id": "f9fda7a6", - "metadata": {}, - "source": [ - "This is where the data product comes in. We can download it in one go and use it to get all of the \"active\" companies. To use the data product:\n", - "1. Download it from [here](http://download.companieshouse.gov.uk/en_output.html) (might take some time as its a pretty large file ~430Mb)\n", - "2. Move it to local directory `assets/company_data/` and unzip the file \n", - "3. Load into a dataframe which we can pass to our network class\n", - "\n", - "Might take a minute to load. 
How adjust the file string below and attempt to load it into `company_data`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d9d0080", - "metadata": {}, - "outputs": [], - "source": [ - "company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")" - ] - }, - { - "cell_type": "markdown", - "id": "2273cf39", - "metadata": {}, - "source": [ - "Now lets try get every company at the very overcrowded 71-75 Shelton Street address:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "3e273ce0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hop number: 1\n", - "Processed 1/1 addresses.\n" - ] - } - ], - "source": [ - "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", - "shelton_street_network.hop.companies_at_address_maxsize = None\n", - "shelton_street_network.hop.officers_at_address_maxsize = None\n", - "shelton_street_network.get_officers_at_address = False\n", - "shelton_street_network.perform_hop(1, company_data= company_data)" - ] - }, - { - "cell_type": "markdown", - "id": "820a908d", - "metadata": {}, - "source": [ - "If we check `company_ids` we have over 70000 companies that we could build a network from if we had lots of time on our hands:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "12acb915", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
company_idnlink_typenode_typenode
0028711001Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
1029657701Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
2007505931Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
3125394141Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
4133263071Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
..................
74859058312391Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
74860078464041Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
74861118692151Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
74862131867881Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
74863136408881Company at AddressAddress71-75, Shelton Street, Covent Garden, London, ...
\n", - "

74864 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " company_id n link_type node_type \\\n", - "0 02871100 1 Company at Address Address \n", - "1 02965770 1 Company at Address Address \n", - "2 00750593 1 Company at Address Address \n", - "3 12539414 1 Company at Address Address \n", - "4 13326307 1 Company at Address Address \n", - "... ... .. ... ... \n", - "74859 05831239 1 Company at Address Address \n", - "74860 07846404 1 Company at Address Address \n", - "74861 11869215 1 Company at Address Address \n", - "74862 13186788 1 Company at Address Address \n", - "74863 13640888 1 Company at Address Address \n", - "\n", - " node \n", - "0 71-75, Shelton Street, Covent Garden, London, ... \n", - "1 71-75, Shelton Street, Covent Garden, London, ... \n", - "2 71-75, Shelton Street, Covent Garden, London, ... \n", - "3 71-75, Shelton Street, Covent Garden, London, ... \n", - "4 71-75, Shelton Street, Covent Garden, London, ... \n", - "... ... \n", - "74859 71-75, Shelton Street, Covent Garden, London, ... \n", - "74860 71-75, Shelton Street, Covent Garden, London, ... \n", - "74861 71-75, Shelton Street, Covent Garden, London, ... \n", - "74862 71-75, Shelton Street, Covent Garden, London, ... \n", - "74863 71-75, Shelton Street, Covent Garden, London, ... 
\n", - "\n", - "[74864 rows x 5 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelton_street_network.company_ids" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/assets/.DS_Store b/assets/.DS_Store deleted file mode 100644 index c71ecb5..0000000 Binary files a/assets/.DS_Store and /dev/null differ diff --git a/assets/images/.DS_Store b/assets/images/.DS_Store deleted file mode 100644 index 1f38700..0000000 Binary files a/assets/images/.DS_Store and /dev/null differ diff --git a/assets/networks/kingdom_of_sweets_network.pickle b/assets/networks/kingdom_of_sweets_network.pickle index a71da7c..f916140 100644 Binary files a/assets/networks/kingdom_of_sweets_network.pickle and b/assets/networks/kingdom_of_sweets_network.pickle differ diff --git a/environment.yml b/config/environment.yml similarity index 100% rename from environment.yml rename to config/environment.yml diff --git a/config/requirements.txt b/config/requirements.txt new file mode 100644 index 0000000..670ba2c --- /dev/null +++ b/config/requirements.txt @@ -0,0 +1,100 @@ +anyio==3.6.2 +appnope==0.1.3 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +asttokens==2.2.1 +attrs==22.2.0 +Babel==2.11.0 +backcall==0.2.0 +beautifulsoup4==4.11.1 +bleach==5.0.1 +bqplot==0.12.36 +branca==0.6.0 +cffi==1.15.1 +charset-normalizer==2.1.1 +comm==0.1.2 +debugpy==1.6.4 +decorator==5.1.1 +defusedxml==0.7.1 +entrypoints==0.4 +executing==1.2.0 +fastjsonschema==2.16.2 +fqdn==1.5.1 +idna==3.4 +importlib-metadata==5.2.0 +ipykernel==6.19.4 
+ipyleaflet==0.17.2 +ipython==8.7.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.4 +isoduration==20.11.0 +jedi==0.18.2 +Jinja2==3.1.2 +json5==0.9.10 +jsonpointer==2.3 +jsonschema==4.17.3 +jupyter-events==0.5.0 +jupyter-server==1.23.4 +jupyter_client==7.4.1 +jupyter_core==5.1.1 +jupyter_server_terminals==0.4.3 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.5 +jupyterlab_server==2.17.0 +MarkupSafe==2.1.1 +matplotlib-inline==0.1.6 +mistune==2.0.4 +nbclassic==0.4.8 +nbclient==0.7.2 +nbconvert==7.2.7 +nbformat==5.7.1 +nest-asyncio==1.5.6 +notebook==6.5.2 +notebook_shim==0.2.2 +numpy==1.24.0 +packaging==22.0 +pandas==1.5.2 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +platformdirs==2.6.0 +prometheus-client==0.15.0 +prompt-toolkit==3.0.36 +psutil==5.9.4 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +Pygments==2.13.0 +pyrsistent==0.19.2 +python-dateutil==2.8.2 +python-json-logger==2.0.4 +pytz==2022.7 +PyYAML==6.0 +pyzmq==24.0.1 +regex==2022.10.31 +requests==2.28.1 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +Send2Trash==1.8.0 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.3.2.post1 +stack-data==0.6.2 +terminado==0.17.1 +tinycss2==1.2.1 +tornado==6.2 +traitlets==5.8.0 +traittypes==0.2.1 +uri-template==1.2.0 +urllib3==1.26.13 +voila==0.4.0 +wcwidth==0.2.5 +webcolors==1.12 +webencodings==0.5.1 +websocket-client==1.4.2 +websockets==10.4 +widgetsnbextension==4.0.5 +xyzservices==2022.9.0 +zipp==3.11.0 diff --git a/dashboard/.ipynb_checkpoints/quickstart_voila-checkpoint.ipynb b/dashboard/.ipynb_checkpoints/quickstart_voila-checkpoint.ipynb new file mode 100644 index 0000000..ae13187 --- /dev/null +++ b/dashboard/.ipynb_checkpoints/quickstart_voila-checkpoint.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b6926e35", + "metadata": {}, + "source": [ + "# sugartrail " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f17ebdd2", + "metadata": {}, + "outputs": [], + "source": [ + "from 
sugartrail import mapview, api, base\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbc5e202", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "network = base.Network()" + ] + }, + { + "cell_type": "markdown", + "id": "1704e377", + "metadata": {}, + "source": [ + "1. Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0632780b", + "metadata": {}, + "outputs": [], + "source": [ + "API_input = widgets.Text(\n", + " value='',\n", + " placeholder='Insert API Key',\n", + " disabled=False\n", + ")\n", + "\n", + "auth_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "auth_button = widgets.Button(description='Authenticate',button_style='success')\n", + "auth_button.on_click(lambda bt: auth())\n", + "\n", + "def auth():\n", + " auth_button.disabled=True\n", + " API_input.disabled=True\n", + " api.basic_auth.username = API_input.value\n", + " if api.test():\n", + " auth_status.value = u'\\u2705: Login Successful'\n", + " company_text.disabled = False\n", + " init_button.disabled = False\n", + " else:\n", + " auth_button.disabled=False\n", + " API_input.disabled=False\n", + " auth_status.value = u'\\u274c: Invalid API key'\n", + "\n", + "display(API_input, auth_button, auth_status)" + ] + }, + { + "cell_type": "markdown", + "id": "2bd8c5be", + "metadata": {}, + "source": [ + "2. 
Insert the unique company registration number (CRN) for a company you would like to investigate:" + ] + }, + { + "cell_type": "markdown", + "id": "d5f9b6ad", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "128106c5", + "metadata": {}, + "outputs": [], + "source": [ + "company_text = widgets.Text(\n", + " value='',\n", + " placeholder='Insert Company ID',\n", + " disabled=True\n", + ")\n", + "\n", + "init_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "init_button = widgets.Button(description='Initialise',button_style='success', disabled=True)\n", + "init_button.on_click(lambda bt: init_network()) \n", + "\n", + "def init_network():\n", + " init_button.disabled=True\n", + " company_text.disabled=True\n", + " api.basic_auth.username = API_input.value\n", + " response = api.get_company(str(company_text.value))\n", + " if response:\n", + " network.company_id = str(company_text.value)\n", + " init_status.value = u'\\u2705: Initialisation Successful for ' + str(response['company_name']) \n", + " depth_selector.disabled = False\n", + " generate_network_button.disabled = False\n", + " else:\n", + " auth_button.disabled=False\n", + " API_input.disabled=False\n", + " init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + str(company_text.value) + ' found.'\n", + "\n", + "display(company_text, init_button, init_status)" + ] + }, + { + "cell_type": "markdown", + "id": "addafb36", + "metadata": {}, + "source": [ + "3. 
Select the depth of the network you would like to build:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea0e8392", + "metadata": {}, + "outputs": [], + "source": [ + "depth_selector = widgets.BoundedIntText(\n", + " value=1,\n", + " min=1,\n", + " max=5,\n", + " step=1,\n", + " disabled=True\n", + ")\n", + "\n", + "generate_network_button = widgets.Button(description='Build Network',button_style='success', disabled=True)\n", + "generate_network_button.on_click(lambda bt: generate_network()) \n", + "\n", + "\n", + "build_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "def generate_network():\n", + " with output_box:\n", + " depth_selector.disabled = True\n", + " generate_network_button.disabled = True\n", + " network.perform_hop(depth_selector.value + 1)\n", + " network.run_map_preprocessing()\n", + " build_map_button.disabled = False\n", + " \n", + " \n", + "output_box = widgets.Output()\n", + "display(depth_selector, generate_network_button, build_status, output_box)" + ] + }, + { + "cell_type": "markdown", + "id": "03ffce05", + "metadata": {}, + "source": [ + "4. 
Visualise network on a map:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6449cd96", + "metadata": {}, + "outputs": [], + "source": [ + "build_map_button=widgets.Button(description='Build Map',button_style='success', disabled=True)\n", + "build_map_button.on_click(lambda bt: generate_map()) \n", + "\n", + "map_container = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "def generate_map():\n", + " map_data,path_table = mapview.build_map(network, clear_widget=False) \n", + " hbox = widgets.HBox([path_table])\n", + " vbox.children = [map_data, hbox]\n", + " accordion.selected_index=0\n", + " accordi0n.open(0)\n", + " build_map_button.disabled = True\n", + "\n", + "display(build_map_button, map_container)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a07e3b", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "empty_network = base.Network()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "01dca0cf", + "metadata": { + "scrolled": true, + "tags": [ + "7" + ] + }, + "outputs": [], + "source": [ + "map_data,path_table = mapview.build_map(network, clear_widget=False) \n", + "hbox = widgets.HBox([path_table])\n", + "vbox = widgets.VBox([map_data, hbox])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "684a116e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "08a9390c88cb49cfb5705f3a0b378ced", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Accordion(children=(VBox(children=(Map(bottom=87768.0, center=[51.27331450324598, -3.223454500000008], control…" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accordion = widgets.Accordion(children=[vbox])\n", + "accordion.set_title(0, 'Map')\n", + "# accordion.set_title(1, 'Text')\n", + "accordion" + ] + }, + { + "cell_type": "markdown", + "id": "457bf4d0", + 
"metadata": {}, + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dashboard/quickstart_voila.ipynb b/dashboard/quickstart_voila.ipynb new file mode 100644 index 0000000..ae13187 --- /dev/null +++ b/dashboard/quickstart_voila.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b6926e35", + "metadata": {}, + "source": [ + "# sugartrail " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f17ebdd2", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import mapview, api, base\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbc5e202", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "network = base.Network()" + ] + }, + { + "cell_type": "markdown", + "id": "1704e377", + "metadata": {}, + "source": [ + "1. 
Insert your [Companies House API](https://developer.company-information.service.gov.uk/how-to-create-an-application) key:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0632780b", + "metadata": {}, + "outputs": [], + "source": [ + "API_input = widgets.Text(\n", + " value='',\n", + " placeholder='Insert API Key',\n", + " disabled=False\n", + ")\n", + "\n", + "auth_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "auth_button = widgets.Button(description='Authenticate',button_style='success')\n", + "auth_button.on_click(lambda bt: auth())\n", + "\n", + "def auth():\n", + " auth_button.disabled=True\n", + " API_input.disabled=True\n", + " api.basic_auth.username = API_input.value\n", + " if api.test():\n", + " auth_status.value = u'\\u2705: Login Successful'\n", + " company_text.disabled = False\n", + " init_button.disabled = False\n", + " else:\n", + " auth_button.disabled=False\n", + " API_input.disabled=False\n", + " auth_status.value = u'\\u274c: Invalid API key'\n", + "\n", + "display(API_input, auth_button, auth_status)" + ] + }, + { + "cell_type": "markdown", + "id": "2bd8c5be", + "metadata": {}, + "source": [ + "2. 
Insert the unique company registration number (CRN) for a company you would like to investigate:" + ] + }, + { + "cell_type": "markdown", + "id": "d5f9b6ad", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "128106c5", + "metadata": {}, + "outputs": [], + "source": [ + "company_text = widgets.Text(\n", + " value='',\n", + " placeholder='Insert Company ID',\n", + " disabled=True\n", + ")\n", + "\n", + "init_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "init_button = widgets.Button(description='Initialise',button_style='success', disabled=True)\n", + "init_button.on_click(lambda bt: init_network()) \n", + "\n", + "def init_network():\n", + " init_button.disabled=True\n", + " company_text.disabled=True\n", + " api.basic_auth.username = API_input.value\n", + " response = api.get_company(str(company_text.value))\n", + " if response:\n", + " network.company_id = str(company_text.value)\n", + " init_status.value = u'\\u2705: Initialisation Successful for ' + str(response['company_name']) \n", + " depth_selector.disabled = False\n", + " generate_network_button.disabled = False\n", + " else:\n", + " auth_button.disabled=False\n", + " API_input.disabled=False\n", + " init_status.value = u'\\u274c: Initialisation Failed. No records for company: ' + str(company_text.value) + ' found.'\n", + "\n", + "display(company_text, init_button, init_status)" + ] + }, + { + "cell_type": "markdown", + "id": "addafb36", + "metadata": {}, + "source": [ + "3. 
Select the depth of the network you would like to build:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea0e8392", + "metadata": {}, + "outputs": [], + "source": [ + "depth_selector = widgets.BoundedIntText(\n", + " value=1,\n", + " min=1,\n", + " max=5,\n", + " step=1,\n", + " disabled=True\n", + ")\n", + "\n", + "generate_network_button = widgets.Button(description='Build Network',button_style='success', disabled=True)\n", + "generate_network_button.on_click(lambda bt: generate_network()) \n", + "\n", + "\n", + "build_status = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "def generate_network():\n", + " with output_box:\n", + " depth_selector.disabled = True\n", + " generate_network_button.disabled = True\n", + " network.perform_hop(depth_selector.value + 1)\n", + " network.run_map_preprocessing()\n", + " build_map_button.disabled = False\n", + " \n", + " \n", + "output_box = widgets.Output()\n", + "display(depth_selector, generate_network_button, build_status, output_box)" + ] + }, + { + "cell_type": "markdown", + "id": "03ffce05", + "metadata": {}, + "source": [ + "4. 
Visualise network on a map:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6449cd96", + "metadata": {}, + "outputs": [], + "source": [ + "build_map_button=widgets.Button(description='Build Map',button_style='success', disabled=True)\n", + "build_map_button.on_click(lambda bt: generate_map()) \n", + "\n", + "map_container = widgets.HTML(\n", + " value=\"\",\n", + ")\n", + "\n", + "def generate_map():\n", + " map_data,path_table = mapview.build_map(network, clear_widget=False) \n", + " hbox = widgets.HBox([path_table])\n", + " vbox.children = [map_data, hbox]\n", + " accordion.selected_index=0\n", + " accordi0n.open(0)\n", + " build_map_button.disabled = True\n", + "\n", + "display(build_map_button, map_container)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a07e3b", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "empty_network = base.Network()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "01dca0cf", + "metadata": { + "scrolled": true, + "tags": [ + "7" + ] + }, + "outputs": [], + "source": [ + "map_data,path_table = mapview.build_map(network, clear_widget=False) \n", + "hbox = widgets.HBox([path_table])\n", + "vbox = widgets.VBox([map_data, hbox])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "684a116e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "08a9390c88cb49cfb5705f3a0b378ced", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Accordion(children=(VBox(children=(Map(bottom=87768.0, center=[51.27331450324598, -3.223454500000008], control…" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accordion = widgets.Accordion(children=[vbox])\n", + "accordion.set_title(0, 'Map')\n", + "# accordion.set_title(1, 'Text')\n", + "accordion" + ] + }, + { + "cell_type": "markdown", + "id": "457bf4d0", + 
"metadata": {}, + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Tutorial 1 - Get Started.ipynb b/notebooks/.ipynb_checkpoints/001_getting_started-checkpoint.ipynb similarity index 97% rename from Tutorial 1 - Get Started.ipynb rename to notebooks/.ipynb_checkpoints/001_getting_started-checkpoint.ipynb index 0ea9fd4..9f9d5b1 100644 --- a/Tutorial 1 - Get Started.ipynb +++ b/notebooks/.ipynb_checkpoints/001_getting_started-checkpoint.ipynb @@ -30,12 +30,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "81c37bf3", "metadata": {}, "outputs": [], "source": [ "from sugartrail import api, mapview, base\n", + "from ipywidgets import VBox, HBox\n", "\n", "api.basic_auth.username = \"\"" ] @@ -94,7 +95,7 @@ "id": "f73b17d8", "metadata": {}, "source": [ - "![title](assets/images/spy.png)" + "![title](../assets/images/spy.png)" ] }, { @@ -110,7 +111,7 @@ "id": "e21f3c98", "metadata": {}, "source": [ - "![title](assets/images/scrooge.png)" + "![title](../assets/images/scrooge.png)" ] }, { @@ -440,8 +441,7 @@ }, "outputs": [], "source": [ - "from ipywidgets import VBox, HBox\n", - "map_data,path_table = mapview.build_map(network) \n", + "map_data, path_table = mapview.build_map(network) \n", "hbox = HBox([path_table])\n", "vbox = VBox([map_data, hbox])\n", "vbox" @@ -467,7 +467,7 @@ "id": "f6674e52", "metadata": {}, "source": [ - "\"Drawing\"\n" + "\"Drawing\"\n" ] }, { @@ -505,7 +505,7 @@ "source": [ "import pickle\n", "\n", - "with open('assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n", + "with 
open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n", " pickle.dump(network, handle)" ] }, @@ -516,7 +516,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n", + "with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n", " network = pickle.load(handle)" ] } @@ -537,7 +537,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/Tutorial 2 - Candy Connections.ipynb b/notebooks/.ipynb_checkpoints/002_candy_connections-checkpoint.ipynb similarity index 94% rename from Tutorial 2 - Candy Connections.ipynb rename to notebooks/.ipynb_checkpoints/002_candy_connections-checkpoint.ipynb index b3683b5..ddca3d3 100644 --- a/Tutorial 2 - Candy Connections.ipynb +++ b/notebooks/.ipynb_checkpoints/002_candy_connections-checkpoint.ipynb @@ -122,7 +122,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
470-482 Oxford Street
\n", "
" ] @@ -174,7 +174,7 @@ "outputs": [], "source": [ "# import pickle\n", - "# with open('assets/networks/western_crown_network.pickle', 'rb') as handle:\n", + "# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n", "# western_crown_network = pickle.load(handle)" ] }, @@ -231,7 +231,7 @@ "metadata": {}, "outputs": [], "source": [ - "pd.DataFrame(western_crown_network.find_path('05548476'))[['node_index', 'node', 'hop', 'node_type', 'link']]" + "pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]" ] }, { @@ -240,7 +240,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
537 Oxford Street
\n", "
" ] @@ -265,7 +265,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
524 Oxford Street
\n", "
" ] @@ -291,7 +291,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
470-482 Oxford Street
\n", "
" ] @@ -311,7 +311,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
447 Oxford Street
\n", "
" ] @@ -331,7 +331,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
407-409 Oxford Street
\n", "
" ] @@ -351,7 +351,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
267-269 Oxford Street
\n", "
" ] @@ -371,7 +371,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
263-265 Oxford Street
\n", "
" ] @@ -400,7 +400,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
240-242 Oxford Street
\n", "
" ] @@ -426,7 +426,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
158 Oxford Street
\n", "
" ] @@ -447,7 +447,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
146-148 Oxford Street
\n", "
" ] @@ -478,7 +478,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
142 Oxford Street
\n", "
" ] @@ -499,7 +499,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
41 Oxford Street
\n", "
" ] @@ -519,7 +519,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
37-39 Oxford Street
\n", "
" ] @@ -556,7 +556,7 @@ "metadata": {}, "source": [ "
\n", - "\n", + "\n", "
4 Oxford Street
\n", "
" ] @@ -607,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/notebooks/.ipynb_checkpoints/003_virtual_offices-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/003_virtual_offices-checkpoint.ipynb new file mode 100644 index 0000000..bb4818a --- /dev/null +++ b/notebooks/.ipynb_checkpoints/003_virtual_offices-checkpoint.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b2110da7", + "metadata": {}, + "source": [ + "*In this tutorial we will investigate addresses with a large number of companies registered via the API and Companies House Data Product download.*" + ] + }, + { + "cell_type": "markdown", + "id": "25528662", + "metadata": {}, + "source": [ + "### Busy Addresses and API Limits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab9e8ee0", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import base, api, mapview\n", + "import pandas as pd\n", + "api.basic_auth.username = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "00c6a5be", + "metadata": {}, + "source": [ + "When navigating Companies House there are times that we will run into some very popular addresses. 
For example, let's say we build a network from [this officer](https://find-and-update.company-information.service.gov.uk/officers/Nd2URspq4bvLy-hwzDZ0_p7FGJw/appointments):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "723f234a", + "metadata": {}, + "outputs": [], + "source": [ + "officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n", + "network = base.Network(officer_id=officer_id)\n", + "network.perform_hop(2)" + ] + }, + { + "cell_type": "markdown", + "id": "edad561e", + "metadata": {}, + "source": [ + "Within 2 hops we've got over 60 addresses (although many of them look like duplicate entries):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eea32631", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ce897c0", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses['address'].unique()" + ] + }, + { + "cell_type": "markdown", + "id": "8c17fff5", + "metadata": {}, + "source": [ + "If we check out the `maxsize_entities` property of our Network class, we will see a dataframe containing all of the addresses and officers that have exceeded the maxsize limits imposed in the Hop class. In this case, we can see one of the addresses in the network has over 4800 companies based there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b8d3c20", + "metadata": {}, + "outputs": [], + "source": [ + "network.maxsize_entities" + ] + }, + { + "cell_type": "markdown", + "id": "5ad7b443", + "metadata": {}, + "source": [ + "Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `company_ids`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f94f731", + "metadata": {}, + "outputs": [], + "source": [ + "network.hop.companies_at_address_maxsize" + ] + }, + { + "cell_type": "markdown", + "id": "2d4edaf0", + "metadata": {}, + "source": [ + "If we check `company_ids` we'll notice it hasn't had 4800 companies added to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ef12fe", + "metadata": {}, + "outputs": [], + "source": [ + "len(network.company_ids['company_id'].unique())" + ] + }, + { + "cell_type": "markdown", + "id": "d177f1b5", + "metadata": {}, + "source": [ + "Including limits is useful to avoid our databases getting clogged up with random companies. \n", + "Let's pause, though, to briefly explore what kind of address would have thousands of companies registered there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bb8bdf1", + "metadata": {}, + "outputs": [], + "source": [ + "network.maxsize_entities['node'][0]" + ] + }, + { + "cell_type": "markdown", + "id": "e8644d6b", + "metadata": {}, + "source": [ + "![title](../assets/images/regent_storefront.jpeg)" + ] + }, + { + "cell_type": "markdown", + "id": "40354a28", + "metadata": {}, + "source": [ + "\"3rd Floor, 207, Regent Street\" is a \"virtual office\" run by a company called [Hold Everything](https://www.hold-everything.com/). Businesses can use this address for correspondence/registration for £24 a month:" + ] + }, + { + "cell_type": "markdown", + "id": "11b08c79", + "metadata": {}, + "source": [ + "![title](../assets/images/exclusive.png)" + ] + }, + { + "cell_type": "markdown", + "id": "2c9e85ed", + "metadata": {}, + "source": [ + "However, the large number of companies registered at a single address can lead to many instances of mistaken identity. 
Just because a company is registered at a virtual office does not mean it has any connection with other companies registered there:" + ] + }, + { + "cell_type": "markdown", + "id": "be5e4352", + "metadata": {}, + "source": [ + "![title](../assets/images/review.png)" + ] + }, + { + "cell_type": "markdown", + "id": "282ba8ea", + "metadata": {}, + "source": [ + "Numerous media outlets have reported on fraudulent companies that use virtual offices and incorporation services: \n", + "- Kemp House, 162 City Road | Capital Officer: [Mystery group took millions in furlough funds - Financial Times](https://www.ft.com/content/b3c70369-5170-47ca-b779-fc0898fd29e6)\n", + "- 20-22 Wenlock Road | Made Simple: [Court shuts down companies behind £9m truffle scam - Gov.uk](https://www.gov.uk/government/news/court-shuts-down-companies-behind-9m-truffle-scam)\n", + "- 2 Woodberry Down | A1 Company Services: [How A Suburban North London House Is Connected To The Paul Manafort Indictment - Huffington Post](https://www.huffingtonpost.co.uk/entry/manfort-london-connection_uk_59f72f50e4b07fdc5fbf92c7)\n", + "- 29 Harley Street | Formations House: [Offshore in central London: the curious case of 29 Harley Street - The Guardian](https://www.theguardian.com/business/2016/apr/19/offshore-central-london-curious-case-29-harley-street)\n", + "- 63-66 Hatton Garden | Valemont Properties Ltd: [The Global Laundromat: how did it work and who benefited? 
- The Guardian](https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited)" + ] + }, + { + "cell_type": "markdown", + "id": "a85fdcfa", + "metadata": {}, + "source": [ + "If we wanted to get all companies listed at 207 Regent Street we can adjust our maxsize limits to `None` and attempt to perform a hop again:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb0c02d0", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network = base.Network(address='3rd Floor, 207 Regent Street London W1B 3HH England')\n", + "regent_street_network.hop.companies_at_address_maxsize = None\n", + "regent_street_network.hop.officers_at_address_maxsize = None\n", + "regent_street_network.perform_hop(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dc0f165", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.company_ids" + ] + }, + { + "cell_type": "markdown", + "id": "cff1061e", + "metadata": {}, + "source": [ + "Such large networks can still be interesting to analyse. For instance if we perform another hop this will get all the officers for every company at the address. 
This will take several hours to build as we have lots of companies to analyse; however, if we want to save time, we could skip the next cell and just load a pre-made network below: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef262359", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.perform_hop(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38937142", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n", + " regent_street_network = pickle.load(handle)" + ] + }, + { + "cell_type": "markdown", + "id": "d6e330ee", + "metadata": {}, + "source": [ + "Analysing the most frequently occurring officers running businesses from 207 Regent Street returns some very busy officers and incorporation agents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e97fa3b", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.officer_ids['name'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "d6a22e40", + "metadata": {}, + "source": [ + "A quick news lookup on two of the officers in the top 5, J. Beardsley of Helve TCS Limited and S. 
Poppleton, reveals these names to be connected to several known instances of fraud:\n", + "- [Fraudster duo jailed for their part in defrauding millions of pounds from over 100 victims - Crown Prosecution Service](https://www.cps.gov.uk/cps/news/fraudster-duo-jailed-their-part-defrauding-millions-pounds-over-100-victims)\n", + "- [Print farming companies struck off - Printweek](https://www.printweek.com/news/article/print-farming-companies-struck-off)\n", + "- [Rogue book publishers slammed shut by the courts - Gov.uk](https://www.gov.uk/government/news/rogue-book-publishers-slammed-shut-by-the-courts)" + ] + }, + { + "cell_type": "markdown", + "id": "f0699a27", + "metadata": {}, + "source": [ + "### Busier Addresses and Downloaded Data" + ] + }, + { + "cell_type": "markdown", + "id": "944525cb", + "metadata": {}, + "source": [ + "There are situations where some addresses have thousands or even tens of thousands of companies registered. Companies House provides two methods for getting company data: the API and the data product. We used the API to get the information above, which returns all active and dissolved companies registered to the address. We get the same result when we attempt to perform an advanced company search using this address through the website:" + ] + }, + { + "cell_type": "markdown", + "id": "c307994f", + "metadata": {}, + "source": [ + "![title](../assets/images/regent.png)" + ] + }, + { + "cell_type": "markdown", + "id": "517e6aaa", + "metadata": {}, + "source": [ + "Unfortunately, the API is limited to returning a maximum of 5000 results. This is fine in our case with 207 Regent Street because we're just under the limit. 
However, there are much bigger fish out there, for instance '71-75 Shelton Street':" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f40ee11", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", + "shelton_street_network.perform_hop(1)\n", + "shelton_street_network.maxsize_entities" + ] + }, + { + "cell_type": "markdown", + "id": "6f1abb52", + "metadata": {}, + "source": [ + "We can already see it's over the 5000 limit for the API. If we check online we can see the number is huge: " + ] + }, + { + "cell_type": "markdown", + "id": "03b64f03", + "metadata": {}, + "source": [ + "![title](../assets/images/shelton.png)" + ] + }, + { + "cell_type": "markdown", + "id": "f9fda7a6", + "metadata": {}, + "source": [ + "This is where the data product comes in. We can download it in one go and use it to get all of the \"active\" companies. To use the data product:\n", + "1. Download it from [here](http://download.companieshouse.gov.uk/en_output.html) (might take some time as it's a pretty large file ~430Mb)\n", + "2. Move it to the local directory `assets/company_data/` and unzip the file \n", + "3. Load it into a dataframe which we can pass to our network class\n", + "\n", + "It might take a minute to load. 
Now adjust the file string below and attempt to load it into `company_data`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d9d0080", + "metadata": {}, + "outputs": [], + "source": [ + "company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "2273cf39", + "metadata": {}, + "source": [ + "Now let's try to get every company at the very overcrowded 71-75 Shelton Street address:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e273ce0", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", + "shelton_street_network.hop.companies_at_address_maxsize = None\n", + "shelton_street_network.hop.officers_at_address_maxsize = None\n", + "shelton_street_network.get_officers_at_address = False\n", + "shelton_street_network.perform_hop(1, company_data= company_data)" + ] + }, + { + "cell_type": "markdown", + "id": "820a908d", + "metadata": {}, + "source": [ + "If we check `company_ids` we have over 70000 companies that we could build a network from if we had lots of time on our hands:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12acb915", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network.company_ids" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Quickstart.ipynb b/notebooks/.ipynb_checkpoints/quickstart-checkpoint.ipynb similarity index 95% rename from Quickstart.ipynb rename to 
notebooks/.ipynb_checkpoints/quickstart-checkpoint.ipynb index 40b3880..7d4906b 100644 --- a/Quickstart.ipynb +++ b/notebooks/.ipynb_checkpoints/quickstart-checkpoint.ipynb @@ -34,10 +34,10 @@ "metadata": {}, "outputs": [], "source": [ - "## network build from Domain Foundation, company_id = \"11951034\"\n", + "# # network build from Domain Foundation, company_id = \"11951034\"\n", "# import pickle\n", "\n", - "# with open('assets/networks/domain_corp_network.pickle', 'rb') as handle:\n", + "# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n", "# network = pickle.load(handle)" ] }, @@ -121,7 +121,7 @@ "execution_count": null, "id": "01dca0cf", "metadata": { - "scrolled": true, + "scrolled": false, "tags": [ "7" ] diff --git a/notebooks/001_getting_started.ipynb b/notebooks/001_getting_started.ipynb new file mode 100644 index 0000000..9f9d5b1 --- /dev/null +++ b/notebooks/001_getting_started.ipynb @@ -0,0 +1,545 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0639ca05", + "metadata": {}, + "source": [ + "*In this tutorial we will walk through the capabilities of the tool in depth.*" + ] + }, + { + "cell_type": "markdown", + "id": "538c9eb1", + "metadata": {}, + "source": [ + "### Introduction \n", + "\n", + "'Sugartrail' was developed to make it easier and faster for researchers to explore connections between companies, persons and addresses within [Companies House](https://www.gov.uk/government/organisations/companies-house). Researchers can build networks of connected companies, persons and addresses based on a defined set of connectivity criteria and then visualise these connections through an [OpenStreetMaps interface](https://ipyleaflet.readthedocs.io/en/latest/index.html)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eee8d524", + "metadata": {}, + "source": [ + "### Prerequisites\n", + "\n", + "Sugartrail uses the [Companies House Public Data API](https://developer-specs.company-information.service.gov.uk/companies-house-public-data-api/reference) to gather data on connected companies, persons and addresses. To access this API you will need a key which you can acquire by registering a [user account](https://developer.company-information.service.gov.uk/get-started/). Once you've acquired the key, insert it below as the string value of `api.basic_auth.username`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81c37bf3", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import api, mapview, base\n", + "from ipywidgets import VBox, HBox\n", + "\n", + "api.basic_auth.username = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "ad4599dc", + "metadata": {}, + "source": [ + "Let's make a test request to validate everything works by attempting to get all the officers who work at [this company](https://find-and-update.company-information.service.gov.uk/company/12411673). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a1dd4f", + "metadata": {}, + "outputs": [], + "source": [ + "company_id = \"12411673\"\n", + "api.get_company_officers(company_id)" + ] + }, + { + "cell_type": "markdown", + "id": "29d8dd26", + "metadata": {}, + "source": [ + "### Initialising Networks \n", + "\n", + "To create a network we start from a single company, person or address. Networks are built and stored with the `Network` class. 
Lets go ahead and create a new network:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63bc00fa", + "metadata": {}, + "outputs": [], + "source": [ + "network = base.Network()" + ] + }, + { + "cell_type": "markdown", + "id": "aeedf139", + "metadata": {}, + "source": [ + "`Network` accepts either a company ID, officer ID or address string as the initial node. For example, [this company](https://find-and-update.company-information.service.gov.uk/company/12411673): `company_id` = \"12411673\"\n", + "\n", + "If we wanted to search by address, then `address` = \"513 Tong Street, Flat 5, Bradford, England, BD4 6NA\"" + ] + }, + { + "cell_type": "markdown", + "id": "f73b17d8", + "metadata": {}, + "source": [ + "![title](../assets/images/spy.png)" + ] + }, + { + "cell_type": "markdown", + "id": "b3caccb6", + "metadata": {}, + "source": [ + "For [this officer](https://find-and-update.company-information.service.gov.uk/officers/6WODVBRaegvY3UvEhcQxg0OsPkc/appointments), `officer_id` = \"6WODVBRaegvY3UvEhcQxg0OsPkc\"" + ] + }, + { + "cell_type": "markdown", + "id": "e21f3c98", + "metadata": {}, + "source": [ + "![title](../assets/images/scrooge.png)" + ] + }, + { + "cell_type": "markdown", + "id": "a6198a80", + "metadata": {}, + "source": [ + "Lets build the network from `company_id`: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31eea99d", + "metadata": {}, + "outputs": [], + "source": [ + "network.company_id=\"11004735\"" + ] + }, + { + "cell_type": "markdown", + "id": "7bd5060d", + "metadata": {}, + "source": [ + "We could also just initialise the network by passing `company_id` as an input: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c70f41f", + "metadata": {}, + "outputs": [], + "source": [ + "network = base.Network(company_id=\"11004735\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd0f2a9e", + "metadata": {}, + "source": [ + "Data about companies, persons and addresses are stored 
in several attributes within the `Network` class. If we check the `company_ids` property, we will find the entry we just created:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e12f5461", + "metadata": {}, + "outputs": [], + "source": [ + "network.company_ids" + ] + }, + { + "cell_type": "markdown", + "id": "91c14cbb", + "metadata": {}, + "source": [ + "Each company is represented by its unique ID (`company_id`), number of hops from the origin company (`n`) and the company, address or person it connects to. As we've only saved the origin company so far, there isn't any information on links or connected nodes. There are also attributes for storing officer ids (`officer_ids`) and (`addresses`) although they have no information in them yet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33ed61e2", + "metadata": {}, + "outputs": [], + "source": [ + "network.officer_ids" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5a52e6a", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses" + ] + }, + { + "cell_type": "markdown", + "id": "72f30427", + "metadata": {}, + "source": [ + "### Building Networks" + ] + }, + { + "cell_type": "markdown", + "id": "862f00ef", + "metadata": {}, + "source": [ + "We can now build the network by performing hops that will find new company IDs, officer IDs and addresses connected to the entities already stored within the network. \n", + "\n", + "There are a finite number of ways that officers, companies and addresses can be connected within Companies House:\n", + "\n", + "#### Companies \n", + "\n", + "1. Companies → Officers: companies have officers \n", + "2. Companies → Addresses: companies have a history of registered addresses \n", + "3. Companies → Addresses: companies have correspondence addresses for their persons of significant control (psc)\n", + "\n", + "#### Officers \n", + "\n", + "4. 
Officers → Companies: officers have appointments (companies they have a role in) \n", + "5. Officers → Addresses: officers have correspondence addresses\n", + "6. Officers → Officers: officers may have duplicate entries within Companies House; other officers using the same name and birth date (but different values for `officer_id`)\n", + "\n", + "#### Addresses \n", + "\n", + "7. Addresses → Officers: addresses are used as officer correspondence addresses \n", + "8. Addresses → Companies: addresses are used as company correspondence addresses \n", + "\n", + "To build the network we can use any combination of these connectivity criteria. The above connections are implemented as methods that get called every time we perform a hop: \n", + "\n", + "1. get_company_officers\n", + "2. get_company_address_history\n", + "3. get_psc_correspondance_address\n", + "4. get_officer_appointments\n", + "5. get_officer_correspondance_address \n", + "6. get_officer_duplicates \n", + "7. get_officers_at_address\n", + "8. get_companies_at_address\n", + "\n", + "We can toggle each of these methods via boolean properties of the `Hop` subclass:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32643a9c", + "metadata": {}, + "outputs": [], + "source": [ + "network.hop.__dict__" + ] + }, + { + "cell_type": "markdown", + "id": "1802bb34", + "metadata": {}, + "source": [ + "We can see the `Hop` subclass contains all of the connections mentioned above set to `True` by default, therefore every time we perform a hop, the network will use these methods to get data.\n", + "\n", + "We also notice that there are some properties setting a \"maxsize\" limit. These properties ensure that if the number of results returned by the method exceeds this limit then the results will not be stored within the `Network` class properties. 
This limit is quite important when building networks as some of these methods can return 1000s of results and if we're not interested in these results they can make it difficult to visualise meaningful connections within the network (see Tutorial 3 for more on this). \n", + "\n", + "Let's go ahead and perform one hop using these default settings and see what addresses, companies and officers are added:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "167cc25c", + "metadata": {}, + "outputs": [], + "source": [ + "network.perform_hop(1)" + ] + }, + { + "cell_type": "markdown", + "id": "2486aa17", + "metadata": {}, + "source": [ + "Let's now check out `company_ids`, `officer_ids` and `addresses` to see what new entries have been added. Nothing new in `company_ids` but this is expected as none of the API methods above connect companies with companies in one hop:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6ce4047", + "metadata": {}, + "outputs": [], + "source": [ + "network.company_ids" + ] + }, + { + "cell_type": "markdown", + "id": "eb5cb2f6", + "metadata": {}, + "source": [ + "We can see we now have an officer below in `officer_ids` and some of the other properties in the table now have values other than None. `node_type` describes the type of node the company is connected to (Company, Person or Address), `node_id` provides the unique id for the node (`company_id`, `officer_id` or `address`) and `link_type` describes the relationship between the company and the node." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "947c4cf1", + "metadata": {}, + "outputs": [], + "source": [ + "network.officer_ids" + ] + }, + { + "cell_type": "markdown", + "id": "a8cf6fa0", + "metadata": {}, + "source": [ + "We can interpret the table above as:\n", + "\n", + "There is an officer with ID=`Nd2URspq4bvLy-hwzDZ0_p7FGJw` who is an officer of a company with ID=`11004735`. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7083402a", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses" + ] + }, + { + "cell_type": "markdown", + "id": "264de2dd", + "metadata": {}, + "source": [ + "We can see from the table above that:\n", + "\n", + "`3rd Floor 13 Charles Ii Street London SW1Y 4QU England` is an address that used to be home to a company (with ID=`11004735`)." + ] + }, + { + "cell_type": "markdown", + "id": "b4828d92", + "metadata": {}, + "source": [ + "For reproducibility, each time we perform a hop, the methods and limit configs are stored in `hop_history`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bb5f542", + "metadata": {}, + "outputs": [], + "source": [ + "network.hop_history" + ] + }, + { + "cell_type": "markdown", + "id": "ac1dab27", + "metadata": {}, + "source": [ + "Let's perform another two hops: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2b0baba", + "metadata": {}, + "outputs": [], + "source": [ + "network.perform_hop(2)" + ] + }, + { + "cell_type": "markdown", + "id": "cec66fcc", + "metadata": {}, + "source": [ + "Now we can go ahead and visualise this in a map. To do this we need to get a bit more info that isn't present, namely the coordinates for all the addresses mentioned and the company names for each company. 
We can get this information via `run_map_preprocessing()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be52255", + "metadata": {}, + "outputs": [], + "source": [ + "network.run_map_preprocessing()" + ] + }, + { + "cell_type": "markdown", + "id": "dfa1b90c", + "metadata": {}, + "source": [ + "To see the information added, we can check out `address_history` and `companies` properties of our class:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b800202c", + "metadata": {}, + "outputs": [], + "source": [ + "network.address_history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37013a7e", + "metadata": {}, + "outputs": [], + "source": [ + "network.companies " + ] + }, + { + "cell_type": "markdown", + "id": "3e3b597d", + "metadata": {}, + "source": [ + "We can now visualise all the companies in the network with a UK address through OpenStreetMaps:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7256c5f9", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "map_data, path_table = mapview.build_map(network) \n", + "hbox = HBox([path_table])\n", + "vbox = VBox([map_data, hbox])\n", + "vbox" + ] + }, + { + "cell_type": "markdown", + "id": "7e225045", + "metadata": {}, + "source": [ + "Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n", + "\n", + "Select a marker to display additional information: \n", + "- pop-up with the selected company's name and address\n", + "- table containing the most efficient paths from the origin to the selected company\n", + "- antpaths for each company in the network. Red antpath represents the path through all the historic addresses for the selected company. 
Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. \n", + "\n", + "To read paths from the table we start from the bottom of the table where we find one or several rows containing our selected company (`Node`) but with differing values for `Node Index`, `Node Type` and `Link`. If we encounter multiple rows containing our selected node, this tells us there are multiple paths of equal length from the origin to the selected node. For example, consider the following table: " + ] + }, + { + "cell_type": "markdown", + "id": "f6674e52", + "metadata": {}, + "source": [ + "\"Drawing\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "fd5d9a0d", + "metadata": {}, + "source": [ + "Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n", + "\n", + "Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). 
" + ] + }, + { + "cell_type": "markdown", + "id": "4a6662be", + "metadata": {}, + "source": [ + "### Network Persistence" + ] + }, + { + "cell_type": "markdown", + "id": "a68e26ca", + "metadata": {}, + "source": [ + "The network object can be saved with 'pickle' and reloaded when needed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee8d8c24", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n", + " pickle.dump(network, handle)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e7c5578", + "metadata": {}, + "outputs": [], + "source": [ + "with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n", + " network = pickle.load(handle)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/002_candy_connections.ipynb b/notebooks/002_candy_connections.ipynb new file mode 100644 index 0000000..ddca3d3 --- /dev/null +++ b/notebooks/002_candy_connections.ipynb @@ -0,0 +1,615 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d484fa3a", + "metadata": {}, + "source": [ + "*In this tutorial we will explore how many of Oxford Street's souvenir and candy shops are connected.*" + ] + }, + { + "cell_type": "markdown", + "id": "bb98746a", + "metadata": {}, + "source": [ + "In a [recent article](https://www.ft.com/candy) in the Financial Times, journalists mapped a number of candy shops on Oxford Street and noted that the company officers appear to form a loose network \"with some 
sharing residential or business addresses, or taking ownership of a business for months at a time before ceding to another shareholder\". Using the addresses and companies mentioned in the article, I compiled the following relevant records from Companies House:\n", + "\n", + "- CITY SOUVENIRS LTD (539 Oxford Street): 08658716\n", + "- London Hot Accessories Limited (537 Oxford Street): 10116914\n", + "- AMERICAN SWEET DREAMS LIMITED (524 Oxford Street): 13938312\n", + "- Western Crown Limited (470-482 Oxford Street): 13455377\n", + "- CANDYLICIOUS WHOLESALE LIMITED (470-472 Oxford Street): 14091125\n", + "- FREAKNAUGHTY LTD (407 Oxford Street): 11730327\n", + "- NASTY BANG LTD (324 Oxford Street): 14223273\n", + "- Quality Products and Merchandise Ltd (321-323 Oxford Street): 14518117\n", + "- Kingdom of Sweets (270 Oxford Street): 11004735\n", + "- LND Accesorize Limited (271 Oxford Street): 11601607\n", + "- Candystreet (146-148 Oxford Street): 12415826\n", + "- E & A Accessories Limited (35 Oxford Street): 14261732\n", + "- Breeze Vape Limited (33 Oxford Street): 14050986\n", + "- Drip Vape (33 Oxford Street): 14055609\n", + "- Gift 4 You (4 Oxford Street): 11439227\n", + "\n", + "The following companies were also mentioned however I couldn't find a relevant record in Companies House:\n", + "\n", + "- London Dream (476 Oxford Street)\n", + "- Welcome London (399-403 Oxford Street)\n", + "- American Candy Shop (385-389 Oxford Street)\n", + "- Candy World (363-367 Oxford Street)\n", + "- Tobacco & Cigarettes Sold Here (273 Oxford Street)\n", + "- Unique Gifts (159 Oxford Street)\n", + "- Toys and Gifts (142-144 Oxford Street)\n", + "- American Candy World (119-121 Oxford Street)\n", + "- American Sweets & Souvenirs (37-39 Oxford Street)\n", + "\n", + "In this tutorial we will build a large network of companies that connect to a single company on Oxford Street. 
We can use the following dictionary of companies for reference: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e43ba230", + "metadata": {}, + "outputs": [], + "source": [ + "candy_shops = [{\"company_name\": \"City Souvenirs\", \"address\": \"539 Oxford Street\", \"company_id\": \"08658716\"},\n", + " {\"company_name\": \"London Hot Accessories\", \"address\": \"537 Oxford Street\", \"company_id\": \"10116914\"},\n", + " {\"company_name\": \"American Sweet Dreams\", \"address\": \"524 Oxford Street\", \"company_id\": \"13938312\"},\n", + " {\"company_name\": \"London Dream\", \"address\": \"476 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"Western Crown\", \"address\": \"470-482 Oxford Street\", \"company_id\": \"13455377\"},\n", + " {\"company_name\": \"Candylicious Wholesale\", \"address\": \"470-482 Oxford Street\", \"company_id\": \"14091125\"},\n", + " {\"company_name\": \"Freakynaughty\", \"address\": \"407 Oxford Street\", \"company_id\": \"11730327\"},\n", + " {\"company_name\": \"Welcome London\", \"address\": \"399-403 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"American Candy Shop\", \"address\": \"385-389 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"Candy World\", \"address\": \"363-367 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"Nasty Bang\", \"address\": \"324 Oxford Street\", \"company_id\": \"14223273\"},\n", + " {\"company_name\": \"Quality Products and Merchandise\", \"address\": \"324 Oxford Street\", \"company_id\": \"14223273\"},\n", + " {\"company_name\": \"Tobacco & Cigarettes Sold Here\", \"address\": \"273 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"LND Accesorize\", \"address\": \"271 Oxford Street\", \"company_id\": \"11601607\"},\n", + " {\"company_name\": \"Unique Gifts\", \"address\": \"159 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"Toys and Gifts\", \"address\": 
\"142-144 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"Candystreet\", \"address\": \"146-148 Oxford Street\", \"company_id\": \"12415826\"},\n", + " {\"company_name\": \"American Candy World\", \"address\": \"119-121 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"American Sweets & Souvenirs\", \"address\": \"37-39 Oxford Street\", \"company_id\": \"\"},\n", + " {\"company_name\": \"E & A Accessories Limited\", \"address\": \"35 Oxford Street\", \"company_id\": \"14261732\"},\n", + " {\"company_name\": \"Breeze Vape Limited\", \"address\": \"33 Oxford Street\", \"company_id\": \"14050986\"},\n", + " {\"company_name\": \"Drip Vape\", \"address\": \"33 Oxford Street\", \"company_id\": \"14055609\"},\n", + " {\"company_name\": \"Gift 4 You\", \"address\": \"4 Oxford Street\", \"company_id\": \"11439227\"}]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c8ebc89", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import base, api, mapview\n", + "import pandas as pd\n", + "from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea" + ] + }, + { + "cell_type": "markdown", + "id": "26dfff93", + "metadata": {}, + "source": [ + "Add Companies House API key:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a5377a3", + "metadata": {}, + "outputs": [], + "source": [ + "api.basic_auth.username = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "ac9946b1", + "metadata": {}, + "source": [ + "Lets investigate \"Western Crown\" which has an id of \"13455377\"" + ] + }, + { + "cell_type": "markdown", + "id": "a8e5dbe1", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
470-482 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79c2e3ab", + "metadata": {}, + "outputs": [], + "source": [ + "origin_company_id=\"13455377\"\n", + "western_crown_network = base.Network(company_id=origin_company_id)" + ] + }, + { + "cell_type": "markdown", + "id": "15c23378", + "metadata": {}, + "source": [ + "Lets impose some limits on the results. As we're doing a deeper search we want to avoid accumulating lots of irrelevant data connected to incorporation agents and virtual offices. By setting the following limits, the network will not include results that exceed these limits:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e83fd13f", + "metadata": {}, + "outputs": [], + "source": [ + "western_crown_network.hop.companies_at_address_maxsize = 50\n", + "western_crown_network.hop.officers_at_address_maxsize = 50\n", + "western_crown_network.hop.officer_appointments_maxsize = 50" + ] + }, + { + "cell_type": "markdown", + "id": "866bc18e", + "metadata": {}, + "source": [ + "Lets go big and perform 6 hops. It's likely to take some time to gather all the data +1 hour. 
If you don't want to wait, you can also uncomment the block below to load a pre-made network instance and then jump to the 'generate map' cell:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df617fda", + "metadata": {}, + "outputs": [], + "source": [ + "# import pickle\n", + "# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n", + "# western_crown_network = pickle.load(handle)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "477823cf", + "metadata": {}, + "outputs": [], + "source": [ + "western_crown_network.perform_hop(6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a52276d7", + "metadata": {}, + "outputs": [], + "source": [ + "western_crown_network.run_map_preprocessing()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7bdde00f", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# generate map\n", + "map_data,path_table = mapview.build_map(western_crown_network) \n", + "hbox = HBox([path_table])\n", + "vbox = VBox([map_data, hbox])\n", + "vbox" + ] + }, + { + "cell_type": "markdown", + "id": "6d3090cc", + "metadata": {}, + "source": [ + "### Oxford Street Connections\n", + "\n", + "From the data we've gathered, there are many companies and addresses that connect with the original company the network was built from (Western Crown Limited). To print the connections we can pass the company ID to `find_path`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6079643b", + "metadata": {}, + "outputs": [], + "source": [ + "pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]" + ] + }, + { + "cell_type": "markdown", + "id": "439ba049", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
537 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "e641bf13", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [BEST OF LONDON LIMITED](https://find-and-update.company-information.service.gov.uk/company/10895963) (10895963)\n", + "\n", + "*Dissolved Companies*\n", + "- [LONDON HOT ACCESSORIES LIMITED](https://find-and-update.company-information.service.gov.uk/company/10116914) (10116914)\n", + "- [TOURISTS WORLD LTD.](https://find-and-update.company-information.service.gov.uk/company/10643744) (10643744)\n", + "- [GIFTS FOR TOURIST LIMITED](https://find-and-update.company-information.service.gov.uk/company/10910649) (10910649)" + ] + }, + { + "cell_type": "markdown", + "id": "67b89126", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
524 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "145f6470", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [CANDY TOWN LTD](https://find-and-update.company-information.service.gov.uk/company/11464159) (11464159)\n", + "- [ESPANZA LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474248) (11474248)\n", + "\n", + "*Dissolved Companies*\n", + "- [MARGIN FREE SUPER MARKET LIMITED](https://find-and-update.company-information.service.gov.uk/company/10540083) (10540083)\n", + "- [COOL MIX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11031538) (11031538)\n", + "- [ROCK GIFTS LTD](https://find-and-update.company-information.service.gov.uk/company/11588633) (11588633)" + ] + }, + { + "cell_type": "markdown", + "id": "004ff136", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
470-482 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "baf21c69", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [WESTERN CROWN LIMITED](https://find-and-update.company-information.service.gov.uk/company/13455377) (13455377)" + ] + }, + { + "cell_type": "markdown", + "id": "2143ce03", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
447 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "7025e057", + "metadata": {}, + "source": [ + "*Dissolved Companies*\n", + "- [PLANET SOUVENIRS (UK) LIMITED](https://find-and-update.company-information.service.gov.uk/company/07570906) (07570906)" + ] + }, + { + "cell_type": "markdown", + "id": "1b74fcca", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
407-409 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "b2b2771d", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [WESTERN CROWN LIMITED](https://find-and-update.company-information.service.gov.uk/company/13455377) (13455377)" + ] + }, + { + "cell_type": "markdown", + "id": "a3a6e274", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
267-269 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "5c007277", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [MOODY MOON LIMITED](https://find-and-update.company-information.service.gov.uk/company/13287820) (13287820)" + ] + }, + { + "cell_type": "markdown", + "id": "54301d43", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
263-265 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "4a290e19", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [CEEKEY LONDON LTD](https://find-and-update.company-information.service.gov.uk/company/11647876) (11647876)\n", + "\n", + "*Liquidated Companies*\n", + "- [RUSTIC RAY LTD](https://find-and-update.company-information.service.gov.uk/company/11758349) (11758349)\n", + "\n", + "*Dissolved Companies*\n", + "- [LILLY MAX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474310) (11474310)\n", + "- [GIFTNET LTD](https://find-and-update.company-information.service.gov.uk/company/11593230) (11593230)\n", + "- [BUMPZ LTD](https://find-and-update.company-information.service.gov.uk/company/10941293) (10941293)\n", + "- [VENGAT GIFT LTD](https://find-and-update.company-information.service.gov.uk/company/11647421) (11647421)" + ] + }, + { + "cell_type": "markdown", + "id": "a5883ee7", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
240-242 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "99bec6e0", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [GIFT 4 YOU LIMITED](https://find-and-update.company-information.service.gov.uk/company/11439227) (11439227)\n", + "- [WEST END MANAGEMENT LIMITED](https://find-and-update.company-information.service.gov.uk/company/11467385) (11467385)\n", + "\n", + "*Dissolved Companies*\n", + "- [TOURISTS WORLD LTD.](https://find-and-update.company-information.service.gov.uk/company/10643744) (10643744)\n", + "- [GIFTS FOR TOURIST LIMITED](https://find-and-update.company-information.service.gov.uk/company/10910649) (10910649)\n", + "- [LILLY MAX LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474310) (11474310)" + ] + }, + { + "cell_type": "markdown", + "id": "af81028b", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
158 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "0dd6dea9", + "metadata": {}, + "source": [ + "*Dissolved Companies*\n", + "\n", + "- [UNIQUE GIFTS (LONDON) LTD](https://find-and-update.company-information.service.gov.uk/company/07060273) (07060273)" + ] + }, + { + "cell_type": "markdown", + "id": "6c3bf19e", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
146-148 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "914616c5", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [CANDYSTREET LTD](https://find-and-update.company-information.service.gov.uk/company/12415826) (12415826)\n", + "- [LUMS SWEETS LTD](https://find-and-update.company-information.service.gov.uk/company/11864536) (11864536)\n", + "- [SPARK LABEL LTD](https://find-and-update.company-information.service.gov.uk/company/13865359) (13865359)\n", + "- [SEEN CAPTURE LTD](https://find-and-update.company-information.service.gov.uk/company/11468719) (11468719)\n", + "- [PINEBIRD LTD](https://find-and-update.company-information.service.gov.uk/company/11869360) (11869360)\n", + "\n", + "*Dissolved Companies*\n", + "- [WESTERN CANDIES LTD](https://find-and-update.company-information.service.gov.uk/company/12005109) (12005109)\n", + "- [GIFTS OF GLORY LTD](https://find-and-update.company-information.service.gov.uk/company/12268339) (12268339)\n", + "- [CANDY CANE LTD](https://find-and-update.company-information.service.gov.uk/company/12005370) (12005370)\n", + "- [XEE ASSET MANAGEMENT LTD](https://find-and-update.company-information.service.gov.uk/company/10734212) (10734212)\n", + "- [GRAND STORE LTD](https://find-and-update.company-information.service.gov.uk/company/11843228) (11843228)" + ] + }, + { + "cell_type": "markdown", + "id": "fa1727b0", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
142 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "0dc6b948", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [GRAND EMPIRE LIMITED](https://find-and-update.company-information.service.gov.uk/company/13376158) (13376158)\n", + "- [ASUS BLUE LIMITED](https://find-and-update.company-information.service.gov.uk/company/13795800) (13795800)" + ] + }, + { + "cell_type": "markdown", + "id": "047afd96", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
41 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "4719929d", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [FANCY SOUVENIRS LIMITED](https://find-and-update.company-information.service.gov.uk/company/05548476) (05548476)" + ] + }, + { + "cell_type": "markdown", + "id": "5bd0e6f3", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
37-39 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "4f51a748", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "\n", + "- [CHOCO-FIVES LTD](https://find-and-update.company-information.service.gov.uk/company/13967497) (13967497)\n", + "- [FANCYGREEN LTD](https://find-and-update.company-information.service.gov.uk/company/14210992) (14210992)\n", + "- [FORTLEE LTD](https://find-and-update.company-information.service.gov.uk/company/10782536) (10782536)\n", + "- [CEEKEY LONDON LTD](https://find-and-update.company-information.service.gov.uk/company/11647876) (11647876)\n", + "- [CHERRYTREE FOUNDATION](https://find-and-update.company-information.service.gov.uk/company/08632458) (08632458)\n", + "- [CHOCO-LOT LTD](https://find-and-update.company-information.service.gov.uk/company/13964334) (13964334)\n", + "- [FABIAN BELL LTD](https://find-and-update.company-information.service.gov.uk/company/11855234) (11855234)\n", + "- [SEEN CAPTURE LTD](https://find-and-update.company-information.service.gov.uk/company/11468719) (11468719)\n", + "- [ASUSGIFTS LIMITED](https://find-and-update.company-information.service.gov.uk/company/13857378) (13857378)\n", + "- [BRITCO GIFTS LIMITED](https://find-and-update.company-information.service.gov.uk/company/14472151) (14472151)\n", + "- [AH MONEY EXCHANGE LTD](https://find-and-update.company-information.service.gov.uk/company/10231441) (10231441)\n", + "\n", + "*Dissolved Companies*\n", + "- [GIFT PUNCH LIMITED](https://find-and-update.company-information.service.gov.uk/company/12387042) (12387042)\n", + "- [GIFTNET LTD](https://find-and-update.company-information.service.gov.uk/company/11593230) (11593230)\n", + "- [JUICE COLLECTIVE LTD](https://find-and-update.company-information.service.gov.uk/company/08282993) (08282993)\n", + "- [WINHAND LTD](https://find-and-update.company-information.service.gov.uk/company/11016980) (11016980)" + ] + }, + { + "cell_type": "markdown", + "id": "a3f9a2d7", + "metadata": {}, + "source": [ + "
\n", + "\n", + "
4 Oxford Street
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "bef660a5", + "metadata": {}, + "source": [ + "*Active Companies*\n", + "- [GIFT 4 YOU LIMITED](https://find-and-update.company-information.service.gov.uk/company/11439227) (11439227)" + ] + }, + { + "cell_type": "markdown", + "id": "957f4c3e", + "metadata": {}, + "source": [ + "### Beyond Oxford Street\n", + "\n", + "From analysing the 'generate map' cell, we will notice many other companies connected to the original company located beyond Oxford Street. Further research could involve:\n", + "- building networks from some of the other companies registered to Oxford Street addresses\n", + "- building networks deeper than 6 hops to explore a wider range of connections\n", + "- analysing connections in greater detail\n", + "- analysing documents from Companies House linked to companies in the network\n", + "- identify other connected companies of interest beyond Oxford Street\n", + "- develop statistics that communicate the scale of these networks and connectivity within the UK\n", + "- analyse connections outside the UK \n", + "- run a news search on entities within the network to see if companies are connected to any newsworthy entities\n", + "- analyse hotspots for registering new companies over time to see if there are emerging popular locations, in other words where is the new Oxford Street?\n", + "- analyse other types of companies connected to souvenir and candy shops (money exchanges, security firms etc.)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/003_virtual_offices.ipynb 
b/notebooks/003_virtual_offices.ipynb new file mode 100644 index 0000000..bb4818a --- /dev/null +++ b/notebooks/003_virtual_offices.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b2110da7", + "metadata": {}, + "source": [ + "*In this tutorial we will investigate addresses with a large number of companies registered via the API and Companies House Data Product download.*" + ] + }, + { + "cell_type": "markdown", + "id": "25528662", + "metadata": {}, + "source": [ + "### Busy Addresses and API Limits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab9e8ee0", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import base, api, mapview\n", + "import pandas as pd\n", + "api.basic_auth.username = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "00c6a5be", + "metadata": {}, + "source": [ + "When navigating Companies House there are times that we will run into some very popular addresses. For example lets say build a network from [this officer](https://find-and-update.company-information.service.gov.uk/officers/Nd2URspq4bvLy-hwzDZ0_p7FGJw/appointments):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "723f234a", + "metadata": {}, + "outputs": [], + "source": [ + "officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n", + "network = base.Network(officer_id=officer_id)\n", + "network.perform_hop(2)" + ] + }, + { + "cell_type": "markdown", + "id": "edad561e", + "metadata": {}, + "source": [ + "Within 2 hops we've got over 60 addresses (although many of them look like duplicate entries):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eea32631", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ce897c0", + "metadata": {}, + "outputs": [], + "source": [ + "network.addresses['address'].unique()" + ] + }, + { + "cell_type": "markdown", + "id": "8c17fff5", + "metadata": 
{}, + "source": [ + "If we check out the `maxsize_entities` property of our Network class, we will see a dataframe containing all of the addresses and officers that have exceeded the maxsize limits imposed in the Hop class. In this case, we can see one of the addresses in the network has over 4800 companies based there." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b8d3c20", + "metadata": {}, + "outputs": [], + "source": [ + "network.maxsize_entities" + ] + }, + { + "cell_type": "markdown", + "id": "5ad7b443", + "metadata": {}, + "source": [ + "Because the Hop class caps the number of companies returned per address via `companies_at_address_maxsize` (50 by default), these companies will not be added to `company_ids`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f94f731", + "metadata": {}, + "outputs": [], + "source": [ + "network.hop.companies_at_address_maxsize" + ] + }, + { + "cell_type": "markdown", + "id": "2d4edaf0", + "metadata": {}, + "source": [ + "If we check `company_ids` we'll notice it hasn't had 4800 companies added to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ef12fe", + "metadata": {}, + "outputs": [], + "source": [ + "len(network.company_ids['company_id'].unique())" + ] + }, + { + "cell_type": "markdown", + "id": "d177f1b5", + "metadata": {}, + "source": [ + "Including limits is useful to avoid our databases getting clogged up with random companies. \n", + "Although let's pause to briefly explore what address would have thousands of companies registered there?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8bb8bdf1", + "metadata": {}, + "outputs": [], + "source": [ + "network.maxsize_entities['node'][0]" + ] + }, + { + "cell_type": "markdown", + "id": "e8644d6b", + "metadata": {}, + "source": [ + "![title](../assets/images/regent_storefront.jpeg)" + ] + }, + { + "cell_type": "markdown", + "id": "40354a28", + "metadata": {}, + "source": [ + "\"3rd Floor, 207, Regent Street\" is a \"virtual office\" run by a company called [Hold Everything](https://www.hold-everything.com/). Businesses can use this address for correspondence/registration for £24 a month:" + ] + }, + { + "cell_type": "markdown", + "id": "11b08c79", + "metadata": {}, + "source": [ + "![title](../assets/images/exclusive.png)" + ] + }, + { + "cell_type": "markdown", + "id": "2c9e85ed", + "metadata": {}, + "source": [ + "However the large number of companies registered at a single address can lead to many instances of mistaken identity. Just because a company is registered at a virtual office does not mean it has any connection with other companies registered there:" + ] + }, + { + "cell_type": "markdown", + "id": "be5e4352", + "metadata": {}, + "source": [ + "![title](../assets/images/review.png)" + ] + }, + { + "cell_type": "markdown", + "id": "282ba8ea", + "metadata": {}, + "source": [ + "Numerous media outlets have reported on fraudulent companies that use virtual offices and incorporation services: \n", + "- Kemp House, 162 City Road | Capital Officer: [Mystery group took millions in furlough funds - Financial Times](https://www.ft.com/content/b3c70369-5170-47ca-b779-fc0898fd29e6)\n", + "- 20-22 Wenlock Road | Made Simple: [Court shuts down companies behind £9m truffle scam - Gov.uk](https://www.gov.uk/government/news/court-shuts-down-companies-behind-9m-truffle-scam)\n", + "- 2 Woodberry Down | A1 Company Services [How A Suburban North London House Is Connected To The Paul Manafort Indictment - Huffington 
Post](https://www.huffingtonpost.co.uk/entry/manfort-london-connection_uk_59f72f50e4b07fdc5fbf92c7)\n", + "- 29 Harley Street | Formations House [Offshore in central London: the curious case of 29 Harley Street - The Guardian](https://www.theguardian.com/business/2016/apr/19/offshore-central-london-curious-case-29-harley-street)\n", + "- 63-66 Hatton Garden | Valemont Properties Ltd [The Global Laundromat: how did it work and who benefited? - The Guardian](https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited)" + ] + }, + { + "cell_type": "markdown", + "id": "a85fdcfa", + "metadata": {}, + "source": [ + "If we wanted to get all companies listed at 207 Regent Street we can adjust our maxsize limits to `None` and attempt to perform a hop again:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb0c02d0", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network = base.Network(address='3rd Floor, 207 Regent Street London W1B 3HH England')\n", + "regent_street_network.hop.companies_at_address_maxsize = None\n", + "regent_street_network.hop.officers_at_address_maxsize = None\n", + "regent_street_network.perform_hop(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dc0f165", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.company_ids" + ] + }, + { + "cell_type": "markdown", + "id": "cff1061e", + "metadata": {}, + "source": [ + "Such large networks can still be interesting to analyse. For instance if we perform another hop this will get all the officers for every company at the address. 
This will take several hours to build as we have lots of companies to analyse, however if we want to save time we could skip the next cell and just load a pre-made network from the cell after it: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef262359", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.perform_hop(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38937142", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n", + " regent_street_network = pickle.load(handle)" + ] + }, + { + "cell_type": "markdown", + "id": "d6e330ee", + "metadata": {}, + "source": [ + "Analysing the most frequently occurring officers running businesses from 207 Regent Street returns some very busy officers and incorporation agents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e97fa3b", + "metadata": {}, + "outputs": [], + "source": [ + "regent_street_network.officer_ids['name'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "d6a22e40", + "metadata": {}, + "source": [ + "A quick news lookup on two of the officers in the top 5, J. Beardsley of Helve TCS Limited and S. 
Poppleton reveals these names to be connected to several known instances of fraud:\n", + "- [Fraudster duo jailed for their part in defrauding millions of pounds from over 100 victims - Crown Prosecution Service](https://www.cps.gov.uk/cps/news/fraudster-duo-jailed-their-part-defrauding-millions-pounds-over-100-victims)\n", + "- [Print farming companies struck off - Printweek](https://www.printweek.com/news/article/print-farming-companies-struck-off)\n", + "- [Rogue book publishers slammed shut by the courts - Gov.uk](https://www.gov.uk/government/news/rogue-book-publishers-slammed-shut-by-the-courts)" + ] + }, + { + "cell_type": "markdown", + "id": "f0699a27", + "metadata": {}, + "source": [ + "### Busier Addresses and Downloaded Data" + ] + }, + { + "cell_type": "markdown", + "id": "944525cb", + "metadata": {}, + "source": [ + "There are situations where some addresses have thousands or even tens of thousands of companies registered. Companies House provides two methods for getting company data, API and data product. We used the API to get the information above which returns all active and dissolved companies registered to the address. We get the same result when we attempt to perform an advanced company search using this address through the website:" + ] + }, + { + "cell_type": "markdown", + "id": "c307994f", + "metadata": {}, + "source": [ + "![title](../assets/images/regent.png)" + ] + }, + { + "cell_type": "markdown", + "id": "517e6aaa", + "metadata": {}, + "source": [ + "Unfortunately the API is limited to returning a maximum of 5000 results. This is fine in our case with 207 Regent Street because we're just under the limit. 
However there are much bigger fish out there, for instance '71-75 Shelton Street':" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f40ee11", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", + "shelton_street_network.perform_hop(1)\n", + "shelton_street_network.maxsize_entities" + ] + }, + { + "cell_type": "markdown", + "id": "6f1abb52", + "metadata": {}, + "source": [ + "We can already see it's over the 5000 limit for the API. If we check online we can see the number is huge: " + ] + }, + { + "cell_type": "markdown", + "id": "03b64f03", + "metadata": {}, + "source": [ + "![title](../assets/images/shelton.png)" + ] + }, + { + "cell_type": "markdown", + "id": "f9fda7a6", + "metadata": {}, + "source": [ + "This is where the data product comes in. We can download it in one go and use it to get all of the \"active\" companies. To use the data product:\n", + "1. Download it from [here](http://download.companieshouse.gov.uk/en_output.html) (might take some time as it's a pretty large file, ~430MB)\n", + "2. Move it to local directory `assets/company_data/` and unzip the file \n", + "3. Load into a dataframe which we can pass to our network class\n", + "\n", + "Might take a minute to load. 
Now adjust the file path below and attempt to load it into `company_data`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d9d0080", + "metadata": {}, + "outputs": [], + "source": [ + "company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "2273cf39", + "metadata": {}, + "source": [ + "Now let's try to get every company at the very overcrowded 71-75 Shelton Street address:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e273ce0", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n", + "shelton_street_network.hop.companies_at_address_maxsize = None\n", + "shelton_street_network.hop.officers_at_address_maxsize = None\n", + "shelton_street_network.get_officers_at_address = False\n", + "shelton_street_network.perform_hop(1, company_data= company_data)" + ] + }, + { + "cell_type": "markdown", + "id": "820a908d", + "metadata": {}, + "source": [ + "If we check `company_ids` we have over 70000 companies that we could build a network from if we had lots of time on our hands:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12acb915", + "metadata": {}, + "outputs": [], + "source": [ + "shelton_street_network.company_ids" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/quickstart.ipynb b/notebooks/quickstart.ipynb new file mode 100644 index 0000000..7d4906b --- /dev/null +++ b/notebooks/quickstart.ipynb 
@@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b6926e35", + "metadata": {}, + "source": [ + "*Quickstart hands-on exercise. For in-depth intro checkout Tutorial 1:*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f17ebdd2", + "metadata": {}, + "outputs": [], + "source": [ + "from sugartrail import mapview, api, base\n", + "from ipywidgets import VBox, HBox" + ] + }, + { + "cell_type": "markdown", + "id": "d5f9b6ad", + "metadata": {}, + "source": [ + "Insert a valid [Companies House Public Data API key](https://developer.company-information.service.gov.uk/get-started/) as `username` string value below. If you don't want to use the API and would prefer loading a pre-built network, uncomment and run the cell below and then run the final cell to build and load the map. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a9639e6", + "metadata": {}, + "outputs": [], + "source": [ + "# # network build from Domain Foundation, company_id = \"11951034\"\n", + "# import pickle\n", + "\n", + "# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n", + "# network = pickle.load(handle)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89b0082a", + "metadata": {}, + "outputs": [], + "source": [ + "api.basic_auth.username = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "63220f29", + "metadata": {}, + "source": [ + "Enter the company number (as string) for a company you would like to explore. 
Example value is provided: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8aca6a54", + "metadata": {}, + "outputs": [], + "source": [ + "company_id = \"11951034\"\n", + "network = base.Network(company_id=company_id)" + ] + }, + { + "cell_type": "markdown", + "id": "7de31e72", + "metadata": { + "tags": [ + "5" + ] + }, + "source": [ + "Perform `n` number of hops (3 or less at first is advised to keep the network manageable in size):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d80be86d", + "metadata": { + "tags": [ + "6" + ] + }, + "outputs": [], + "source": [ + "n = 3\n", + "network = base.Network(company_id=company_id)\n", + "network.perform_hop(n)" + ] + }, + { + "cell_type": "markdown", + "id": "4481c80d", + "metadata": {}, + "source": [ + "Now lets visualise the connections:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "022f026e", + "metadata": {}, + "outputs": [], + "source": [ + "network.run_map_preprocessing()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01dca0cf", + "metadata": { + "scrolled": false, + "tags": [ + "7" + ] + }, + "outputs": [], + "source": [ + "map_data,path_table = mapview.build_map(network) \n", + "hbox = HBox([path_table])\n", + "vbox = VBox([map_data, hbox])\n", + "vbox" + ] + }, + { + "cell_type": "markdown", + "id": "457bf4d0", + "metadata": {}, + "source": [ + "Each marker represents a company in the network. Green markers represent active companies based at the address, red markers represent active companies no longer based at the address and black markers represent dissolved companies once based at the address. \n", + "\n", + "Select a marker to display additional information: \n", + "- pop-up with the selected company's name and address\n", + "- table containing the most efficient paths from the origin to the selected company\n", + "- antpaths for each company in the network. 
Red antpath represents the path through all the historic addresses for the selected company. Black antpath represents the path from the network origin through all the addresses in the path to the selected company as displayed in the table. " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d24329a --- /dev/null +++ b/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup, find_packages + +with open("config/requirements.txt") as requirement_file: + requirements = requirement_file.read().split() + +setup( + name="sugartrail", + version="1.0.0", + install_requires=requirements, + packages=find_packages(exclude=["notebooks", "dashboard", "assets"]), +) diff --git a/sugartrail/api.py b/sugartrail/api.py index 93cb797..a60ff11 100644 --- a/sugartrail/api.py +++ b/sugartrail/api.py @@ -8,6 +8,14 @@ password = "" size = "5000" basic_auth = requests.auth.HTTPBasicAuth(username, password) +def test(): + url = "https://api.company-information.service.gov.uk/advanced-search/companies" + response = requests.get(url, auth=basic_auth) + if response.status_code == 200: + return True + else: + return False + def make_request(url, input, input_type, response_type): time.sleep(0.5) try: @@ -17,13 +25,17 @@ def make_request(url, input, input_type, response_type): if response.status_code == 200: return response.json() except requests.exceptions.RequestException as err: - print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + # print (err, f"{os.linesep}Failed to get {response_type} for 
{input_type}:", str(input)) + return except requests.exceptions.HTTPError as errh: - print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + # print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + return except requests.exceptions.ConnectionError as errc: - print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + # print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + return except requests.exceptions.Timeout as errt: - print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + # print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input)) + return def get_company_officers(company_id): url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers" diff --git a/sugartrail/base.py b/sugartrail/base.py index b1dfefc..6983766 100644 --- a/sugartrail/base.py +++ b/sugartrail/base.py @@ -208,18 +208,18 @@ class Network: for i,address in enumerate(selected_addresses): self.hop.search_address(self, address, company_data) IPython.display.clear_output(wait=True) - print("Hop number: " + str(hop+1)) + print("Hop number: " + str(hop)) print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.") for j,company in enumerate(selected_companies): self.hop.search_company_id(self,company) IPython.display.clear_output(wait=True) - print("Hop number: " + str(hop+1)) + print("Hop number: " + str(hop)) print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.") print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.") for k,officer in enumerate(selected_officers): self.hop.search_officer_id(self,officer) IPython.display.clear_output(wait=True) - print("Hop number: " + str(hop+1)) + print("Hop number: " + str(hop)) print("Processed " + str(len(selected_addresses)) + "/" 
+ str(len(selected_addresses)) + " addresses.") print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.") print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.") @@ -230,14 +230,14 @@ class Network: self.get_company_address_history = True self.get_psc_correspondance_address = True self.get_officer_appointments = True - self.officer_appointments_maxsize = 2000 + self.officer_appointments_maxsize = 50 self.get_officer_correspondance_address = True self.get_officer_duplicates = True self.officer_duplicates_maxsize = None self.get_officers_at_address = True - self.officers_at_address_maxsize = 1000 + self.officers_at_address_maxsize = 50 self.get_companies_at_address = True - self.companies_at_address_maxsize = 500 + self.companies_at_address_maxsize = 50 def search_company_id(self, network, company_id): officers = [] diff --git a/sugartrail/mapview.py b/sugartrail/mapview.py index 922cffe..1d5c82c 100644 --- a/sugartrail/mapview.py +++ b/sugartrail/mapview.py @@ -4,9 +4,11 @@ import pandas as pd from datetime import datetime import functools from string import ascii_lowercase as alc +import math -def build_map(network): - Widget.close_all() +def build_map(network, clear_widget=True): + if clear_widget: + Widget.close_all() m, path_table = load_map_data(network) return m, path_table @@ -14,7 +16,10 @@ def get_address_path(network, company_id): company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id] address_path = [] for index, row in company_address_history.iterrows(): - address_path.insert(0,[row['lat'], row['lon']]) + if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])): + pass + else: + address_path.insert(0,[row['lat'], row['lon']]) return address_path def locations_from_origin_path(path, network): @@ -24,12 +29,18 @@ def locations_from_origin_path(path, network): last_company_address_row = 
network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1] lat = last_company_address_row['lat'].item() lon = last_company_address_row['lon'].item() - locations.append([float(lat),float(lon)]) + if math.isnan(float(lat)): + pass + else: + locations.append([float(lat),float(lon)]) elif node['type'] == 'Address': address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1] lat = address_row['lat'].item() lon = address_row['lon'].item() - locations.append([float(lat),float(lon)]) + if math.isnan(float(lat)) or math.isnan(float(lon)): + pass + else: + locations.append([float(lat),float(lon)]) return locations def on_button_clicked(address_path, path, location, address_trail, path_table, origin_trail, locations_from_origin, **kwargs):