adjusted notebooks for dict compliance

2026-06-08 03:28:31 +03:00 · 2023-01-06 21:38:10 +00:00
parent da539ce46f
commit 1a65a9fb96
23 changed files with 287 additions and 199 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,6 @@ coverage.xml

 # Sphinx documentation
 docs/_build/
+
+# API Keys
+config/config.py
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ You will require an API key from Companies House to get data. First you will nee

 ## Usage

-A hosted demo of the Sugartrail dashboard can be accessed [here](https://stark-island-99644.herokuapp.com/).
+A hosted demo of the Sugartrail dashboard can be accessed [here](https://stark-island-99644.herokuapp.com/) (the page may take a few seconds to load).

 ## Installation

--- a/assets/images/kingdom_table.png
+++ b/assets/images/kingdom_table.png
--- a/assets/networks/.DS_Store
+++ b/assets/networks/.DS_Store
--- a/assets/networks/domain_corp_network.json
+++ b/assets/networks/domain_corp_network.json
--- a/assets/networks/domain_corp_network.pickle
+++ b/assets/networks/domain_corp_network.pickle
--- a/assets/networks/kingdom_of_sweets_network.json
+++ b/assets/networks/kingdom_of_sweets_network.json
--- a/assets/networks/kingdom_of_sweets_network.pickle
+++ b/assets/networks/kingdom_of_sweets_network.pickle
--- a/assets/networks/regent_street_network.json
+++ b/assets/networks/regent_street_network.json
--- a/assets/networks/regent_street_network.pickle
+++ b/assets/networks/regent_street_network.pickle
--- a/assets/networks/shelton_street_network.json
+++ b/assets/networks/shelton_street_network.json
--- a/assets/networks/western_crown_network.json
+++ b/assets/networks/western_crown_network.json
--- a/assets/networks/western_crown_network.pickle
+++ b/assets/networks/western_crown_network.pickle
--- a/config/init.py
+++ b/config/init.py
@@ -0,0 +1 @@
+from . import config
--- a/dashboard/Sugartrail.ipynb
+++ b/dashboard/Sugartrail.ipynb
@@ -10,7 +10,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "id": "f17ebdd2",
   "metadata": {},
   "outputs": [],
@@ -18,7 +18,8 @@
    "from sugartrail import mapview, api, base\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display\n",
-    "import requests"
+    "import requests\n",
+    "import pandas as pd"
   ]
  },
  {
@@ -222,13 +223,13 @@
    "    accordion_data.selected_index=0\n",
    "    build_map_button.disabled = True\n",
    "    with tab.children[0]:\n",
-    "        display(network.company_ids)\n",
+    "        display(pd.DataFrame(network.company_ids))\n",
    "    with tab.children[1]:\n",
-    "        display(network.addresses)\n",
+    "        display(pd.DataFrame(network.addresses))\n",
    "    with tab.children[2]:\n",
-    "        display(network.officer_ids)\n",
+    "        display(pd.DataFrame(network.officer_ids))\n",
    "    with tab.children[3]:\n",
-    "        display(network.companies)   \n",
+    "        display(pd.DataFrame(network.companies))   \n",
    "\n",
    "display(build_map_button, map_container)"
   ]
--- a/notebooks/001_getting_started.ipynb
+++ b/notebooks/001_getting_started.ipynb
@@ -37,6 +37,7 @@
   "source": [
    "from sugartrail import api, mapview, base\n",
    "from ipywidgets import VBox, HBox\n",
+    "import pandas as pd\n",
    "\n",
    "api.basic_auth.username = \"\""
   ]
@@ -291,7 +292,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.company_ids"
+    "pd.DataFrame(network.company_ids)"
   ]
  },
  {
@@ -309,7 +310,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.officer_ids"
+    "pd.DataFrame(network.officer_ids)"
   ]
  },
  {
@@ -329,7 +330,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.addresses"
+    "pd.DataFrame(network.addresses)"
   ]
  },
  {
@@ -357,7 +358,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.hop_history"
+    "pd.DataFrame(network.hop_history)"
   ]
  },
  {
@@ -411,7 +412,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.address_history"
+    "pd.DataFrame(network.address_history)"
   ]
  },
  {
@@ -421,7 +422,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.companies "
+    "pd.DataFrame(network.companies)"
   ]
  },
  {
@@ -475,9 +476,9 @@
   "id": "fd5d9a0d",
   "metadata": {},
   "source": [
-    "Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n",
+    "Pick N Mix London Limited (e) is a 'company at address' for 3rd Floor 13 Charles Ii Street (c) which is a 'historic address' for Kingdom of Sweets Ltd (a).\n",
    "\n",
-    "Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). "
+    "Additionally, Pick N Mix London Limited (d) is an appointment of (b) who is an officer of Kingdom of Sweets Ltd (a). "
   ]
  },
  {
@@ -493,7 +494,7 @@
   "id": "a68e26ca",
   "metadata": {},
   "source": [
-    "The network object can be saved with 'pickle' and reloaded when needed:"
+    "The network object can be saved to `../assets/networks/` as a JSON file:"
   ]
  },
  {
@@ -503,10 +504,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "import pickle\n",
-    "\n",
-    "with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
-    "    pickle.dump(network, handle)"
+    "network.save('kingdom_of_sweets_network.json')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7632e2a8",
+   "metadata": {},
+   "source": [
+    "We can load the network by creating a new network and passing the filename: "
   ]
  },
  {
@@ -516,8 +522,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
-    "    network = pickle.load(handle)"
+    "network = base.Network(file='kingdom_of_sweets_network.json')"
   ]
  }
 ],
--- a/notebooks/002_candy_connections.ipynb
+++ b/notebooks/002_candy_connections.ipynb
@@ -163,7 +163,7 @@
   "id": "866bc18e",
   "metadata": {},
   "source": [
-    "Lets go big and perform 6 hops. It's likely to take some time to gather all the data +1 hour. If you don't want to wait, you can also use uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell :"
+    "Let's go big and perform 6 hops. It's likely to take some time to gather all the data (over 2 hours). If you don't want to wait, you can also uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell:"
   ]
  },
  {
@@ -173,9 +173,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# import pickle\n",
-    "# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
-    "#     western_crown_network = pickle.load(handle)"
+    "# western_crown_network = base.Network(file='western_crown_network.json')"
   ]
  },
  {
@@ -231,7 +229,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
+    "pd.DataFrame(western_crown_network.find_path('10289650'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
   ]
  },
  {
--- a/notebooks/003_virtual_offices.ipynb
+++ b/notebooks/003_virtual_offices.ipynb
@@ -63,7 +63,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.addresses"
+    "pd.DataFrame(network.addresses)"
   ]
  },
  {
@@ -73,7 +73,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.addresses['address'].unique()"
+    "pd.DataFrame(network.addresses)['address'].unique()"
   ]
  },
  {
@@ -91,7 +91,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.maxsize_entities"
+    "pd.DataFrame(network.maxsize_entities)"
   ]
  },
  {
@@ -99,7 +99,7 @@
   "id": "5ad7b443",
   "metadata": {},
   "source": [
-    "Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`."
+    "Because we set a limit of 50 on the number of companies returned via `companies_at_address_maxsize`, these companies will not be added to `company_ids`."
   ]
  },
  {
@@ -127,7 +127,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "len(network.company_ids['company_id'].unique())"
+    "len(network.company_ids)"
   ]
  },
  {
@@ -146,7 +146,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "network.maxsize_entities['node'][0]"
+    "network.maxsize_entities[2]['node']"
   ]
  },
  {
@@ -230,7 +230,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "regent_street_network.company_ids"
+    "pd.DataFrame(regent_street_network.company_ids)"
   ]
  },
  {
@@ -254,13 +254,11 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "38937142",
+   "id": "4b7616c7",
   "metadata": {},
   "outputs": [],
   "source": [
-    "import pickle\n",
-    "with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
-    "    regent_street_network = pickle.load(handle)"
+    "# regent_street_network = base.Network(file='regent_street_network.json')"
   ]
  },
  {
@@ -278,7 +276,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "regent_street_network.officer_ids['name'].value_counts()"
+    "pd.DataFrame(regent_street_network.officer_ids)['name'].value_counts()"
   ]
  },
  {
@@ -333,7 +331,7 @@
   "source": [
    "shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
    "shelton_street_network.perform_hop(1)\n",
-    "shelton_street_network.maxsize_entities"
+    "shelton_street_network.maxsize_entities[0]"
   ]
  },
  {
@@ -372,7 +370,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
+    "company_data = pd.read_csv(\"../assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
   ]
  },
  {
@@ -380,7 +378,7 @@
   "id": "2273cf39",
   "metadata": {},
   "source": [
-    "Now lets try get every company at the very overcrowded 71-75 Shelton Street address:"
+    "Now let's try to get every company at the very overcrowded 71-75 Shelton Street address (this might take several minutes; you can uncomment the cell below to load a pre-made network instead):"
   ]
  },
  {
@@ -394,7 +392,17 @@
    "shelton_street_network.hop.companies_at_address_maxsize = None\n",
    "shelton_street_network.hop.officers_at_address_maxsize = None\n",
    "shelton_street_network.get_officers_at_address = False\n",
-    "shelton_street_network.perform_hop(1, company_data= company_data)"
+    "shelton_street_network.perform_hop(1, company_data = company_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d34c9833",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shelton_street_network = base.Network(file='shelton_street_network.json')"
   ]
  },
  {
@@ -412,7 +420,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "shelton_street_network.company_ids"
+    "len(shelton_street_network.company_ids)"
   ]
  }
 ],
--- a/notebooks/quickstart.ipynb
+++ b/notebooks/quickstart.ipynb
@@ -35,10 +35,7 @@
   "outputs": [],
   "source": [
    "# # network build from Domain Foundation, company_id = \"11951034\"\n",
-    "# import pickle\n",
-    "\n",
-    "# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
-    "#     network = pickle.load(handle)"
+    "# network = base.Network(file='domain_corp_network.json')"
   ]
  },
  {
--- a/sugartrail/api.py
+++ b/sugartrail/api.py
@@ -1,9 +1,11 @@
 import requests
 import time
 import os
+import config

 access_token = ""
-username = access_token
+# username = config.config.APIKEY
+username = ""
 password = ""
 size = "5000"
 basic_auth = requests.auth.HTTPBasicAuth(username, password)
@@ -17,25 +19,28 @@ def test():
        return False

 def make_request(url, input, input_type, response_type):
-    time.sleep(0.5)
-    try:
-        response = requests.get(url, auth=basic_auth)
-        response.raise_for_status()
-        # print("here")
-        if response.status_code == 200:
-            return response.json()
-    except requests.exceptions.RequestException as err:
-        # print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
-        return
-    except requests.exceptions.HTTPError as errh:
-        # print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
-        return
-    except requests.exceptions.ConnectionError as errc:
-        # print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
-        return
-    except requests.exceptions.Timeout as errt:
-        # print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
-        return
+    if basic_auth.username:
+        time.sleep(0.5)
+        try:
+            response = requests.get(url, auth=basic_auth)
+            response.raise_for_status()
+            # print("here")
+            if response.status_code == 200:
+                return response.json()
+        except requests.exceptions.RequestException as err:
+            # print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
+            return
+        except requests.exceptions.HTTPError as errh:
+            # print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
+            return
+        except requests.exceptions.ConnectionError as errc:
+            # print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
+            return
+        except requests.exceptions.Timeout as errt:
+            # print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
+            return
+    else:
+        print("Authentication required")

 def get_company_officers(company_id):
    url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers"
@@ -72,14 +77,15 @@ def get_duplicate_officers(officer_id):
        url = "https://api.company-information.service.gov.uk/search/officers?q=" + name
        response = make_request(url, name, 'officer name', 'officers')
        filtered_results = []
-        if 'items' in response:
-            for officer in response['items']:
-                if 'date_of_birth' in officer.keys() and 'date_of_birth' in officer_data.keys():
-                    if officer['date_of_birth'] == officer_data['date_of_birth'] and officer['links']['self'] != officer_self_link:
-                        filtered_results.append(officer)
-            return filtered_results
-        else:
-            return
+        if response:
+            if 'items' in response:
+                for officer in response['items']:
+                    if 'date_of_birth' in officer.keys() and 'date_of_birth' in officer_data.keys():
+                        if officer['date_of_birth'] == officer_data['date_of_birth'] and officer['links']['self'] != officer_self_link:
+                            filtered_results.append(officer)
+                return filtered_results
+            else:
+                return

 def get_companies_at_address(address):
    url = "https://api.company-information.service.gov.uk/advanced-search/companies?location=" + address + "&size=" + "5000"
@@ -88,12 +94,13 @@ def get_companies_at_address(address):
 def get_officers_at_address(address):
    url = "https://api.company-information.service.gov.uk/search/officers?q=location:" + address
    response = make_request(url, address, 'address', 'officers')
-    if 'items' in response:
-        officers = []
-        word_list = []
-        for word in address.replace(',','').split():
-            word_list.append(word)
-        for officer in response['items']:
-            if all(word in officer['address_snippet'] for word in word_list):
-                officers.append(officer)
-        return officers
+    if response:
+        if 'items' in response:
+            officers = []
+            word_list = []
+            for word in address.replace(',','').split():
+                word_list.append(word)
+            for officer in response['items']:
+                if all(word in officer['address_snippet'] for word in word_list):
+                    officers.append(officer)
+            return officers
--- a/sugartrail/base.py
+++ b/sugartrail/base.py
@@ -1,18 +1,14 @@
 from sugartrail import api
 from sugartrail import processing
-import pandas as pd
 import IPython
 import numpy as np
 import math
 import warnings
-from string import ascii_lowercase as alc
-warnings.simplefilter(action='ignore', category=FutureWarning)
-pd.set_option('display.max_columns', 500)
-pd.set_option('display.max_rows', 150)
+import json
+from string import ascii_letters as alc

 class Network:
-    def __init__(self, officer_id=None, company_id=None, address=None):
-        # convert all dataframes to lists of dictionaries:
+    def __init__(self, officer_id=None, company_id=None, address=None, file=None):
        self.addresses = []
        self.officer_ids = []
        self.company_ids = []
@@ -23,10 +19,26 @@ class Network:
        self._address = address
        self.n = 0
        self.link_type = None
-        self.initialise()
        self.hop = self.Hop()
        self.hop_history = []
        self.maxsize_entities = []
+        self.processed_officers  = []
+        self.processed_companies = []
+        self.processed_addresses = []
+        self._file = self.load(file)
+        self.initialise()
+
+    @property
+    def file(self):
+        return self._file
+
+    @file.setter
+    def file(self, new_value):
+        self._file = new_value
+        self._officer_id = None
+        self._company_id = None
+        self._address_id = None
+        self.load(self._file)

    @property
    def officer_id(self):
@@ -51,7 +63,7 @@ class Network:
        self.initialise()

    @property
-    def address(self):
+    def address(self, value):
        return self._address

    @address.setter
@@ -63,24 +75,60 @@ class Network:

    # change to initialise
    def initialise(self):
-        if self._officer_id:
-            if api.get_appointments(self._officer_id):
-                self.officer_ids.append(dict({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}))
+        if self.n < 1:
+            if self._officer_id:
+                if api.get_appointments(self._officer_id):
+                    self.officer_ids.append(dict({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}))
+                else:
+                    print(f"Officer with ID:{str(self._officer_id)} not found")
+            elif self._company_id:
+                self.company_ids.append(dict({'company_id': self._company_id, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
+                company = api.get_company(self._company_id)
+                self.companies.append(dict(processing.flatten(company)))
+            elif self._address:
+                self.addresses.append(dict({'address': self._address, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
+            elif self.file:
+                pass
            else:
-                print(f"Officer with ID:{str(self._officer_id)} not found")
-        elif self.company_id:
-            self.company_ids.append(dict({'company_id': self._company_id, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
-            company = api.get_company(self._company_id)
-            self.companies.append(dict(processing.flatten(company)))
-        elif self._address:
-            self.addresses.append(dict({'address': self._address, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
-        else:
-            print("No input provided. Please provide either officer_id, company_id or address value as input.")
+                print("No input provided. Please provide either officer_id, company_id or address value as input.")
+
+    def save(self, filename):
+        network_data = {k: v for k, v in self.__dict__.items() if k != 'hop' and k != 'file'}
+        saved_network = json.dumps(network_data)
+        f = open(f'../assets/networks/{filename}', 'w')
+        f.write(saved_network)
+        f.close
+
+    def load(self, filename):
+        if filename:
+            f = open(f'../assets/networks/{filename}')
+            network_data = json.load(f)
+            self.addresses = network_data['addresses']
+            self.officer_ids = network_data['officer_ids']
+            self.company_ids = network_data['company_ids']
+            self.companies = network_data['companies']
+            self.address_history = network_data['address_history']
+            self._officer_id = network_data['_officer_id']
+            self._company_id = network_data['_company_id']
+            self._address = network_data['_address']
+            self.n = network_data['n']
+            self.link_type = network_data['link_type']
+            self.hop_history = network_data['hop_history']
+            self.maxsize_entities = network_data['maxsize_entities']
+            self.processed_officers  = network_data['processed_officers']
+            self.processed_companies = network_data['processed_companies']
+            self.processed_addresses = network_data['processed_addresses']

    def add_company_names(self):
        for i, row in enumerate(self.company_ids):
-            self.company_ids[i]['company_name'] = list(filter(lambda d: d.get('company_number') == row['company_id'], self.companies))[0]['company_name']
-        # self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']]
+            company_details = list(filter(lambda d: d.get('company_number') == row['company_id'], self.companies))
+            if company_details:
+                self.company_ids[i]['company_name'] = company_details[0]['company_name']
+            else:
+                company_details = api.get_company(row['company_id'])
+                if company_details:
+                    if 'company_name' in  company_details:
+                        self.company_ids[i]['company_name'] = company_details['company_name']

    def get_company_from_id(self, company_df=None, company_id=None, print_progress=True):
        company_list = []
@@ -91,7 +139,6 @@ class Network:
                print("add valid company id")
        else:
            company_list = [company['company_id'] for company in self.company_ids]
-        # companies
        companies = []
        for i, company_id in enumerate(company_list):
            IPython.display.clear_output(wait=True)
@@ -207,46 +254,58 @@ class Network:
            link_indices = []
            for j,item in enumerate(sorted_path):
                if item['id'] == search_term:
-                    link_indices.append(alc[j].upper())
+                    link_indices.append(alc[j])
            sorted_path[i]["link"] = ','.join(link_indices)
-            sorted_path[i]["node_index"] = alc[i].upper()
+            sorted_path[i]["node_index"] = alc[i]
        return sorted_path

    def perform_hop(self, hops, company_data=None):
        hop_history = []
        for hop in range(hops):
+            # check if previous hop completed, if any processed items then its still mid-processing:
            selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))]
-            # selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address']
            selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))]
-            # selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id']
            selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))]
-            # selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id']
            if not selected_addresses and not selected_companies and not selected_officers:
                print("Edge of network reached.")
                break
            else:
-                self.n += 1
-                hop_history.append(self.hop.__dict__)
-                # self.hop_history = self.hop_history.append(self.hop.__dict__, ignore_index=True)
                for i,address in enumerate(selected_addresses):
-                    self.hop.search_address(self, address, company_data)
+                    if address not in self.processed_addresses:
+                        self.hop.search_address(self, address, company_data)
+                        self.processed_addresses.append(address)
                    IPython.display.clear_output(wait=True)
                    print("Hop number: " + str(hop))
                    print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.")
                for j,company in enumerate(selected_companies):
-                    self.hop.search_company_id(self,company)
+                    if company not in self.processed_companies:
+                        self.hop.search_company_id(self,company)
+                        self.processed_companies.append(company)
                    IPython.display.clear_output(wait=True)
                    print("Hop number: " + str(hop))
                    print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
                    print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.")
                for k,officer in enumerate(selected_officers):
-                    self.hop.search_officer_id(self,officer)
+                    if officer not in self.processed_officers:
+                        self.hop.search_officer_id(self,officer)
+                        self.processed_officers.append(officer)
                    IPython.display.clear_output(wait=True)
                    print("Hop number: " + str(hop))
                    print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
                    print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
                    print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
-        self.hop_history.append(hop_history)
+                self.officer_ids = [i for n, i in enumerate(self.officer_ids) if i not in self.officer_ids[n + 1:]]
+                self.company_ids = [i for n, i in enumerate(self.company_ids) if i not in self.company_ids[n + 1:]]
+                self.maxsize_entities = [i for n, i in enumerate(self.maxsize_entities) if i not in self.maxsize_entities[n + 1:]]
+                self.addresses = [i for n, i in enumerate(self.addresses) if i not in self.addresses[n + 1:]]
+                self.address_history = [i for n, i in enumerate(self.address_history) if i not in self.address_history[n + 1:]]
+                self.companies = [i for n, i in enumerate(self.companies) if i not in self.companies[n + 1:]]
+                self.processed_officers = []
+                self.processed_companies = []
+                self.processed_addresses = []
+                self.n += 1
+                hop_history.append(self.hop.__dict__)
+            self.hop_history.extend(hop_history)

    class Hop:
        def __init__(self):
@@ -273,22 +332,23 @@ class Network:
                    officers = officers['items']
            network.node_type = "Company"
            network.node = company_id
-            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
-            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
+            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
+            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
            if officers:
                for officer in officers:
-                    if processing.normalise_address(officer['address']) not in lower_n_addresses:
-                        network.link_type = "Officer Corresponance Address"
-                        new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
-                        if new_address not in new_addresses:
-                            new_addresses.append(new_address)
-                        # network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
-                    if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
-                        network.link_type = "Officer"
-                        new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
-                        if new_officer not in new_officers:
-                            new_officers.append(new_officer)
-                        # network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
+                    if 'address' in officer:
+                        if processing.normalise_address(officer['address']) not in lower_n_addresses:
+                            network.link_type = "Officer Corresponance Address"
+                            new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                            if new_address not in new_addresses:
+                                new_addresses.append(new_address)
+                            # network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
+                        if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
+                            network.link_type = "Officer"
+                            new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                            if new_officer not in new_officers:
+                                new_officers.append(new_officer)
+                            # network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
            if self.get_psc_correspondance_address:
                psc = api.get_psc(company_id)
                if psc:
@@ -296,7 +356,7 @@ class Network:
                        if "address" in person:
                            network.link_type = "Person of Significant Control Address"
                            if processing.normalise_address(person['address']) not in lower_n_addresses:
-                                new_address = {'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                                new_address = {'address': processing.normalise_address(person['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
                                if new_address not in new_addresses:
                                    new_addresses.append(new_address)
            if self.get_company_address_history:
@@ -305,34 +365,35 @@ class Network:
                for address in address_history:
                    network.link_type = "Historic Address"
                    if address['address'] not in lower_n_addresses:
-                        new_address = {'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                        new_address = {'address': address['address'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
                        if new_address not in new_addresses:
-                            new_addresses.append(dict({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}))
+                            new_addresses.append(new_address)
                        # network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
            network.addresses.extend(new_addresses)
            network.officer_ids.extend(new_officers)

+
        def search_officer_id(self, network, officer_id):
            new_addresses = []
            new_companies = []
            new_officers = []
            network.node_type = "Person"
            network.node = officer_id
-            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
-            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
-            lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
+            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
+            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
+            lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n+1, network.company_ids))]
            appointments = api.get_appointments(officer_id)
            if appointments:
                if self.officer_appointments_maxsize == None or len(appointments['items']) < int(self.officer_appointments_maxsize or 0):
                    for appointment in appointments['items']:
                        if processing.normalise_address(appointment['address']) not in lower_n_addresses:
                            network.link_type = "Appointment Address"
-                            new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                            new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
                            if new_address not in new_addresses:
                                new_addresses.append(new_address)
                        if appointment['appointed_to']['company_number'] not in lower_n_companies:
                            network.link_type = "Appointment"
-                            new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                            new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
                            if new_company not in new_companies:
                                new_companies.append(new_company)
                elif len(appointments['items']) > int(self.officer_appointments_maxsize):
@@ -342,7 +403,7 @@ class Network:
                if correspondance_address:
                    if processing.normalise_address(correspondance_address['items'][0]['address']) not in lower_n_addresses:
                        network.link_type = "Officer Corresponance Address"
-                        new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                        new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
                        if new_address not in new_addresses:
                            new_addresses.append(new_address)
            if self.get_officer_duplicates:
@@ -352,7 +413,7 @@ class Network:
                        for duplicate in duplicate_officers:
                            network.link_type = "Duplicate Officer"
                            if duplicate['links']['self'].split('/')[2] not in lower_n_officers:
-                                new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}
+                                new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n+1, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}
                                if new_officer not in new_officers:
                                    new_officers.append(new_officer)
                    elif len(duplicate_officers) > int(self.officer_duplicates_maxsize):
@@ -366,9 +427,9 @@ class Network:
            new_officers = []
            network.node_type = "Address"
            network.node = address
-            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
-            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
-            lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
+            lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
+            lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
+            lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n+1, network.company_ids))]
            if self.get_companies_at_address:
                companies = {}
                if company_data is not None:
@@ -376,25 +437,27 @@ class Network:
                else:
                    companies = api.get_companies_at_address(address)
                if companies:
-                    if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0):
-                        for company in companies['items']:
-                            network.link_type = "Company at Address"
-                            if company['company_number'] not in lower_n_companies:
-                                new_company = {'company_id': company['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
-                                if new_company not in new_companies:
-                                    new_companies.append(new_company)
-                    elif len(companies['items']) > int(self.companies_at_address_maxsize):
-                        network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])}))
+                    if 'items' in companies:
+                        if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0):
+                            for company in companies['items']:
+                                network.link_type = "Company at Address"
+                                if company['company_number'] not in lower_n_companies:
+                                    new_company = {'company_id': company['company_number'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                                    if new_company not in new_companies:
+                                        new_companies.append(new_company)
+                        elif len(companies['items']) > int(self.companies_at_address_maxsize):
+                            network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])}))
            if self.get_officers_at_address:
                officers = api.get_officers_at_address(address)
                if officers:
                    if self.officers_at_address_maxsize == None or len(officers) < int(self.officers_at_address_maxsize or 0):
                        for officer in officers:
-                            network.link_type = "Officer at Address"
-                            if officer['links']['self'].split('/')[2] not in lower_n_officers:
-                                new_officer = {'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
-                                if new_officer not in new_officers:
-                                    new_officers.append(new_officer)
+                            if 'links' in officer and 'title' in officer:  # both keys are read below; test each explicitly
+                                network.link_type = "Officer at Address"
+                                if officer['links']['self'].split('/')[2] not in lower_n_officers:
+                                    new_officer = {'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
+                                    if new_officer not in new_officers:
+                                        new_officers.append(new_officer)
                    elif len(officers) > int(self.officers_at_address_maxsize):
                        network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)}))
            network.officer_ids.extend(new_officers)
--- a/sugartrail/mapview.py
+++ b/sugartrail/mapview.py
@@ -2,7 +2,6 @@ from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea
 from ipyleaflet import Map, Marker, MarkerCluster, AwesomeIcon, AntPath, Popup
 from datetime import datetime
 import functools
-from string import ascii_lowercase as alc
 import math

 def build_map(network, clear_widget=True):
@@ -14,9 +13,10 @@ def build_map(network, clear_widget=True):
 def get_address_path(network, company_id):
    # company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
    company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history))
+    company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
    address_path = []
-    for index, row in enumerate(company_address_history):
-        if 'lat' not in row or 'lon' not in row:
+    for index, row in enumerate(company_address_history_sorted):
+        if not row.get('lat') or not row.get('lon'):  # skip rows whose coordinates are missing or empty
            pass
        else:
            address_path.insert(0,[row['lat'], row['lon']])
@@ -27,8 +27,14 @@ def locations_from_origin_path(path, network):
    for node in path:
        if node['type'] == 'Company':
            ###
-            last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0]
-            # last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
+            company_address_history = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))
+            company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
+            # Default to null coordinates so the lat/lon reads below cannot raise KeyError when no row has both values.
+            last_company_address_row = {'lat': None, 'lon': None}
+            for address_row in company_address_history_sorted:
+                if address_row.get('lat') and address_row.get('lon'):
+                    last_company_address_row = address_row
+                    break
            lat = last_company_address_row['lat']
            lon = last_company_address_row['lon']
            if not lat or not lon:
@@ -102,28 +108,29 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
    origin_trail=origin_trail
    ms = []
    for index, row in enumerate(network.address_history):
-        path = ""
-        locations_from_origin = ""
-        message = HTML()
-        marker_color = "green"
-        company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
-        # company = network.companies.loc[network.companies['company_number'] == row['company_number']]
-        company_name = company['company_name']
-        company_status = company['company_status']
-        if company_status == "active":
-            if row['end_date'] != None:
-                marker_color = "red"
-        else:
-            marker_color = "black"
-        address = row['address']
-        path = network.find_path(str(row['company_number']))
-        locations_from_origin = locations_from_origin_path(path, network)
-        message.value = str(company_name) + "<hr>" + str(address)
-        icon = AwesomeIcon(
-        marker_color=marker_color
-        )
-        address_path = get_address_path(network,str(row['company_number']))
-        marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
-        marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
-        ms.append(marker)
+        if row['lat'] and row['lon']:
+            path = ""
+            locations_from_origin = ""
+            message = HTML()
+            marker_color = "green"
+            company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
+            # company = network.companies.loc[network.companies['company_number'] == row['company_number']]
+            company_name = company['company_name']
+            company_status = company['company_status']
+            if company_status == "active":
+                if row['end_date']:
+                    marker_color = "red"
+            else:
+                marker_color = "black"
+            address = row['address']
+            path = network.find_path(str(row['company_number']))
+            locations_from_origin = locations_from_origin_path(path, network)
+            message.value = str(company_name) + "<hr>" + str(address)
+            icon = AwesomeIcon(
+            marker_color=marker_color
+            )
+            address_path = get_address_path(network,str(row['company_number']))
+            marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
+            marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
+            ms.append(marker)
    return ms
--- a/sugartrail/processing.py
+++ b/sugartrail/processing.py
@@ -1,6 +1,6 @@
 from sugartrail import api
 import requests
-# import pandas as pd
+import pandas as pd
 import random
 import urllib
 import regex as re
@@ -29,13 +29,6 @@ def get_companies_from_address_database(address, company_data):
    companies['registered_office_address'] = [{'address_line_1': row['address_line_1'], 'address_line_2': row['address_line_2'], 'locality': row['locality'], 'postal_code': row['postal_code'], 'country': row['country']} for i,row in companies.iterrows()]
    return companies.to_dict('records')

-# def load_company_data(company_data_filepath):
-#     try:
-#         company_data = pd.read_csv(company_data_filepath)
-#         return company_data
-#     except:
-#         return
-
 def get_nearby_postcode(postcode_string):
    url = "http://api.postcodes.io/postcodes/" + postcode_string[:-1] + "/autocomplete"
    response = requests.get(url).json()