Merge pull request #4 from ribenamaplesyrup/pandas2dict

Pandas2dict
This commit is contained in:
Sean
2023-01-06 21:41:08 +00:00
committed by GitHub
22 changed files with 402 additions and 242 deletions

3
.gitignore vendored
View File

@@ -48,3 +48,6 @@ coverage.xml
# Sphinx documentation
docs/_build/
# API Keys
config/config.py

Binary file not shown.

Before

Width:  |  Height:  |  Size: 89 KiB

After

Width:  |  Height:  |  Size: 86 KiB

BIN
assets/networks/.DS_Store vendored Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
config/__init__.py Normal file
View File

@@ -0,0 +1 @@
from . import config

View File

@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "f17ebdd2",
"metadata": {},
"outputs": [],
@@ -18,7 +18,8 @@
"from sugartrail import mapview, api, base\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"import requests"
"import requests\n",
"import pandas as pd"
]
},
{
@@ -222,13 +223,13 @@
" accordion_data.selected_index=0\n",
" build_map_button.disabled = True\n",
" with tab.children[0]:\n",
" display(network.company_ids)\n",
" display(pd.DataFrame(network.company_ids))\n",
" with tab.children[1]:\n",
" display(network.addresses)\n",
" display(pd.DataFrame(network.addresses))\n",
" with tab.children[2]:\n",
" display(network.officer_ids)\n",
" display(pd.DataFrame(network.officer_ids))\n",
" with tab.children[3]:\n",
" display(network.companies) \n",
" display(pd.DataFrame(network.companies)) \n",
"\n",
"display(build_map_button, map_container)"
]

View File

@@ -37,6 +37,7 @@
"source": [
"from sugartrail import api, mapview, base\n",
"from ipywidgets import VBox, HBox\n",
"import pandas as pd\n",
"\n",
"api.basic_auth.username = \"\""
]
@@ -291,7 +292,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.company_ids"
"pd.DataFrame(network.company_ids)"
]
},
{
@@ -309,7 +310,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.officer_ids"
"pd.DataFrame(network.officer_ids)"
]
},
{
@@ -329,7 +330,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
"pd.DataFrame(network.addresses)"
]
},
{
@@ -357,7 +358,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.hop_history"
"pd.DataFrame(network.hop_history)"
]
},
{
@@ -411,7 +412,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.address_history"
"pd.DataFrame(network.address_history)"
]
},
{
@@ -421,7 +422,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.companies "
"pd.DataFrame(network.companies)"
]
},
{
@@ -475,9 +476,9 @@
"id": "fd5d9a0d",
"metadata": {},
"source": [
"Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n",
"Pick N Mix London Limited (e) is a 'company at address' for 3rd Floor 13 Charles Ii Street (c) which is a 'historic address' for Kingdom of Sweets Ltd (a).\n",
"\n",
"Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). "
"Additionally, Pick N Mix London Limited (d) is an appointment of (b) who is an officer of Kingdom of Sweets Ltd (a). "
]
},
{
@@ -493,7 +494,7 @@
"id": "a68e26ca",
"metadata": {},
"source": [
"The network object can be saved with 'pickle' and reloaded when needed:"
"The network object can be saved to `../assets/networks/` as a JSON file:"
]
},
{
@@ -503,10 +504,15 @@
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
" pickle.dump(network, handle)"
"network.save('kingdom_of_sweets_network.json')"
]
},
{
"cell_type": "markdown",
"id": "7632e2a8",
"metadata": {},
"source": [
"We can load the network by creating a new network and passing the filename: "
]
},
{
@@ -516,8 +522,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
" network = pickle.load(handle)"
"network = base.Network(file='kingdom_of_sweets_network.json')"
]
}
],

View File

@@ -163,7 +163,7 @@
"id": "866bc18e",
"metadata": {},
"source": [
"Lets go big and perform 6 hops. It's likely to take some time to gather all the data +1 hour. If you don't want to wait, you can also use uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell :"
"Let's go big and perform 6 hops. It's likely to take some time (2+ hours) to gather all the data. If you don't want to wait, you can instead uncomment the block below to load a pre-made network instance, in which case jump to the 'generate map' cell:"
]
},
{
@@ -173,9 +173,7 @@
"metadata": {},
"outputs": [],
"source": [
"# import pickle\n",
"# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
"# western_crown_network = pickle.load(handle)"
"# western_crown_network = base.Network(file='western_crown_network.json')"
]
},
{
@@ -231,7 +229,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
"pd.DataFrame(western_crown_network.find_path('10289650'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
]
},
{

View File

@@ -63,7 +63,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
"pd.DataFrame(network.addresses)"
]
},
{
@@ -73,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses['address'].unique()"
"pd.DataFrame(network.addresses)['address'].unique()"
]
},
{
@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities"
"pd.DataFrame(network.maxsize_entities)"
]
},
{
@@ -99,7 +99,7 @@
"id": "5ad7b443",
"metadata": {},
"source": [
"Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`."
"Because we set a limit of 50 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `company_ids`."
]
},
{
@@ -127,7 +127,7 @@
"metadata": {},
"outputs": [],
"source": [
"len(network.company_ids['company_id'].unique())"
"len(network.company_ids)"
]
},
{
@@ -146,7 +146,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities['node'][0]"
"network.maxsize_entities[2]['node']"
]
},
{
@@ -230,7 +230,7 @@
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.company_ids"
"pd.DataFrame(regent_street_network.company_ids)"
]
},
{
@@ -254,13 +254,11 @@
{
"cell_type": "code",
"execution_count": null,
"id": "38937142",
"id": "4b7616c7",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
" regent_street_network = pickle.load(handle)"
"# regent_street_network = base.Network(file='regent_street_network.json')"
]
},
{
@@ -278,7 +276,7 @@
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.officer_ids['name'].value_counts()"
"pd.DataFrame(regent_street_network.officer_ids)['name'].value_counts()"
]
},
{
@@ -333,7 +331,7 @@
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.perform_hop(1)\n",
"shelton_street_network.maxsize_entities"
"shelton_street_network.maxsize_entities[0]"
]
},
{
@@ -372,7 +370,7 @@
"metadata": {},
"outputs": [],
"source": [
"company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
"company_data = pd.read_csv(\"../assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
]
},
{
@@ -380,7 +378,7 @@
"id": "2273cf39",
"metadata": {},
"source": [
"Now lets try get every company at the very overcrowded 71-75 Shelton Street address:"
"Now let's try to get every company at the very overcrowded 71-75 Shelton Street address (this might take several minutes; you can uncomment the cell below to load a pre-made network instead):"
]
},
{
@@ -394,7 +392,17 @@
"shelton_street_network.hop.companies_at_address_maxsize = None\n",
"shelton_street_network.hop.officers_at_address_maxsize = None\n",
"shelton_street_network.get_officers_at_address = False\n",
"shelton_street_network.perform_hop(1, company_data= company_data)"
"shelton_street_network.perform_hop(1, company_data = company_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d34c9833",
"metadata": {},
"outputs": [],
"source": [
"# shelton_street_network = base.Network(file='shelton_street_network.json')"
]
},
{
@@ -412,7 +420,7 @@
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network.company_ids"
"len(shelton_street_network.company_ids)"
]
}
],

View File

@@ -35,10 +35,7 @@
"outputs": [],
"source": [
"# # network build from Domain Foundation, company_id = \"11951034\"\n",
"# import pickle\n",
"\n",
"# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
"# network = pickle.load(handle)"
"# network = base.Network(file='domain_corp_network.json')"
]
},
{

View File

@@ -1,9 +1,11 @@
import requests
import time
import os
import config
access_token = ""
username = access_token
# username = config.config.APIKEY
username = ""
password = ""
size = "5000"
basic_auth = requests.auth.HTTPBasicAuth(username, password)
@@ -17,25 +19,28 @@ def test():
return False
def make_request(url, input, input_type, response_type):
time.sleep(0.5)
try:
response = requests.get(url, auth=basic_auth)
response.raise_for_status()
# print("here")
if response.status_code == 200:
return response.json()
except requests.exceptions.RequestException as err:
# print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.HTTPError as errh:
# print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.ConnectionError as errc:
# print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.Timeout as errt:
# print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
if basic_auth.username:
time.sleep(0.5)
try:
response = requests.get(url, auth=basic_auth)
response.raise_for_status()
# print("here")
if response.status_code == 200:
return response.json()
except requests.exceptions.RequestException as err:
# print (err, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.HTTPError as errh:
# print (errh, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.ConnectionError as errc:
# print (errc, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
except requests.exceptions.Timeout as errt:
# print (errt, f"{os.linesep}Failed to get {response_type} for {input_type}:", str(input))
return
else:
print("Authentication required")
def get_company_officers(company_id):
url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers"
@@ -89,12 +94,13 @@ def get_companies_at_address(address):
def get_officers_at_address(address):
url = "https://api.company-information.service.gov.uk/search/officers?q=location:" + address
response = make_request(url, address, 'address', 'officers')
if 'items' in response:
officers = []
word_list = []
for word in address.replace(',','').split():
word_list.append(word)
for officer in response['items']:
if all(word in officer['address_snippet'] for word in word_list):
officers.append(officer)
return officers
if response:
if 'items' in response:
officers = []
word_list = []
for word in address.replace(',','').split():
word_list.append(word)
for officer in response['items']:
if all(word in officer['address_snippet'] for word in word_list):
officers.append(officer)
return officers

View File

@@ -1,31 +1,44 @@
from sugartrail import api
from sugartrail import processing
import pandas as pd
import IPython
import numpy as np
import math
import warnings
from string import ascii_lowercase as alc
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)
import json
from string import ascii_letters as alc
class Network:
def __init__(self, officer_id=None, company_id=None, address=None):
self.addresses = pd.DataFrame(columns=['address','lat','lon','n','link_type','node_type','node'])
self.officer_ids = pd.DataFrame(columns=['officer_id','name','n','link_type','node_type','node'])
self.company_ids = pd.DataFrame(columns=['company_id','n','link_type','node_type','node',])
self.companies = pd.DataFrame(columns=['company_number'])
self.address_history = pd.DataFrame(columns=['company_number', 'address', 'start_date', 'end_date', 'lat', 'lon'])
def __init__(self, officer_id=None, company_id=None, address=None, file=None):
self.addresses = []
self.officer_ids = []
self.company_ids = []
self.companies = []
self.address_history = []
self._officer_id = officer_id
self._company_id = company_id
self._address = address
self.n = 0
self.link_type = None
self.initialise_dataframe()
self.hop = self.Hop()
self.hop_history = pd.DataFrame()
self.maxsize_entities = pd.DataFrame(columns=['node','type', 'maxsize_type', 'size'])
self.hop_history = []
self.maxsize_entities = []
self.processed_officers = []
self.processed_companies = []
self.processed_addresses = []
self._file = self.load(file)
self.initialise()
@property
def file(self):
    """Return the value last stored in ``_file``."""
    return self._file

@file.setter
def file(self, new_value):
    """Store a new file name, clear the seed entity and load the file.

    Args:
        new_value: File name handed to ``load``. A falsy value clears the
            seed entity and makes ``load`` do nothing.
    """
    self._file = new_value
    self._officer_id = None
    self._company_id = None
    # The seed address is stored in ``_address`` (see ``__init__``). The
    # previous code reset ``_address_id``, an attribute nothing reads, so
    # the old address survived the switch to a new file.
    self._address = None
    self.load(self._file)
@property
def officer_id(self):
@@ -36,7 +49,7 @@ class Network:
self._officer_id = new_value
self._company_id = None
self._address_id = None
self.initialise_dataframe()
self.initialise()
@property
def company_id(self):
@@ -47,10 +60,10 @@ class Network:
self._company_id = new_value
self._officer_id = None
self._address_id = None
self.initialise_dataframe()
self.initialise()
@property
def address(self):
def address(self, value):
return self._address
@address.setter
@@ -58,98 +71,143 @@ class Network:
self._address = new_value
self._company_id = None
self._officer_id = None
self.initialise_dataframe()
self.initialise()
def initialise_dataframe(self):
self.company_ids = self.company_ids.iloc[0:0]
self.officer_ids = self.officer_ids.iloc[0:0]
self.addresses = self.addresses.iloc[0:0]
if self._officer_id:
if api.get_appointments(self._officer_id):
self.officer_ids = self.officer_ids.append({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}, ignore_index=True)
elif self.company_id:
self.company_ids = self.company_ids.append({'company_id': self._company_id, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}, ignore_index=True)
company = api.get_company(self._company_id)
# company['n'] = self.n
company['link_type'] = self.link_type
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
elif self._address:
self.addresses = self.addresses.append({'address': self._address, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None,}, ignore_index=True)
else:
print("No input provided. Please provide either officer_id, company_id or address value as input.")
# change to initialise
def initialise(self):
    """Seed the network with its starting entity.

    Nothing happens once at least one hop has been recorded (``n >= 1``).
    Otherwise the first truthy value among ``_officer_id``, ``_company_id``
    and ``_address`` is added to the matching list with hop number ``n``.
    When none is set and ``file`` is falsy too, a hint is printed.
    """
    if self.n >= 1:
        return
    if self._officer_id:
        # Fetch the appointments once and reuse the response. The previous
        # code issued the same request twice and indexed the second
        # response without checking it.
        appointments = api.get_appointments(self._officer_id)
        if appointments:
            self.officer_ids.append({
                'officer_id': self._officer_id,
                'name': appointments['items'][0]['name'],
                'n': self.n,
                'link_type': None,
                'node_type': None,
                'node': None,
            })
        else:
            print(f"Officer with ID:{str(self._officer_id)} not found")
    elif self._company_id:
        self.company_ids.append({
            'company_id': self._company_id,
            'n': self.n,
            'link_type': '',
            'node_type': '',
            'node': '',
        })
        company = api.get_company(self._company_id)
        # NOTE(review): ``company`` is passed to ``processing.flatten``
        # unchecked, as before - confirm how a failed lookup should be handled.
        self.companies.append(dict(processing.flatten(company)))
    elif self._address:
        self.addresses.append({
            'address': self._address,
            'n': self.n,
            'link_type': '',
            'node_type': '',
            'node': '',
        })
    elif self.file:
        # A file was supplied, so there is no seed entity to add here.
        pass
    else:
        print("No input provided. Please provide either officer_id, company_id or address value as input.")
def save(self, filename):
    """Write the network's attributes to ``../assets/networks/<filename>`` as JSON.

    Every instance attribute except ``hop`` is serialised with ``json.dumps``.

    Args:
        filename: Name of the file to create inside ``../assets/networks/``
            (resolved relative to the current working directory).

    Raises:
        TypeError: If an attribute is not JSON serialisable.
        OSError: If the target file cannot be opened for writing.
    """
    # NOTE(review): the ``'file'`` filter never matches because the attribute
    # is stored as ``_file``, so ``_file`` is written out too - confirm intent.
    network_data = {k: v for k, v in self.__dict__.items() if k != 'hop' and k != 'file'}
    # Serialise before opening, so a serialisation error leaves no empty file.
    saved_network = json.dumps(network_data)
    # The previous code ended with ``f.close`` (no call), so the handle was
    # never closed explicitly; the context manager guarantees the close.
    with open(f'../assets/networks/{filename}', 'w', encoding='utf-8') as f:
        f.write(saved_network)
def load(self, filename):
    """Restore the network's state from ``../assets/networks/<filename>``.

    Args:
        filename: Name of a JSON file inside ``../assets/networks/``
            (resolved relative to the current working directory). A falsy
            value makes the call a no-op.

    Returns:
        ``filename`` once the state has been restored, otherwise ``None``.
        The constructor stores this return value in ``_file``; previously
        the method always returned ``None``, so ``_file`` never held the
        name of the loaded file.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the file lacks one of the expected keys.
    """
    if not filename:
        return None
    # Context manager closes the handle, which was previously left open.
    with open(f'../assets/networks/{filename}', encoding='utf-8') as f:
        network_data = json.load(f)
    # Same keys, in the same order, as the original one-by-one assignments.
    restored_keys = (
        'addresses',
        'officer_ids',
        'company_ids',
        'companies',
        'address_history',
        '_officer_id',
        '_company_id',
        '_address',
        'n',
        'link_type',
        'hop_history',
        'maxsize_entities',
        'processed_officers',
        'processed_companies',
        'processed_addresses',
    )
    for key in restored_keys:
        setattr(self, key, network_data[key])
    return filename
def add_company_names(self):
self.company_ids['name'] = ''
for i, row in self.company_ids.iterrows():
self.company_ids['name'][i] = self.companies.loc[self.companies['company_number'] == self.company_ids['company_id'][i]]['company_name'].unique()[0]
self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']]
for i, row in enumerate(self.company_ids):
company_details = list(filter(lambda d: d.get('company_number') == row['company_id'], self.companies))
if company_details:
self.company_ids[i]['company_name'] = company_details[0]['company_name']
else:
company_details = api.get_company(row['company_id'])
if company_details:
if 'company_name' in company_details:
self.company_ids[i]['company_name'] = company_details['company_name']
def get_company_from_id(self, company_df=None, company_id=None, print_progress=True):
company_list = []
if company_id:
if company_id in self.company_ids['company_id'].unique():
if company_id in [company['company_id'] for company in self.company_ids]:
company_list = [company_id]
else:
print("add valid company id")
else:
company_list = self.company_ids['company_id'].unique()
company_list = [company['company_id'] for company in self.company_ids]
companies = []
for i, company_id in enumerate(company_list):
IPython.display.clear_output(wait=True)
if print_progress:
print("Processed " + str(i+1) + "/" + str(len(company_list)) + " companies.")
if company_id not in self.companies['company_number'].unique():
if company_id not in [company['company_number'] for company in self.companies]:
if company_df is not None:
try:
company = company_df[company_df[" CompanyNumber"] == str(company_id)]["CompanyName"].item()
if company:
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
companies.append(company)
except:
try:
company = api.get_company(company_id)
if company:
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
companies.append(company)
except:
print(f"Failed to get data for {company_id}")
else:
company = api.get_company(company_id)
if company:
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
companies.append(company)
# add companies to dataframe
self.companies.extend(companies)
def run_map_preprocessing(self):
    """Run the preparation steps in order, then de-duplicate ``address_history``.

    Calls ``get_company_from_id``, ``add_company_names`` and ``get_coords``
    in that order, then drops exact duplicate entries from
    ``address_history``. Two entries count as duplicates only when they hold
    the same key/value pairs in the same key order. Values must be hashable.

    Returns:
        None.
    """
    self.get_company_from_id()
    self.add_company_names()
    self.get_coords()
    # ``dict.fromkeys`` keeps first-seen order. The previous set-based
    # de-duplication reordered the history unpredictably between runs.
    unique_entries = dict.fromkeys(tuple(entry.items()) for entry in self.address_history)
    self.address_history = [dict(items) for items in unique_entries]
    return
def get_coords(self):
for i, row in self.addresses.iterrows():
for i, row in enumerate(self.addresses):
IPython.display.clear_output(wait=True)
print("Processed " + str(i+1) + "/" + str(len(self.addresses)) + " addresses.")
if row.isnull()['lat'] and row.isnull()['lon']:
if 'lat' not in row or 'lon' not in row:
coords = processing.get_coords_from_address(row['address'])
if coords:
self.addresses['lat'][i] = coords['lat']
self.addresses['lon'][i] = coords['lon']
self.addresses[i]['lat'] = coords['lat']
self.addresses[i]['lon'] = coords['lon']
historic_addresses = list(filter(lambda d: d.get('address') == row['address'], self.address_history))
for j, historic_address in enumerate(self.address_history):
if historic_address['address'] == row['address']:
self.address_history[j]['lon'] = coords['lon']
self.address_history[j]['lat'] = coords['lat']
else:
self.addresses[i]['lat'] = ""
self.addresses[i]['lon'] = ""
print("No coords found: " + row['address'])
historic_indices = self.address_history.index[self.address_history["address"]==row['address']].tolist()
for j in historic_indices:
self.address_history["lon"][j] = self.addresses['lon'][i]
self.address_history["lat"][j] = self.addresses['lat'][i]
def find_path(self, select_company):
network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
# network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
network_link_type_rows = list(filter(lambda d: d.get('company_id') == select_company, self.company_ids))
path = []
company_info = self.get_company_from_id(company_id=select_company, print_progress=False)
for i, row in network_link_type_rows.iterrows():
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": self.companies[self.companies["company_number"] == select_company]['company_name'].item(), "node_type": row['link_type'], "link_id": row['node']})
for i, row in enumerate(network_link_type_rows):
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": row['company_name'], "node_type": row['link_type'], "link_id": row['node']})
search_terms = [{'n': row['n']-1, 'node_type':row['node_type'], 'node':row['node']}]
for j in range(row['n']-1,-1,-1):
for term in search_terms:
if term['n'] == j:
if term['node_type'] == "Address":
select_rows = self.addresses.loc[(self.addresses['address'] == term['node']) & (self.addresses['n'] == j)]
for k, select_row in select_rows.iterrows():
###
select_rows = list(filter(lambda d: d.get('address') == term['node'] and d.get('n') == j, self.addresses))
for k, select_row in enumerate(select_rows):
if select_row['n'] == 0:
origin = {'hop': j, "type": "Address", "id": select_row['address'], "node": select_row['address'], "node_type": "", "link_id": ""}
if origin not in path:
@@ -161,22 +219,22 @@ class Network:
path.insert(0, item)
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
elif term['node_type'] == "Company":
select_rows = self.company_ids.loc[(self.company_ids['company_id'] == term['node']) & (self.company_ids['n'] == j)]
for l, select_row in select_rows.iterrows():
select_rows = list(filter(lambda d: d.get('company_id') == term['node'] and d.get('n') == j, self.company_ids))
for l, select_row in enumerate(select_rows):
self.get_company_from_id(company_id=select_row['company_id'], print_progress=False)
if select_row['n'] == 0:
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": "", "link_id": ""}
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": "", "link_id": ""}
if origin not in path:
path.insert(0, origin)
break
else:
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": select_row['link_type'], "link_id": select_row['node']}
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": select_row['link_type'], "link_id": select_row['node']}
if item not in path:
path.insert(0, item)
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
elif term['node_type'] == "Person":
select_rows = self.officer_ids.loc[(self.officer_ids['officer_id'] == term['node']) & (self.officer_ids['n'] == j)]
for m, select_row in select_rows.iterrows():
select_rows = list(filter(lambda d: d.get('officer_id') == term['node'] and d.get('n') == j, self.officer_ids))
for m, select_row in enumerate(select_rows):
if select_row['link_type'] == 0:
origin = {'hop': j, "type": "Person", "id": select_row["officer_id"], "node": select_row['name'], "node_type": "", "link_id": ""}
if origin not in path:
@@ -196,40 +254,58 @@ class Network:
link_indices = []
for j,item in enumerate(sorted_path):
if item['id'] == search_term:
link_indices.append(alc[j].upper())
link_indices.append(alc[j])
sorted_path[i]["link"] = ','.join(link_indices)
sorted_path[i]["node_index"] = alc[i].upper()
sorted_path[i]["node_index"] = alc[i]
return sorted_path
def perform_hop(self, hops, company_data=None):
hop_history = []
for hop in range(hops):
selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address']
selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id']
selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id']
if len(selected_addresses.index) == 0 and len(selected_companies.index) == 0 and len(selected_officers.index) == 0:
print("link_type of network reached.")
# check if previous hop completed, if any processed items then its still mid-processing:
selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))]
selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))]
selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))]
if not selected_addresses and not selected_companies and not selected_officers:
print("Edge of network reached.")
break
else:
self.n += 1
self.hop_history = self.hop_history.append(self.hop.__dict__, ignore_index=True)
for i,address in enumerate(selected_addresses):
self.hop.search_address(self, address, company_data)
if address not in self.processed_addresses:
self.hop.search_address(self, address, company_data)
self.processed_addresses.append(address)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.")
for j,company in enumerate(selected_companies):
self.hop.search_company_id(self,company)
if company not in self.processed_companies:
self.hop.search_company_id(self,company)
self.processed_companies.append(company)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.")
for k,officer in enumerate(selected_officers):
self.hop.search_officer_id(self,officer)
if officer not in self.processed_officers:
self.hop.search_officer_id(self,officer)
self.processed_officers.append(officer)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
self.officer_ids = [i for n, i in enumerate(self.officer_ids) if i not in self.officer_ids[n + 1:]]
self.company_ids = [i for n, i in enumerate(self.company_ids) if i not in self.company_ids[n + 1:]]
self.maxsize_entities = [i for n, i in enumerate(self.maxsize_entities) if i not in self.maxsize_entities[n + 1:]]
self.addresses = [i for n, i in enumerate(self.addresses) if i not in self.addresses[n + 1:]]
self.address_history = [i for n, i in enumerate(self.address_history) if i not in self.address_history[n + 1:]]
self.companies = [i for n, i in enumerate(self.companies) if i not in self.companies[n + 1:]]
self.processed_officers = []
self.processed_companies = []
self.processed_addresses = []
self.n += 1
hop_history.append(self.hop.__dict__)
self.hop_history.extend(hop_history)
class Hop:
def __init__(self):
@@ -248,104 +324,141 @@ class Network:
def search_company_id(self, network, company_id):
officers = []
new_addresses = []
new_officers = []
if self.get_company_officers:
officers = api.get_company_officers(company_id)
if officers:
officers = officers['items']
network.node_type = "Company"
network.node = company_id
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
if officers:
for officer in officers:
if processing.normalise_address(officer['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
network.link_type = "Officer Corresponance Address"
network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if officer['links']['officer']['appointments'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique():
network.link_type = "Officer"
network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if 'address' in officer:
if processing.normalise_address(officer['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
network.link_type = "Officer"
new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if self.get_psc_correspondance_address:
psc = api.get_psc(company_id)
if psc:
for person in psc['items']:
if "address" in person:
network.link_type = "Person of Significant Control Address"
if processing.normalise_address(person['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
network.addresses = network.addresses.append({'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if processing.normalise_address(person['address']) not in lower_n_addresses:
new_address = {'address': processing.normalise_address(person['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
if self.get_company_address_history:
address_history = processing.build_address_history(company_id)
network.address_history = network.address_history.append(address_history, ignore_index=True)
network.address_history.extend(address_history)
for address in address_history:
network.link_type = "Historic Address"
if address['address'] not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
network.address_history = network.address_history.drop_duplicates().reset_index(drop=True)
network.addresses = network.addresses.drop_duplicates().reset_index(drop=True)
network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True)
if address['address'] not in lower_n_addresses:
new_address = {'address': address['address'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(dict({'address': address['address'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}))
# network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
def search_officer_id(self, network, officer_id):
    """Queue the addresses, companies and officers linked to an officer.

    Fetches the officer's appointments and, when the matching
    ``self.get_officer_*`` flag is set, the correspondence address and the
    duplicate officer records. Every result that is not already stored in
    ``network`` with a hop number of ``network.n`` or lower is appended to
    the relevant network list with hop number ``network.n + 1``. A result
    set larger than its ``*_maxsize`` limit is not expanded; a summary
    entry is appended to ``network.maxsize_entities`` instead.

    Args:
        network: Object exposing ``addresses``, ``officer_ids``,
            ``company_ids`` and ``maxsize_entities`` as lists of dicts and
            the current hop number ``n``. Its ``node_type``, ``node`` and
            ``link_type`` attributes are overwritten by this method.
        officer_id: Identifier handed to the ``api`` lookups.

    Returns:
        None. ``network`` is modified in place.
    """
    # NOTE(review): the reviewed text carried no indentation and mixed the
    # old DataFrame code with the list-based code; nesting below was
    # reconstructed from syntax - confirm against the repository.
    new_addresses = []
    new_companies = []
    new_officers = []
    network.node_type = "Person"
    network.node = officer_id
    next_n = network.n + 1

    # Values already recorded at the current hop or earlier; anything found
    # below that is in these lists is not queued again.
    lower_n_addresses = [entry['address'] for entry in network.addresses if entry.get('n') < next_n]
    lower_n_officers = [entry['officer_id'] for entry in network.officer_ids if entry.get('n') < next_n]
    lower_n_companies = [entry['company_id'] for entry in network.company_ids if entry.get('n') < next_n]

    appointments = api.get_appointments(officer_id)
    if appointments:
        items = appointments['items']
        limit = self.officer_appointments_maxsize
        # NOTE(review): a result set whose size equals the limit exactly is
        # neither expanded nor recorded - confirm this is intended.
        if limit is None or len(items) < int(limit or 0):
            for appointment in items:
                appointment_address = processing.normalise_address(appointment['address'])
                if appointment_address not in lower_n_addresses:
                    network.link_type = "Appointment Address"
                    new_address = {
                        'address': appointment_address,
                        'n': next_n,
                        'link_type': network.link_type,
                        'node_type': network.node_type,
                        'node': network.node,
                    }
                    if new_address not in new_addresses:
                        new_addresses.append(new_address)
                company_number = appointment['appointed_to']['company_number']
                if company_number not in lower_n_companies:
                    network.link_type = "Appointment"
                    new_company = {
                        'company_id': company_number,
                        'n': next_n,
                        'link_type': network.link_type,
                        'node_type': network.node_type,
                        'node': network.node,
                    }
                    if new_company not in new_companies:
                        new_companies.append(new_company)
        elif len(items) > int(limit):
            network.maxsize_entities.append({
                'node': officer_id,
                'type': 'Officer',
                'maxsize_type': 'Appointments',
                'size': len(items),
            })

    if self.get_officer_correspondance_address:
        correspondance_address = api.get_correspondance_address(officer_id)
        if correspondance_address:
            # Only the first returned item is used.
            officer_address = processing.normalise_address(correspondance_address['items'][0]['address'])
            if officer_address not in lower_n_addresses:
                network.link_type = "Officer Corresponance Address"
                new_address = {
                    'address': officer_address,
                    'n': next_n,
                    'link_type': network.link_type,
                    'node_type': network.node_type,
                    'node': network.node,
                }
                if new_address not in new_addresses:
                    new_addresses.append(new_address)

    if self.get_officer_duplicates:
        duplicate_officers = api.get_duplicate_officers(officer_id)
        if duplicate_officers:
            limit = self.officer_duplicates_maxsize
            if limit is None or len(duplicate_officers) < int(limit or 0):
                for duplicate in duplicate_officers:
                    network.link_type = "Duplicate Officer"
                    # The id is the third '/'-separated segment of the
                    # record's 'self' link.
                    duplicate_id = duplicate['links']['self'].split('/')[2]
                    if duplicate_id not in lower_n_officers:
                        new_officer = {
                            'officer_id': duplicate_id,
                            'name': duplicate['title'],
                            'n': next_n,
                            'link_type': network.link_type,
                            'node_type': network.node_type,
                            'node': network.node,
                        }
                        if new_officer not in new_officers:
                            new_officers.append(new_officer)
            elif len(duplicate_officers) > int(limit):
                network.maxsize_entities.append({
                    'node': officer_id,
                    'type': 'Officer',
                    'maxsize_type': 'Duplicates',
                    'size': len(duplicate_officers),
                })

    network.addresses.extend(new_addresses)
    network.officer_ids.extend(new_officers)
    network.company_ids.extend(new_companies)
def search_address(self, network, address, company_data):
    """Queue the companies and officers found at an address.

    When ``self.get_companies_at_address`` is set, companies are looked up
    either in ``company_data`` (if it is not None) or through the ``api``
    module. When ``self.get_officers_at_address`` is set, officers are
    looked up through the ``api`` module. Every result that is not already
    stored in ``network`` with a hop number of ``network.n`` or lower is
    appended to the relevant network list with hop number
    ``network.n + 1``. A result set larger than its ``*_maxsize`` limit is
    not expanded; a summary entry is appended to
    ``network.maxsize_entities`` instead.

    Args:
        network: Object exposing ``officer_ids``, ``company_ids`` and
            ``maxsize_entities`` as lists of dicts and the current hop
            number ``n``. Its ``node_type``, ``node`` and ``link_type``
            attributes are overwritten by this method.
        address: Address value handed to the lookups.
        company_data: Optional local company dataset; when it is not None
            it is used instead of the API for the company lookup.

    Returns:
        None. ``network`` is modified in place.
    """
    # NOTE(review): the reviewed text carried no indentation and mixed the
    # old DataFrame code with the list-based code; nesting below was
    # reconstructed from syntax - confirm against the repository.
    new_companies = []
    new_officers = []
    network.node_type = "Address"
    network.node = address
    next_n = network.n + 1

    # Ids already recorded at the current hop or earlier; anything found
    # below that is in these lists is not queued again.
    lower_n_officers = [entry['officer_id'] for entry in network.officer_ids if entry.get('n') < next_n]
    lower_n_companies = [entry['company_id'] for entry in network.company_ids if entry.get('n') < next_n]

    if self.get_companies_at_address:
        if company_data is not None:
            companies = {'items': processing.get_companies_from_address_database(address, company_data)}
        else:
            companies = api.get_companies_at_address(address)
        # Guard against an empty/None API result before the membership
        # test, which would otherwise raise TypeError on None.
        if companies and 'items' in companies:
            items = companies['items']
            limit = self.companies_at_address_maxsize
            # NOTE(review): a result set whose size equals the limit
            # exactly is neither expanded nor recorded - confirm intent.
            if limit is None or len(items) < int(limit or 0):
                for company in items:
                    network.link_type = "Company at Address"
                    if company['company_number'] not in lower_n_companies:
                        new_company = {
                            'company_id': company['company_number'],
                            'n': next_n,
                            'link_type': network.link_type,
                            'node_type': network.node_type,
                            'node': network.node,
                        }
                        if new_company not in new_companies:
                            new_companies.append(new_company)
            elif len(items) > int(limit):
                network.maxsize_entities.append({
                    'node': address,
                    'type': 'Address',
                    'maxsize_type': 'Companies',
                    'size': len(items),
                })

    if self.get_officers_at_address:
        officers = api.get_officers_at_address(address)
        if officers:
            limit = self.officers_at_address_maxsize
            if limit is None or len(officers) < int(limit or 0):
                for officer in officers:
                    # Both keys are read below, so both must be present.
                    # (`'links' and 'title' in officer` only tested 'title'.)
                    if 'links' in officer and 'title' in officer:
                        network.link_type = "Officer at Address"
                        # The id is the third '/'-separated segment of the
                        # record's 'self' link.
                        found_officer_id = officer['links']['self'].split('/')[2]
                        if found_officer_id not in lower_n_officers:
                            new_officer = {
                                'officer_id': found_officer_id,
                                'name': officer['title'],
                                'n': next_n,
                                'link_type': network.link_type,
                                'node_type': network.node_type,
                                'node': network.node,
                            }
                            if new_officer not in new_officers:
                                new_officers.append(new_officer)
            elif len(officers) > int(limit):
                network.maxsize_entities.append({
                    'node': address,
                    'type': 'Address',
                    'maxsize_type': 'Officers',
                    'size': len(officers),
                })

    network.officer_ids.extend(new_officers)
    network.company_ids.extend(new_companies)

View File

@@ -1,9 +1,7 @@
from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea
from ipyleaflet import Map, Marker, MarkerCluster, AwesomeIcon, AntPath, Popup
import pandas as pd
from datetime import datetime
import functools
from string import ascii_lowercase as alc
import math
def build_map(network, clear_widget=True):
@@ -13,10 +11,12 @@ def build_map(network, clear_widget=True):
return m, path_table
def get_address_path(network, company_id):
company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
# company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history))
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
address_path = []
for index, row in company_address_history.iterrows():
if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])):
for index, row in enumerate(company_address_history_sorted):
if not row['lat'] or not row['lon']:
pass
else:
address_path.insert(0,[row['lat'], row['lon']])
@@ -26,21 +26,30 @@ def locations_from_origin_path(path, network):
locations = []
for node in path:
if node['type'] == 'Company':
last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
lat = last_company_address_row['lat'].item()
lon = last_company_address_row['lon'].item()
if math.isnan(float(lat)):
###
company_address_history = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
last_company_address_row = {}
for address_row in company_address_history_sorted:
if address_row['lat'] and address_row['lon']:
last_company_address_row = address_row
break
# last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0]
lat = last_company_address_row['lat']
lon = last_company_address_row['lon']
if not lat or not lon:
pass
else:
locations.append([float(lat),float(lon)])
locations.append([lat,lon])
elif node['type'] == 'Address':
address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
lat = address_row['lat'].item()
lon = address_row['lon'].item()
if math.isnan(float(lat)) or math.isnan(float(lon)):
address_row = list(filter(lambda d: d.get('address') == node['node'], network.addresses))[0]
# address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
lat = address_row['lat']
lon = address_row['lon']
if not lat or not lon:
pass
else:
locations.append([float(lat),float(lon)])
locations.append([lat,lon])
return locations
def on_button_clicked(address_path, path, location, address_trail, path_table, origin_trail, locations_from_origin, **kwargs):
@@ -98,28 +107,30 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
address_trail=address_trail
origin_trail=origin_trail
ms = []
for index, row in network.address_history.iterrows():
path = ""
locations_from_origin = ""
message = HTML()
marker_color = "green"
company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name'].item()
company_status = company['company_status'].item()
if company_status == "active":
if row['end_date'] != None:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
ms.append(marker)
for index, row in enumerate(network.address_history):
if row['lat'] and row['lon']:
path = ""
locations_from_origin = ""
message = HTML()
marker_color = "green"
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
# company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name']
company_status = company['company_status']
if company_status == "active":
if row['end_date']:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
ms.append(marker)
return ms

View File

@@ -4,6 +4,17 @@ import pandas as pd
import random
import urllib
import regex as re
import collections
def flatten(d, parent_key='', sep='.'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with ``sep``,
    e.g. ``{'a': {'b': 1}}`` becomes ``{'a.b': 1}``. Values that are not
    mutable mappings are copied through unchanged. An empty nested mapping
    contributes no keys.

    Args:
        d: Mapping to flatten.
        parent_key: Prefix prepended (with ``sep``) to every key of ``d``;
            the empty string means no prefix.
        sep: Separator placed between key segments.

    Returns:
        A new flat ``dict``.
    """
    # The ABC aliases were removed from the top-level ``collections``
    # namespace in Python 3.10; they live in ``collections.abc``.
    from collections.abc import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
def infer_postcode(address_string):
postcode = re.findall(r'\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b', address_string)
@@ -18,13 +29,6 @@ def get_companies_from_address_database(address, company_data):
companies['registered_office_address'] = [{'address_line_1': row['address_line_1'], 'address_line_2': row['address_line_2'], 'locality': row['locality'], 'postal_code': row['postal_code'], 'country': row['country']} for i,row in companies.iterrows()]
return companies.to_dict('records')
def load_company_data(company_data_filepath):
    """Load a company dataset from CSV, or return None if it cannot be read.

    Args:
        company_data_filepath: Path (or any other input accepted by
            ``pandas.read_csv``) of the CSV to load.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the input is
        missing, unreadable or not parseable as CSV.
    """
    try:
        return pd.read_csv(company_data_filepath)
    # OSError covers missing/unreadable files; ValueError covers pandas'
    # parser, empty-data and decoding errors. A bare ``except`` would also
    # swallow KeyboardInterrupt and SystemExit.
    except (OSError, ValueError):
        return None
def get_nearby_postcode(postcode_string):
url = "http://api.postcodes.io/postcodes/" + postcode_string[:-1] + "/autocomplete"
response = requests.get(url).json()
@@ -89,6 +93,8 @@ def build_address_history(company_id):
addresses = []
entry = {}
entry["company_number"] = str(company_id)
entry["lat"] = ""
entry["lon"] = ""
entry["address"] = str(normalise_address(company_info_subset['registered_office_address']))
entry["start_date"] = str(address_changes['items'][0]['date'])
if 'date_of_cessation' in company_info_subset:
@@ -98,6 +104,8 @@ def build_address_history(company_id):
addresses.append(entry)
for i,change in enumerate(address_changes['items']):
entry = {}
entry["lat"] = ""
entry["lon"] = ""
entry["company_number"] = str(company_id)
if 'old_address' in change['description_values']:
entry["address"] = change['description_values']['old_address']
@@ -120,6 +128,8 @@ def build_address_history(company_id):
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
else:
address_history = []
@@ -131,6 +141,8 @@ def build_address_history(company_id):
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
else:
return []