Merge pull request #11 from ribenamaplesyrup/graph

implemented graph
This commit is contained in:
Sean
2023-01-29 10:08:05 +00:00
committed by GitHub
15 changed files with 588 additions and 618 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.9 MiB

View File

@@ -402,10 +402,10 @@
"def generate_download_zip(company_text, network):\n",
" Path(str(company_text.value)).mkdir(parents=True, exist_ok=True)\n",
" df_company_ids = pd.DataFrame(network.company_ids)\n",
" df_companies = pd.DataFrame(network.companies)\n",
" df_company_records = pd.DataFrame(network.company_records)\n",
" df_addresses = pd.DataFrame(network.addresses)\n",
" df_officer_ids = pd.DataFrame(network.officer_ids)\n",
" files = {'companies': df_company_ids, 'addresses': df_addresses, 'officers': df_officer_ids, 'company_details': df_companies}\n",
" files = {'companies': df_company_ids, 'addresses': df_addresses, 'officers': df_officer_ids, 'company_details': df_company_records}\n",
" for key in files:\n",
" files[key].to_csv(str(company_text.value) + '/' + key + '.csv')\n",
" file = str(company_text.value) + '.json'\n",
@@ -429,7 +429,7 @@
" with tab.children[2]:\n",
" display(pd.DataFrame(network.officer_ids))\n",
" with tab.children[3]:\n",
" display(pd.DataFrame(network.companies))\n",
" display(pd.DataFrame(network.company_records))\n",
" zip_filename = generate_download_zip(company_text, network)\n",
" html_button = html_buttons.format(filename=zip_filename)\n",
" with download_link:\n",

View File

@@ -162,7 +162,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e12f5461",
"id": "11b129ca",
"metadata": {},
"outputs": [],
"source": [
@@ -174,7 +174,7 @@
"id": "91c14cbb",
"metadata": {},
"source": [
"Each company is represented by its unique ID (`company_id`), number of hops from the origin company (`n`) and the company, address or person it connects to. As we've only saved the origin company so far, there isn't any information on links or connected nodes. There are also attributes for storing officer ids (`officer_ids`) and (`addresses`) although they have no information in them yet:"
"Each company is represented by its unique ID (`company_id`), name (`title`), number of hops from the origin company (`depth`) and the company, address or person it connects to. As we've only saved the origin company so far, there isn't any information on links or connected nodes. There are also attributes for storing officer IDs (`officer_ids`) and addresses (`addresses`), although they have no information in them yet:"
]
},
{
@@ -292,7 +292,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.company_ids)"
"network.company_ids"
]
},
{
@@ -310,7 +310,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.officer_ids)"
"network.officer_ids"
]
},
{
@@ -329,6 +329,24 @@
"id": "7083402a",
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
]
},
{
"cell_type": "markdown",
"id": "eb8b7408",
"metadata": {},
"source": [
"We can load multiple results into a DataFrame for better readability:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9240d709",
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.addresses)"
]
@@ -348,7 +366,7 @@
"id": "b4828d92",
"metadata": {},
"source": [
"For reproducibility, each time we perform a hop, the methods and limit configs are stored in "
"For reproducibility, each time we perform a hop, the methods and limit configs are stored in `hop_history` which we can view through `print_hop_history`:"
]
},
{
@@ -358,7 +376,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.hop_history)"
"network.hop_history"
]
},
{
@@ -402,7 +420,7 @@
"id": "dfa1b90c",
"metadata": {},
"source": [
"To see the information added, we can check out `address_history` and `companies` properties of our class:"
"To see the information added, we can check out `address_history` and `company_records`:"
]
},
{
@@ -422,7 +440,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.companies)"
"pd.DataFrame(network.company_records)"
]
},
{

View File

@@ -80,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "9c8ebc89",
"metadata": {},
"outputs": [],
@@ -168,7 +168,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "df617fda",
"metadata": {},
"outputs": [],
@@ -198,28 +198,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "7bdde00f",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d68aba9065b4429e9852696d97be010d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Map(center=[50, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title',…"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# generate map\n",
"map_data,path_table = mapview.build_map(western_crown_network) \n",
@@ -245,7 +229,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(western_crown_network.find_path('10289650'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
"pd.DataFrame(western_crown_network.find_path('10540083'))[['node_index', 'title', 'id', 'depth', 'node_type', 'link_type', 'link']]"
]
},
{
@@ -290,7 +274,7 @@
"metadata": {},
"source": [
"*Active Companies*\n",
"- [CANDY TOWN LTD](https://find-and-update.company-information.service.gov.uk/company/11464159) (1464159)\n",
"- [CANDY TOWN LTD](https://find-and-update.company-information.service.gov.uk/company/11464159) (11464159)\n",
"- [ESPANZA LIMITED](https://find-and-update.company-information.service.gov.uk/company/11474248) (11474248)\n",
"\n",
"*Dissolved Companies*\n",

View File

@@ -45,7 +45,7 @@
"source": [
"officer_id = \"Nd2URspq4bvLy-hwzDZ0_p7FGJw\"\n",
"network = base.Network(officer_id=officer_id)\n",
"network.perform_hop(2)"
"network.perform_hop(3)"
]
},
{
@@ -73,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(network.addresses)['address'].unique()"
"pd.DataFrame(network.addresses)['title'].unique()"
]
},
{
@@ -139,16 +139,6 @@
"Although let's pause to briefly explore: what address would have thousands of companies registered there?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bb8bdf1",
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities[2]['node']"
]
},
{
"cell_type": "markdown",
"id": "e8644d6b",
@@ -276,7 +266,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(regent_street_network.officer_ids)['name'].value_counts()"
"pd.DataFrame(regent_street_network.officer_ids)['title'].value_counts()"
]
},
{

View File

@@ -2,3 +2,4 @@ from . import api
from . import base
from . import processing
from . import mapview
from . import hop

View File

@@ -2,24 +2,21 @@ import sugartrail
import IPython
import json
import functools
from string import ascii_letters as alc
import pandas as pd
class Network:
"""Class represents a network of connected companies, officers and
addresses. Class contains methods to build network of user defined size from
a single seed company, officer or address."""
def __init__(self, officer_id=None, company_id=None, address=None, file=None):
self.addresses = []
self.officer_ids = []
self.company_ids = []
self.companies = []
self.graph = {}
self.company_records = []
self.address_history = []
self._officer_id = officer_id
self._company_id = company_id
self._address = address
self.n = 0
self.link_type = None
self.hop = self.Hop()
self.hop = sugartrail.hop.Hop()
self.hop_history = []
self.maxsize_entities = []
self.processed_officers = []
@@ -32,10 +29,8 @@ class Network:
"""Resets the class attributes to pre-init state."""
@functools.wraps(func)
def wrapper_clear(*args, **kwargs):
args[0].addresses = []
args[0].officer_ids = []
args[0].company_ids = []
args[0].companies = []
args[0].graph = {}
args[0].company_records = []
args[0].address_history = []
args[0]._officer_id = None
args[0]._company_id = None
@@ -62,18 +57,43 @@ class Network:
officer_info = sugartrail.api.get_appointments(new_value)
if officer_info:
self._officer_id = new_value
self.officer_ids = [{
'officer_id': new_value,
'name': officer_info['items'][0]['name'],
'n':self.n,
'link_type': None,
'node_type': None,
'node': None
}]
self.graph = {
new_value: {
'title': officer_info['items'][0]['name'],
'depth':self.n,
'node_type': "Person",
'arcs': []
}
}
else:
print(f"Officer with ID:{str(new_value)} not found")
self._officer_id = None
@property
def officer_ids(self):
    """Get all officers from graph as a list of table rows.

    Every graph node whose ``node_type`` is ``'Person'`` yields one row
    per arc. A node with no arcs yields a single row whose ``link_type``
    and ``link`` are empty strings.

    Returns:
        list of dicts with keys ``officer_id``, ``title``, ``depth``,
        ``link_type`` and ``link``.
    """
    officer_table = []
    for officer_id, officer_data in self.graph.items():
        if officer_data['node_type'] != 'Person':
            continue
        base_row = {
            'officer_id': officer_id,
            'title': officer_data['title'],
            'depth': officer_data['depth'],
        }
        arcs = officer_data['arcs']
        if not arcs:
            officer_table.append({**base_row, 'link_type': '', 'link': ''})
            continue
        for arc in arcs:
            # Build a fresh dict per arc: reusing one dict and calling
            # update() on it would make every appended row alias the same
            # object, so all rows would show the last arc only.
            officer_table.append({
                **base_row,
                'link_type': arc['arc_type'],
                'link': arc['start_node'],
            })
    return officer_table
@property
def company_id(self):
"""company_id property representing seed company."""
@@ -86,18 +106,43 @@ class Network:
company_info = sugartrail.api.get_company(new_value)
if company_info:
self._company_id = new_value
self.company_ids = [{
'company_id': self._company_id,
'n':self.n,
'link_type': '',
'node_type': '',
'node': ''
}]
self.companies = [dict(sugartrail.processing.flatten(company_info))]
self.graph = {
new_value: {
'title': company_info['company_name'],
'depth':self.n,
'node_type': "Company",
'arcs': []
}
}
# self.companies = [dict(sugartrail.processing.flatten(company_info))]
else:
print(f"Company with ID:{str(new_value)} not found")
self._company_id = None
@property
def company_ids(self):
    """Get all companies from graph as a list of table rows.

    Every graph node whose ``node_type`` is ``'Company'`` yields one row
    per arc. A node with no arcs yields a single row whose ``link_type``
    and ``link`` are empty strings.

    Returns:
        list of dicts with keys ``company_id``, ``title``, ``depth``,
        ``link_type`` and ``link``.
    """
    company_table = []
    for company_id, company_data in self.graph.items():
        if company_data['node_type'] != 'Company':
            continue
        base_row = {
            'company_id': company_id,
            'title': company_data['title'],
            'depth': company_data['depth'],
        }
        arcs = company_data['arcs']
        if not arcs:
            company_table.append({**base_row, 'link_type': '', 'link': ''})
            continue
        for arc in arcs:
            # Build a fresh dict per arc: reusing one dict and calling
            # update() on it would make every appended row alias the same
            # object, so all rows would show the last arc only.
            company_table.append({
                **base_row,
                'link_type': arc['arc_type'],
                'link': arc['start_node'],
            })
    return company_table
@property
def address(self, value):
"""address property representing seed address."""
@@ -108,13 +153,38 @@ class Network:
def address(self, new_value):
"""address setter."""
self._address = new_value
self.addresses = [dict({
'address': self._address,
'n':self.n,
'link_type': '',
'node_type': '',
'node': ''
})]
self.graph = {
new_value: {
'title': new_value,
'depth':self.n,
'node_type': "Address",
'arcs': []
}
}
@property
def addresses(self):
    """Get all addresses from graph as a list of table rows.

    Every graph node whose ``node_type`` is ``'Address'`` yields one row
    per arc. A node with no arcs yields a single row whose ``link_type``
    and ``link`` are empty strings.

    Returns:
        list of dicts with keys ``address``, ``title``, ``depth``,
        ``link_type`` and ``link``.
    """
    address_table = []
    for address_string, address_data in self.graph.items():
        if address_data['node_type'] != 'Address':
            continue
        base_row = {
            'address': address_string,
            'title': address_data['title'],
            'depth': address_data['depth'],
        }
        arcs = address_data['arcs']
        if not arcs:
            address_table.append({**base_row, 'link_type': '', 'link': ''})
            continue
        for arc in arcs:
            # Build a fresh dict per arc: reusing one dict and calling
            # update() on it would make every appended row alias the same
            # object, so all rows would show the last arc only.
            address_table.append({
                **base_row,
                'link_type': arc['arc_type'],
                'link': arc['start_node'],
            })
    return address_table
@property
def file(self):
@@ -155,10 +225,8 @@ class Network:
if filename:
f = open(f'../assets/networks/{filename}')
network_data = json.load(f)
self.addresses = network_data['addresses']
self.officer_ids = network_data['officer_ids']
self.company_ids = network_data['company_ids']
self.companies = network_data['companies']
self.graph = network_data['graph']
self.company_records = network_data['company_records']
self.address_history = network_data['address_history']
self._officer_id = network_data['_officer_id']
self._company_id = network_data['_company_id']
@@ -174,233 +242,157 @@ class Network:
def run_map_preprocessing(self):
"""Gets missing/additional information on companies and addresses required for
mapping them. This includes address histories, company records and coordinates."""
self.get_address_histories()
self.get_network_edge_address_histories()
self.get_company_records_from_id()
self.get_coords()
return
def get_address_histories(self):
    """Fetch address histories for companies that do not have one yet.

    A company counts as covered when its number already appears as a
    ``company_number`` in ``self.address_history``. For every other entry
    of ``self.company_ids`` the history is built and any records not
    already stored are appended to ``self.address_history``.
    """
    # ordered, de-duplicated company numbers that already have history rows
    covered_ids = list(dict.fromkeys(record['company_number'] for record in self.address_history))
    for position, company in enumerate(self.company_ids, start=1):
        IPython.display.clear_output(wait=True)
        print("Updated " + str(position) + "/" + str(len(self.company_ids)) + " company addresses.")
        current_id = company['company_id']
        if current_id in covered_ids:
            continue
        covered_ids.append(current_id)
        fetched_history = sugartrail.processing.build_address_history(current_id)
        # keep only records that are not stored yet
        unseen_records = [record for record in fetched_history if record not in self.address_history]
        self.address_history.extend(unseen_records)
def get_company_records_from_id(self, company_df=None, print_progress=True):
"""Gets company records for all company IDs in the network. Additionally
enriches company_ids with company names for improved readability."""
company_list = [company['company_id'] for company in self.company_ids]
companies = []
"""Gets company records for all company IDs in the network."""
company_list = [item for item in self.graph.keys() if self.graph[item]['node_type'] == 'Company']
company_records = []
for i, company_id in enumerate(company_list):
IPython.display.clear_output(wait=True)
if print_progress:
print("Processed " + str(i+1) + "/" + str(len(company_list)) + " companies.")
if company_id not in [company['company_number'] for company in self.companies]:
# if using local Companies House data
if company_id not in [company['company_number'] for company in self.company_records]:
if company_df is not None:
try:
company = company_df[company_df[" CompanyNumber"] == str(company_id)]["CompanyName"].item()
if company:
companies.append(company)
company_records.append(company)
except:
try:
company = sugartrail.api.get_company(company_id)
if company:
companies.append(company)
company_records.append(company)
except:
print(f"Failed to get data for {company_id}")
# otherwise uses API
else:
company = sugartrail.api.get_company(company_id)
if company:
companies.append(company)
# update company_ids with company name
self.company_ids[i]['company_name'] = company['company_name']
else:
self.company_ids[i]['company_name'] = list(filter(lambda d: d.get('company_number') == company_id, self.companies))[0]['company_name']
self.companies.extend(companies)
company_records.append(company)
self.company_records.extend(company_records)
def get_network_edge_address_histories(self):
    """Add address histories for the companies sitting at depth ``self.n``.

    Does nothing unless ``self.hop.get_company_address_history`` is set.
    Each history record that carries an ``'address'`` is appended to
    ``self.address_history``. Its address becomes an ``"Address"`` node at
    depth ``self.n + 1`` if it is not in the graph yet, and a
    ``"Historic Address"`` arc back to the company is recorded on it.
    """
    if not self.hop.get_company_address_history:
        return
    edge_company_ids = [
        node_id for node_id, node in self.graph.items()
        if node['depth'] == self.n and node['node_type'] == 'Company'
    ]
    for position, company in enumerate(edge_company_ids, start=1):
        IPython.display.clear_output(wait=True)
        print("Processed " + str(position) + "/" + str(len(edge_company_ids)) + " company addresses.")
        history = sugartrail.processing.build_address_history(company)
        if not history:
            continue
        for record in history:
            if 'address' not in record:
                continue
            self.address_history.append(record)
            address_key = record['address']
            if address_key not in self.graph:
                self.graph[address_key] = {
                    'depth': self.n+1,
                    'title': address_key,
                    'node_type': "Address",
                    'arcs': []
                }
            address_node = self.graph[address_key]
            link = {'arc_type': "Historic Address", 'start_node': company}
            # only nodes one hop beyond the edge get the arc, and only once
            if link not in address_node['arcs'] and address_node['depth'] == self.n+1:
                address_node['arcs'].append(link)
def get_coords(self):
"""Gets coordinates for each address in addresses and address_history."""
for i, row in enumerate(self.addresses):
address_coords = {}
for i, address in enumerate(self.address_history):
IPython.display.clear_output(wait=True)
print("Processed " + str(i+1) + "/" + str(len(self.addresses)) + " addresses.")
if 'lat' not in row or 'lon' not in row:
coords = sugartrail.processing.get_coords_from_address(row['address'])
print("Processed " + str(i+1) + "/" + str(len(self.address_history)) + " addresses.")
if address['address'] not in address_coords:
coords = sugartrail.processing.get_coords_from_address(address['address'])
if coords:
self.addresses[i]['lat'] = coords['lat']
self.addresses[i]['lon'] = coords['lon']
historic_addresses = list(filter(lambda d: d.get('address') == row['address'], self.address_history))
for j, historic_address in enumerate(self.address_history):
if historic_address['address'] == row['address']:
self.address_history[j]['lon'] = coords['lon']
self.address_history[j]['lat'] = coords['lat']
address_coords[address['address']] = {'lat': coords['lat'], 'lon': coords['lon']}
else:
# no coords found
self.addresses[i]['lat'] = ""
self.addresses[i]['lon'] = ""
address_coords[address['address']] = {'lat': '', 'lon': ''}
self.address_history[i]['lat'] = address_coords[address['address']]['lat']
self.address_history[i]['lon'] = address_coords[address['address']]['lon']
self.graph[address['address']]['lat'] = address_coords[address['address']]['lat']
self.graph[address['address']]['lon'] = address_coords[address['address']]['lon']
def find_path(self, select_company):
def find_path(self, company_id):
"""Finds the path from the selected company to the origin node."""
# retrieve rows containing selected company:
network_link_type_rows = list(filter(lambda d: d.get('company_id') == select_company, self.company_ids))
path = []
# iterate through each path from selected company to seed company:
for i, row in enumerate(network_link_type_rows):
# insert end of path node:
path.insert(0, {
'hop': row['n'],
"type": "Company",
"id": select_company,
"node": row['company_name'],
"node_type": row['link_type'],
"link_id": row['node']
})
# define search terms for locating connected nodes:
search_terms = [{
'n': row['n']-1,
'node_type':row['node_type'],
'node':row['node']
}]
# iterate through degrees of seperation till origin is reached:
for j in range(row['n']-1,-1,-1):
for term in search_terms:
if term['n'] == j:
if term['node_type'] == "Address":
select_rows = list(filter(lambda d: d.get('address') == term['node'] and d.get('n') == j, self.addresses))
for k, select_row in enumerate(select_rows):
if select_row['n'] == 0:
origin = {
'hop': j,
"type": "Address",
"id": select_row['address'],
"node": select_row['address'],
"node_type": "",
"link_id": ""
}
if origin not in path:
path.insert(0, origin)
break
else:
item = {
'hop': j,
"type": "Address",
"id": select_row['address'],
"node": select_row['address'],
"node_type": select_row['link_type'],
"link_id": select_row['node']
}
if item not in path:
path.insert(0, item)
search_terms.append({
'n': j-1,
'node_type':select_row['node_type'],
'node':select_row['node']
})
elif term['node_type'] == "Company":
select_rows = list(filter(lambda d: d.get('company_id') == term['node'] and d.get('n') == j, self.company_ids))
for l, select_row in enumerate(select_rows):
if select_row['n'] == 0:
origin = {
'hop': j,
"type": "Company",
"id": select_row['company_id'],
"node": select_row['company_name'],
"node_type": "",
"link_id": ""
}
if origin not in path:
path.insert(0, origin)
break
else:
item = {
'hop': j,
"type": "Company",
"id": select_row['company_id'],
"node": select_row['company_name'],
"node_type": select_row['link_type'],
"link_id": select_row['node']
}
if item not in path:
path.insert(0, item)
search_terms.append({
'n': j-1,
'node_type':select_row['node_type'],
'node':select_row['node']
})
elif term['node_type'] == "Person":
select_rows = list(filter(lambda d: d.get('officer_id') == term['node'] and d.get('n') == j, self.officer_ids))
for m, select_row in enumerate(select_rows):
if select_row['link_type'] == 0:
origin = {
'hop': j,
"type": "Person",
"id": select_row["officer_id"],
"node": select_row['name'],
"node_type": "",
"link_id": ""
}
if origin not in path:
path.insert(0, origin)
break
else:
item = {
'hop': j,
"type": "Person",
"id": select_row["officer_id"],
"node": str(select_row['name']),
"node_type": str(select_row['link_type']),
"link_id": select_row['node']
}
if item not in path:
path.insert(0, item)
search_terms.append({
'n': j-1,
'node_type':select_row['node_type'],
'node':select_row['node']
})
else:
print(f"{row['node_type']} is invalid node_type")
break
sorted_path = sorted(path, key=lambda d: d['hop'])
# add letter correspondance for readability
for i in range(len(sorted_path)-1,-1,-1):
search_term = sorted_path[i]['link_id']
link_indices = []
for j,item in enumerate(sorted_path):
if item['id'] == search_term:
link_indices.append(alc[j])
sorted_path[i]["link"] = ','.join(link_indices)
sorted_path[i]["node_index"] = alc[i]
return sorted_path
end_node = dict(self.graph[company_id])
if not end_node['arcs']:
# start_node selected
end_node.update({
'id': company_id,
'link_type': '',
'link': ''
})
path.append(dict((k, end_node[k]) for k in ('title', 'depth', 'node_type', 'id', 'link', 'link_type')))
else:
# work back from the end node to the start node
for arc in end_node['arcs']:
connection = dict((k, end_node[k]) for k in ('title', 'depth', 'node_type'))
connection.update({
'id': company_id,
'link_type': arc['arc_type'],
'link': arc['start_node']
})
path.append(connection)
for connection in path:
id = connection['link']
node = dict(self.graph[id])
if node['arcs']:
for arc in node['arcs']:
connection = dict((k, node[k]) for k in ('title', 'depth', 'node_type'))
connection.update({
'id': id,
'link_type': arc['arc_type'],
'link': arc['start_node']
})
if connection not in path:
path.append(connection)
else:
start_node = dict((k, node[k]) for k in ('title', 'depth', 'node_type'))
start_node.update({
'id': id,
'link_type': '',
'link': ''
})
path.append(start_node)
break
path.reverse()
path = sugartrail.processing.condense_path(path)
path = sugartrail.processing.asciiify_path(path)
return path
def perform_hop(self, hops, company_data=None):
"""Gets companies, officers and addresses within n-degrees of seperation
from current nodes, where n is the number of hops."""
hop_history = []
for hop in range(hops):
# select the nodes for which the method will retrieve other nodes
# 1-degree of seperation from:
selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))]
selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))]
selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))]
# retrieve addresses, companies and officers at edge of network
selected_addresses, selected_companies, selected_officers = [], [], []
for k in self.graph.keys():
if self.graph[k]['depth'] == self.n:
if self.graph[k]['node_type'] == 'Address':
selected_addresses.append(k)
elif self.graph[k]['node_type'] == 'Person':
selected_officers.append(k)
elif self.graph[k]['node_type'] == 'Company':
selected_companies.append(k)
if not selected_addresses and not selected_companies and not selected_officers:
print("Edge of network reached.")
break
# get new addresses, companies and officers connected to selected
else:
for i,address in enumerate(selected_addresses):
# in-case method was run previously and failed to complete,
# check if address was previously processed:
if address not in self.processed_addresses:
self.hop.search_address(self, address, company_data)
self.processed_addresses.append(address)
@@ -408,8 +400,6 @@ class Network:
print("Hop number: " + str(hop+1))
print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.")
for j,company in enumerate(selected_companies):
# in-case method was run previously and failed to complete,
# check if company was previously processed:
if company not in self.processed_companies:
self.hop.search_company_id(self,company)
self.processed_companies.append(company)
@@ -418,8 +408,6 @@ class Network:
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.")
for k,officer in enumerate(selected_officers):
# in-case method was run previously and failed to complete,
# check if officer was previously processed:
if officer not in self.processed_officers:
self.hop.search_officer_id(self,officer)
self.processed_officers.append(officer)
@@ -428,260 +416,8 @@ class Network:
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
self.officer_ids = [i for n, i in enumerate(self.officer_ids) if i not in self.officer_ids[n + 1:]]
self.company_ids = [i for n, i in enumerate(self.company_ids) if i not in self.company_ids[n + 1:]]
self.maxsize_entities = [i for n, i in enumerate(self.maxsize_entities) if i not in self.maxsize_entities[n + 1:]]
self.addresses = [i for n, i in enumerate(self.addresses) if i not in self.addresses[n + 1:]]
self.address_history = [i for n, i in enumerate(self.address_history) if i not in self.address_history[n + 1:]]
self.companies = [i for n, i in enumerate(self.companies) if i not in self.companies[n + 1:]]
self.processed_officers = []
self.processed_companies = []
self.processed_addresses = []
self.processed_officers, self.processed_companies, self.processed_addresses = [],[],[]
self.n += 1
hop_history.append(self.hop.__dict__)
self.hop_history.extend(hop_history)
class Hop:
"""Class attributes store the criteria for each hop. Class contains
methods for getting officers, addresses and companies using the
criteria."""
def __init__(self):
"""Set the default criteria used by the search_* methods on each hop."""
# search_company_id: look up the company's officers
self.get_company_officers = True
# search_company_id: build the company's address history
self.get_company_address_history = True
# search_company_id: collect addresses of persons of significant control
self.get_psc_correspondance_address = True
# NOTE(review): not read by any search_* method visible in this file;
# search_officer_id fetches appointments unconditionally - confirm intent
self.get_officer_appointments = True
# search_officer_id: appointments are only expanded when their count is
# below this value (None disables the limit)
self.officer_appointments_maxsize = 50
# search_officer_id: look up the officer's correspondence address
self.get_officer_correspondance_address = True
# search_officer_id: look up duplicate officer records
self.get_officer_duplicates = True
# search_officer_id: limit on duplicate officers (None disables the limit)
self.officer_duplicates_maxsize = None
# search_address: look up officers registered at the address
self.get_officers_at_address = True
# search_address: limit on officers per address (None disables the limit)
self.officers_at_address_maxsize = 50
# search_address: look up companies registered at the address
self.get_companies_at_address = True
# search_address: limit on companies per address (None disables the limit)
self.companies_at_address_maxsize = 50
def search_company_id(self, network, company_id):
"""Gets officers and addresses connected to input company
(company_id).

Each lookup is gated by the matching ``get_*`` flag on this Hop. New rows
are tagged with ``n = network.n+1`` and with the company as their source
node, then appended to ``network.addresses`` and ``network.officer_ids``.
The company's address history is also appended to
``network.address_history``. Nothing is returned.

Args:
    network: object exposing ``n``, ``addresses``, ``officer_ids`` and
        ``address_history``; ``node_type``, ``node`` and ``link_type``
        are written on it as scratch state.
    company_id: identifier of the company to expand.
"""
officers = []
new_addresses = []
new_officers = []
if self.get_company_officers:
# get officers at company
officers = sugartrail.api.get_company_officers(company_id)
if officers:
if 'items' in officers:
# unwrap the API response so `officers` is the list of officer records
officers = officers['items']
# process officer results
network.node_type = "Company"
network.node = company_id
# find addresses and officers already added to the network
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
if officers:
for officer in officers:
# element [2] of the '/'-split appointments link is used as the officer id
# (presumably a path shaped like /officers/<id>/appointments - TODO confirm)
if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
network.link_type = "Officer"
new_officer = {
'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]),
'name': sugartrail.processing.normalise_name(officer['name']),
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
# skip exact duplicates collected during this call
if new_officer not in new_officers:
new_officers.append(new_officer)
if self.get_psc_correspondance_address:
# get address for company pscs
psc = sugartrail.api.get_psc(company_id)
if psc:
if 'items' in psc:
for person in psc['items']:
if "address" in person:
network.link_type = "Person of Significant Control Address"
if sugartrail.processing.normalise_address(person['address']) not in lower_n_addresses:
new_address = {
'address': sugartrail.processing.normalise_address(person['address']),
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_address not in new_addresses:
new_addresses.append(new_address)
if self.get_company_address_history:
# get company address history
address_history = sugartrail.processing.build_address_history(company_id)
# NOTE(review): extend() and the loop below assume build_address_history
# returns an iterable, never None - confirm against the helper
network.address_history.extend(address_history)
for address in address_history:
network.link_type = "Historic Address"
if 'address' in address:
if address['address'] not in lower_n_addresses:
new_address = {
'address': address['address'],
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_address not in new_addresses:
# appends a freshly built dict with the same contents as new_address
new_addresses.append(dict({
'address': address['address'],
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}))
# publish everything collected in this call to the network
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
def search_officer_id(self, network, officer_id):
"""Gets officers, companies and addresses connected to input officer
(officer_id).

New rows are tagged with ``n = network.n+1`` and with the officer as their
source node, then appended to ``network.addresses``,
``network.officer_ids`` and ``network.company_ids``. Result sets larger
than the configured ``*_maxsize`` are not expanded; they are recorded in
``network.maxsize_entities`` instead. Nothing is returned.

Args:
    network: object exposing ``n``, ``addresses``, ``officer_ids``,
        ``company_ids`` and ``maxsize_entities``; ``node_type``, ``node``
        and ``link_type`` are written on it as scratch state.
    officer_id: identifier of the officer to expand.
"""
new_addresses = []
new_companies = []
new_officers = []
network.node_type = "Person"
network.node = officer_id
# values already present in the network at depth <= network.n
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n+1, network.company_ids))]
appointments = sugartrail.api.get_appointments(officer_id)
if appointments:
# NOTE(review): this test is `< maxsize` and the matching elif is `> maxsize`,
# so a result with exactly maxsize items appears to be neither expanded nor
# recorded in maxsize_entities - confirm whether that is intended
if self.officer_appointments_maxsize == None or len(appointments['items']) < int(self.officer_appointments_maxsize or 0):
for appointment in appointments['items']:
if sugartrail.processing.normalise_address(appointment['address']) not in lower_n_addresses:
network.link_type = "Appointment Address"
new_address = {
'address': sugartrail.processing.normalise_address(appointment['address']),
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_address not in new_addresses:
new_addresses.append(new_address)
if appointment['appointed_to']['company_number'] not in lower_n_companies:
network.link_type = "Appointment"
new_company = {
'company_id': appointment['appointed_to']['company_number'],
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(appointments['items']) > int(self.officer_appointments_maxsize):
# too many appointments: record the officer instead of expanding it
network.maxsize_entities.append(dict({
'node':officer_id,
'type': 'Officer',
'maxsize_type': 'Appointments',
'size': len(appointments['items'])
}))
if self.get_officer_correspondance_address:
correspondance_address = sugartrail.api.get_correspondance_address(officer_id)
if correspondance_address:
# only the first item of the response is used
if sugartrail.processing.normalise_address(correspondance_address['items'][0]['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {
'address': sugartrail.processing.normalise_address(correspondance_address['items'][0]['address']),
'n':network.n+1,
'link_type':network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_address not in new_addresses:
new_addresses.append(new_address)
if self.get_officer_duplicates:
duplicate_officers = sugartrail.api.get_duplicate_officers(officer_id)
if duplicate_officers:
# NOTE(review): same `<` / `>` pairing as for appointments above, so a
# result of exactly officer_duplicates_maxsize items appears to be skipped
if self.officer_duplicates_maxsize == None or len(duplicate_officers) < int(self.officer_duplicates_maxsize or 0):
for duplicate in duplicate_officers:
network.link_type = "Duplicate Officer"
# element [2] of the '/'-split self link is used as the officer id
if duplicate['links']['self'].split('/')[2] not in lower_n_officers:
new_officer = {
'officer_id': duplicate['links']['self'].split('/')[2],
'name': duplicate['title'], 'n':network.n+1,
'link_type': network.link_type,
'node_type': network.node_type,
'node': network.node
}
if new_officer not in new_officers:
new_officers.append(new_officer)
elif len(duplicate_officers) > int(self.officer_duplicates_maxsize):
# too many duplicates: record the officer instead of expanding it
network.maxsize_entities.append(dict({
'node':officer_id,
'type': 'Officer',
'maxsize_type': 'Duplicates',
'size': len(duplicate_officers)
}))
# publish everything collected in this call to the network
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
network.company_ids.extend(new_companies)
def search_address(self, network, address, company_data):
    """Collect the companies and officers found at an address.

    Candidates are staged locally and appended to ``network.company_ids``
    and ``network.officer_ids`` at the end, each tagged with depth
    ``network.n + 1`` and the link type/node that led to it. Entities
    already recorded at depth ``network.n`` or lower are not added again.
    A result set that is not expanded because of its ``*_maxsize`` limit is
    logged in ``network.maxsize_entities`` instead.

    When ``company_data`` is not None it is searched through
    ``sugartrail.processing.get_companies_from_address_database`` in place
    of the companies-at-address API call.
    """
    next_depth = network.n + 1
    new_companies = []
    new_officers = []
    network.node_type = "Address"
    network.node = address
    # Ids already present at a lower depth; sets give O(1) membership tests.
    known_officers = {
        officer['officer_id']
        for officer in network.officer_ids
        if officer.get('n') < next_depth
    }
    known_companies = {
        company['company_id']
        for company in network.company_ids
        if company.get('n') < next_depth
    }
    if self.get_companies_at_address:
        if company_data is not None:
            companies = {
                'items': sugartrail.processing.get_companies_from_address_database(address, company_data)
            }
        else:
            companies = sugartrail.api.get_companies_at_address(address)
        if companies and 'items' in companies:
            items = companies['items']
            limit = self.companies_at_address_maxsize
            if limit is None or len(items) < int(limit or 0):
                for company in items:
                    network.link_type = "Company at Address"
                    if company['company_number'] in known_companies:
                        continue
                    new_company = {
                        'company_id': company['company_number'],
                        'n': next_depth,
                        'link_type': network.link_type,
                        'node_type': network.node_type,
                        'node': network.node
                    }
                    if new_company not in new_companies:
                        new_companies.append(new_company)
            else:
                # Everything that is not expanded is logged, including a
                # result set whose size equals the limit exactly.
                network.maxsize_entities.append({
                    'node': address,
                    'type': 'Address',
                    'maxsize_type': 'Companies',
                    'size': len(items)
                })
    if self.get_officers_at_address:
        officers = sugartrail.api.get_officers_at_address(address)
        if officers:
            limit = self.officers_at_address_maxsize
            if limit is None or len(officers) < int(limit or 0):
                for officer in officers:
                    # Both keys are read below, so both must be present.
                    if 'links' not in officer or 'title' not in officer:
                        continue
                    network.link_type = "Officer at Address"
                    officer_id = officer['links']['self'].split('/')[2]
                    if officer_id in known_officers:
                        continue
                    new_officer = {
                        'officer_id': officer_id,
                        'name': officer['title'],
                        'n': next_depth,
                        'link_type': network.link_type,
                        'node_type': network.node_type,
                        'node': network.node
                    }
                    if new_officer not in new_officers:
                        new_officers.append(new_officer)
            else:
                network.maxsize_entities.append({
                    'node': address,
                    'type': 'Address',
                    'maxsize_type': 'Officers',
                    'size': len(officers)
                })
    network.officer_ids.extend(new_officers)
    network.company_ids.extend(new_companies)

221
sugartrail/hop.py Normal file
View File

@@ -0,0 +1,221 @@
import sugartrail
class Hop:
    """Criteria for a single hop of a network search.

    The boolean ``get_*`` attributes switch each kind of lookup on or off
    and the ``*_maxsize`` attributes cap how many results one node may
    contribute (``None`` means no cap). The ``search_*`` methods apply the
    criteria to one node and write what they find into ``network.graph``,
    a dict keyed by node id whose values hold ``depth``, ``title``,
    ``node_type`` and a list of ``arcs``.
    """

    def __init__(self):
        # Lookups performed for a company node.
        self.get_company_officers = True
        self.get_company_address_history = True
        self.get_psc_correspondance_address = True
        # Lookups performed for an officer node.
        self.get_officer_appointments = True
        self.officer_appointments_maxsize = 50
        self.get_officer_correspondance_address = True
        self.get_officer_duplicates = True
        self.officer_duplicates_maxsize = None
        # Lookups performed for an address node.
        self.get_officers_at_address = True
        self.officers_at_address_maxsize = 50
        self.get_companies_at_address = True
        self.companies_at_address_maxsize = 50

    @staticmethod
    def _within_maxsize(maxsize, size):
        """Return True when a result set of ``size`` may be expanded."""
        return maxsize is None or size < int(maxsize or 0)

    @staticmethod
    def _record_oversize(network, node, node_type, maxsize_type, size):
        """Log a result set that was not expanded because of its limit."""
        network.maxsize_entities.append({
            'node': node,
            'type': node_type,
            'maxsize_type': maxsize_type,
            'size': size
        })

    @staticmethod
    def _add_link(network, node_id, title, node_type, arc_type, start_node):
        """Ensure ``node_id`` is in the graph and attach an arc to it."""
        depth = network.n + 1
        if node_id not in network.graph:
            network.graph[node_id] = {
                'depth': depth,
                'title': title,
                'node_type': node_type,
                'arcs': []
            }
        node = network.graph[node_id]
        arc = {
            'arc_type': arc_type,
            'start_node': start_node
        }
        # Arcs are only attached to nodes stored at this hop's depth; nodes
        # first stored at a different depth keep their existing arcs.
        if node['depth'] == depth and arc not in node['arcs']:
            node['arcs'].append(arc)

    @staticmethod
    def _officer_id_from_self_link(record):
        """Extract the officer id from a search result's ``links.self``."""
        return record['links']['self'].split('/')[2]

    def search_company_id(self, network, company_id):
        """Gets officers and addresses connected to input company
        (company_id).

        Adds the company's officers, the addresses of its persons of
        significant control and its address history to ``network.graph``.
        Address-history entries are also appended to
        ``network.address_history``.
        """
        if self.get_company_officers:
            response = sugartrail.api.get_company_officers(company_id)
            # Only the 'items' list holds officer records; a dict without it
            # must not be iterated key by key.
            if isinstance(response, dict):
                officers = response.get('items') or []
            else:
                officers = response or []
            for officer in officers:
                officer_id = str(officer['links']['officer']['appointments'].split('/')[2])
                self._add_link(
                    network,
                    officer_id,
                    sugartrail.processing.normalise_name(officer['name']),
                    "Person",
                    "Officer",
                    company_id
                )
        if self.get_psc_correspondance_address:
            # get address for company pscs
            psc = sugartrail.api.get_psc(company_id)
            if psc and 'items' in psc:
                for person in psc['items']:
                    if "address" not in person:
                        continue
                    psc_address = sugartrail.processing.normalise_address(person['address'])
                    self._add_link(
                        network,
                        psc_address,
                        psc_address,
                        "Address",
                        "Person of Significant Control Address",
                        company_id
                    )
        if self.get_company_address_history:
            # get company address history
            address_history = sugartrail.processing.build_address_history(company_id)
            for entry in address_history or []:
                if 'address' not in entry:
                    continue
                network.address_history.append(entry)
                self._add_link(
                    network,
                    entry['address'],
                    entry['address'],
                    "Address",
                    "Historic Address",
                    company_id
                )

    def search_officer_id(self, network, officer_id):
        """Gets companies, addresses and duplicate officers connected to
        input officer (officer_id).

        Each lookup runs only when its ``get_officer_*`` flag is set. Result
        sets that are not expanded because of a maxsize limit are logged in
        ``network.maxsize_entities``.
        """
        if self.get_officer_appointments:
            appointments = sugartrail.api.get_appointments(officer_id)
            if appointments and 'items' in appointments:
                items = appointments['items']
                if self._within_maxsize(self.officer_appointments_maxsize, len(items)):
                    for appointment in items:
                        appointed_to = appointment['appointed_to']
                        company_number = appointed_to['company_number']
                        self._add_link(
                            network,
                            company_number,
                            # Fall back to the number when no name is given.
                            appointed_to.get('company_name', company_number),
                            "Company",
                            "Appointment",
                            officer_id
                        )
                else:
                    # Also covers a result set exactly at the limit, which is
                    # not expanded and would otherwise go unreported.
                    self._record_oversize(network, officer_id, 'Officer', 'Appointments', len(items))
        if self.get_officer_correspondance_address:
            correspondance_address = sugartrail.api.get_correspondance_address(officer_id)
            if correspondance_address:
                items = correspondance_address.get('items') or []
                # Only the first item is used; skip when it has no address.
                if items and 'address' in items[0]:
                    officer_address = sugartrail.processing.normalise_address(items[0]['address'])
                    self._add_link(
                        network,
                        officer_address,
                        officer_address,
                        "Address",
                        "Officer Corresponance Address",
                        officer_id
                    )
        if self.get_officer_duplicates:
            duplicate_officers = sugartrail.api.get_duplicate_officers(officer_id)
            if duplicate_officers:
                if self._within_maxsize(self.officer_duplicates_maxsize, len(duplicate_officers)):
                    for duplicate in duplicate_officers:
                        # Same guard as for officers found at an address.
                        if 'links' not in duplicate or 'title' not in duplicate:
                            continue
                        self._add_link(
                            network,
                            self._officer_id_from_self_link(duplicate),
                            duplicate['title'],
                            "Person",
                            "Duplicate Officer",
                            officer_id
                        )
                else:
                    self._record_oversize(network, officer_id, 'Officer', 'Duplicates', len(duplicate_officers))

    def search_address(self, network, address, company_data):
        """Gets companies and officers connected to input address
        (address).

        When ``company_data`` is not None it is searched through
        ``sugartrail.processing.get_companies_from_address_database`` in
        place of the companies-at-address API call.
        """
        if self.get_companies_at_address:
            if company_data is not None:
                companies = {
                    'items': sugartrail.processing.get_companies_from_address_database(address, company_data) or []
                }
            else:
                companies = sugartrail.api.get_companies_at_address(address)
            if companies and 'items' in companies:
                items = companies['items']
                if self._within_maxsize(self.companies_at_address_maxsize, len(items)):
                    for company in items:
                        company_number = company['company_number']
                        self._add_link(
                            network,
                            company_number,
                            # Fall back to the number when no name is given.
                            company.get('company_name', company_number),
                            "Company",
                            "Company at Address",
                            address
                        )
                else:
                    self._record_oversize(network, address, 'Address', 'Companies', len(items))
        if self.get_officers_at_address:
            officers = sugartrail.api.get_officers_at_address(address)
            if officers:
                if self._within_maxsize(self.officers_at_address_maxsize, len(officers)):
                    for officer in officers:
                        # Both keys are read below, so both must be present.
                        if 'links' not in officer or 'title' not in officer:
                            continue
                        self._add_link(
                            network,
                            self._officer_id_from_self_link(officer),
                            officer['title'],
                            "Person",
                            "Officer at Address",
                            address
                        )
                else:
                    self._record_oversize(network, address, 'Address', 'Officers', len(officers))

View File

@@ -64,52 +64,53 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
if row['lat'] and row['lon']:
marker_color = "green"
# locate company at historic address
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
company_name = company['company_name']
company_status = company['company_status']
if company_status == "active":
if row['end_date']:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
# find path from company to origin
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
message = HTML()
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
# find historic addresses path for company
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(
icon=icon,
opacity=1,
location=(row['lat'],
row['lon']),
draggable=False,
popup=message,
title="Address"
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.company_records))
if company:
company_name = company[0]['company_name']
company_status = company[0]['company_status']
if company_status == "active":
if row['end_date']:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
# find path from company to origin
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
message = HTML()
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
# attach on-click behaviour for marker
marker.on_click(functools.partial(
on_button_clicked,
address_path=address_path,
address_trail=address_trail,
path_table=path_table,
origin_trail=origin_trail,
path=path, location=(row['lat'], row['lon']),
locations_from_origin = locations_from_origin
))
markers.append(marker)
# find historic addresses path for company
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(
icon=icon,
opacity=1,
location=(row['lat'],
row['lon']),
draggable=False,
popup=message,
title="Address"
)
# attach on-click behaviour for marker
marker.on_click(functools.partial(
on_button_clicked,
address_path=address_path,
address_trail=address_trail,
path_table=path_table,
origin_trail=origin_trail,
path=path, location=(row['lat'], row['lon']),
locations_from_origin = locations_from_origin
))
markers.append(marker)
return markers
def locations_from_origin_path(path, network):
"""Returns list of addresses found within origin path."""
locations = []
for node in path:
if node['type'] == 'Company':
if node['node_type'] == 'Company':
# finds location for company node
company_address_history = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
@@ -125,14 +126,13 @@ def locations_from_origin_path(path, network):
pass
else:
locations.append([lat,lon])
elif node['type'] == 'Address':
address_row = list(filter(lambda d: d.get('address') == node['node'], network.addresses))[0]
lat = address_row['lat']
lon = address_row['lon']
if not lat or not lon:
pass
else:
elif node['node_type'] == 'Address':
if 'lat' in network.graph[node['id']]:
lat = network.graph[node['id']]['lat']
lon = network.graph[node['id']]['lon']
locations.append([lat,lon])
else:
pass
return locations
def get_address_path(network, company_id):
@@ -158,12 +158,12 @@ def on_button_clicked(address_path, path, location, address_trail, path_table, o
def html_table_generator(path):
    """Generates table for displaying origin path data.

    Each entry of ``path`` contributes one row built from its
    ``node_index``, ``title``, ``depth``, ``link_type`` and ``link`` values.
    Returns the table (preceded by its inline stylesheet) as an HTML string.
    """
    # Imported locally so the block does not depend on a module-level import.
    from html import escape

    table_style = '<style>table {font-family: arial, sans-serif;border-collapse: collapse;}td, th {border: 1px solid #dddddd;text-align: left;padding: 8px;}tr:nth-child(even) {background-color: #dddddd;}</style>'
    headers = ['Node Index', 'Title', 'Depth', 'Link Type', 'Link']
    columns = ('node_index', 'title', 'depth', 'link_type', 'link')
    headers_row = "".join('<th>' + header + '</th>' for header in headers)
    rows = []
    for node in path:
        # Values are stringified and escaped so text such as "A & B <LTD>"
        # is shown literally instead of being parsed as markup.
        cells = "</td><td>".join(escape(str(node[column])) for column in columns)
        rows.append('<tr><td>' + cells + '</td></tr>')
    table_html = table_style + '<table><tr>' + headers_row + '</tr>' + "".join(rows) + '</table>'
    return table_html

View File

@@ -5,6 +5,7 @@ import random
import urllib
import regex as re
import collections
from string import ascii_letters as alc
def flatten(d, parent_key='', sep='.'):
"""Flatten nested dictionary."""
@@ -25,6 +26,24 @@ def infer_postcode(address_string):
else:
return
def condense_path(path):
    """Merge path entries that describe the same node and link type.

    Entries agreeing on ``title``, ``depth``, ``node_type``, ``id`` and
    ``link_type`` collapse into one dict holding those five keys plus a
    ``link`` list with the ``link`` value of every matching entry, in path
    order. The order of first appearance is kept.
    """
    identity_keys = ('title', 'depth', 'node_type', 'id', 'link_type')
    merged_entries = []
    for entry in path:
        merged = {key: entry[key] for key in identity_keys}
        # Collected before 'link' is stored on ``merged`` so that only the
        # five identity keys take part in the subset comparison.
        links = [
            candidate['link']
            for candidate in path
            if merged.items() <= candidate.items()
        ]
        merged['link'] = links
        if merged in merged_entries:
            continue
        merged_entries.append(merged)
    return merged_entries
def asciiify_path(path):
    """Label every node in ``path`` with letters and rewrite its links.

    Each node dict gains a ``node_index`` label: 'a'..'z', 'A'..'Z', then
    'aa', 'bb', ... once the alphabet is exhausted. Its ``link`` value (the
    ids it is linked to) is replaced by a comma-separated string of the
    labels of the nodes whose ``id`` it contains. The dicts are modified in
    place and the same list is returned.
    """
    alphabet_size = len(alc)
    # First pass: label every node, so that a link may refer to a node
    # appearing later in the path as well as an earlier one.
    for position, node in enumerate(path):
        repeats, letter = divmod(position, alphabet_size)
        node['node_index'] = alc[letter] * (repeats + 1)
    # Second pass: swap each node's linked ids for the matching labels.
    for node in path:
        linked_ids = node['link'] or ()  # a missing link resolves to ""
        node['link'] = ", ".join(
            other['node_index'] for other in path if other['id'] in linked_ids
        )
    return path
def get_companies_from_address_database(address, company_data):
"""Searches input dataframe (company_data) for companies at input address
(address) and returns list of dicts."""
@@ -100,50 +119,51 @@ def build_address_history(company_id):
address_changes = api.get_address_changes(company_id)
address_keys = ('start_date','end_date','address')
if address_changes:
if address_changes['items']:
# attempt to retrieve any missing items within address changes
address_changes = process_address_changes(address_changes)
addresses = []
entry = {}
entry["company_number"] = str(company_id)
entry["lat"] = ""
entry["lon"] = ""
entry["address"] = str(normalise_address(company_info_subset['registered_office_address']))
entry["start_date"] = str(address_changes['items'][0]['date'])
if 'date_of_cessation' in company_info_subset:
entry["end_date"] = str(company_info_subset['date_of_cessation'])
else:
entry["end_date"] = None
addresses.append(entry)
for i,change in enumerate(address_changes['items']):
if 'items' in address_changes:
if address_changes['items']:
# attempt to retrieve any missing items within address changes
address_changes = process_address_changes(address_changes)
addresses = []
entry = {}
entry["company_number"] = str(company_id)
entry["lat"] = ""
entry["lon"] = ""
entry["company_number"] = str(company_id)
if 'old_address' in change['description_values']:
entry["address"] = change['description_values']['old_address']
entry["address"] = str(normalise_address(company_info_subset['registered_office_address']))
entry["start_date"] = str(address_changes['items'][0]['date'])
if 'date_of_cessation' in company_info_subset:
entry["end_date"] = str(company_info_subset['date_of_cessation'])
else:
entry["address"] = ""
if i+1 < len(address_changes['items']):
entry["start_date"] = str(address_changes['items'][i+1]['date'])
else:
entry["start_date"] = company_info_subset['date_of_creation']
entry["end_date"] = str(change['date'])
entry["end_date"] = None
addresses.append(entry)
return addresses
else:
address_history = []
entry = {}
for k, key in enumerate(["date_of_creation","date_of_cessation","registered_office_address"]):
if key in company_info:
entry[address_keys[k]] = company_info[key]
else:
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
for i,change in enumerate(address_changes['items']):
entry = {}
entry["lat"] = ""
entry["lon"] = ""
entry["company_number"] = str(company_id)
if 'old_address' in change['description_values']:
entry["address"] = change['description_values']['old_address']
else:
entry["address"] = ""
if i+1 < len(address_changes['items']):
entry["start_date"] = str(address_changes['items'][i+1]['date'])
else:
entry["start_date"] = company_info_subset['date_of_creation']
entry["end_date"] = str(change['date'])
addresses.append(entry)
return addresses
else:
address_history = []
entry = {}
for k, key in enumerate(["date_of_creation","date_of_cessation","registered_office_address"]):
if key in company_info:
entry[address_keys[k]] = company_info[key]
else:
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
else:
address_history = []
entry = {}