adjusted notebooks for dict compliance

This commit is contained in:
seangreaves
2023-01-06 21:38:10 +00:00
parent da539ce46f
commit 1a65a9fb96
23 changed files with 287 additions and 199 deletions

3
.gitignore vendored
View File

@@ -48,3 +48,6 @@ coverage.xml
# Sphinx documentation
docs/_build/
# API Keys
config/config.py

View File

@@ -10,7 +10,7 @@ You will require an API key from Companies House to get data. First you will nee
## Usage
A hosted demo of the Sugartrail dashboard can be accessed [here](https://stark-island-99644.herokuapp.com/).
A hosted demo of the Sugartrail dashboard can be accessed [here](https://stark-island-99644.herokuapp.com/) (might take a few seconds to load the page).
## Installation

Binary file not shown.

Before

Width:  |  Height:  |  Size: 89 KiB

After

Width:  |  Height:  |  Size: 86 KiB

BIN
assets/networks/.DS_Store vendored Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
config/__init__.py Normal file
View File

@@ -0,0 +1 @@
from . import config

View File

@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "f17ebdd2",
"metadata": {},
"outputs": [],
@@ -18,7 +18,8 @@
"from sugartrail import mapview, api, base\n",
"import ipywidgets as widgets\n",
"from IPython.display import display\n",
"import requests"
"import requests\n",
"import pandas as pd"
]
},
{
@@ -222,13 +223,13 @@
" accordion_data.selected_index=0\n",
" build_map_button.disabled = True\n",
" with tab.children[0]:\n",
" display(network.company_ids)\n",
" display(pd.DataFrame(network.company_ids))\n",
" with tab.children[1]:\n",
" display(network.addresses)\n",
" display(pd.DataFrame(network.addresses))\n",
" with tab.children[2]:\n",
" display(network.officer_ids)\n",
" display(pd.DataFrame(network.officer_ids))\n",
" with tab.children[3]:\n",
" display(network.companies) \n",
" display(pd.DataFrame(network.companies)) \n",
"\n",
"display(build_map_button, map_container)"
]

View File

@@ -37,6 +37,7 @@
"source": [
"from sugartrail import api, mapview, base\n",
"from ipywidgets import VBox, HBox\n",
"import pandas as pd\n",
"\n",
"api.basic_auth.username = \"\""
]
@@ -291,7 +292,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.company_ids"
"pd.DataFrame(network.company_ids)"
]
},
{
@@ -309,7 +310,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.officer_ids"
"pd.DataFrame(network.officer_ids)"
]
},
{
@@ -329,7 +330,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
"pd.DataFrame(network.addresses)"
]
},
{
@@ -357,7 +358,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.hop_history"
"pd.DataFrame(network.hop_history)"
]
},
{
@@ -411,7 +412,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.address_history"
"pd.DataFrame(network.address_history)"
]
},
{
@@ -421,7 +422,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.companies "
"pd.DataFrame(network.companies)"
]
},
{
@@ -475,9 +476,9 @@
"id": "fd5d9a0d",
"metadata": {},
"source": [
"Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n",
"Pick N Mix London Limited (e) is a 'company at address' for 3rd Floor 13 Charles Ii Street (c) which is a 'historic address' for Kingdom of Sweets Ltd (a).\n",
"\n",
"Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). "
"Additionally, Pick N Mix London Limited (d) is an appointment of (b) who is an officer of Kingdom of Sweets Ltd (a). "
]
},
{
@@ -493,7 +494,7 @@
"id": "a68e26ca",
"metadata": {},
"source": [
"The network object can be saved with 'pickle' and reloaded when needed:"
"The network object can be saved to `../assets/networks/` as a JSON file:"
]
},
{
@@ -503,10 +504,15 @@
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
" pickle.dump(network, handle)"
"network.save('kingdom_of_sweets_network.json')"
]
},
{
"cell_type": "markdown",
"id": "7632e2a8",
"metadata": {},
"source": [
"We can load the network by creating a new network and passing the filename: "
]
},
{
@@ -516,8 +522,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
" network = pickle.load(handle)"
"network = base.Network(file='kingdom_of_sweets_network.json')"
]
}
],

View File

@@ -163,7 +163,7 @@
"id": "866bc18e",
"metadata": {},
"source": [
"Lets go big and perform 6 hops. It's likely to take some time to gather all the data +1 hour. If you don't want to wait, you can also use uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell :"
"Let's go big and perform 6 hops. Gathering all the data is likely to take some time (2+ hours). If you don't want to wait, you can instead uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell:"
]
},
{
@@ -173,9 +173,7 @@
"metadata": {},
"outputs": [],
"source": [
"# import pickle\n",
"# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
"# western_crown_network = pickle.load(handle)"
"# western_crown_network = base.Network(file='western_crown_network.json')"
]
},
{
@@ -231,7 +229,7 @@
"metadata": {},
"outputs": [],
"source": [
"pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
"pd.DataFrame(western_crown_network.find_path('10289650'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
]
},
{

View File

@@ -63,7 +63,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses"
"pd.DataFrame(network.addresses)"
]
},
{
@@ -73,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.addresses['address'].unique()"
"pd.DataFrame(network.addresses)['address'].unique()"
]
},
{
@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities"
"pd.DataFrame(network.maxsize_entities)"
]
},
{
@@ -99,7 +99,7 @@
"id": "5ad7b443",
"metadata": {},
"source": [
"Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`."
"Because `companies_at_address_maxsize` limits the number of companies returned per address to 50, these companies will not be added to `company_ids`."
]
},
{
@@ -127,7 +127,7 @@
"metadata": {},
"outputs": [],
"source": [
"len(network.company_ids['company_id'].unique())"
"len(network.company_ids)"
]
},
{
@@ -146,7 +146,7 @@
"metadata": {},
"outputs": [],
"source": [
"network.maxsize_entities['node'][0]"
"network.maxsize_entities[2]['node']"
]
},
{
@@ -230,7 +230,7 @@
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.company_ids"
"pd.DataFrame(regent_street_network.company_ids)"
]
},
{
@@ -254,13 +254,11 @@
{
"cell_type": "code",
"execution_count": null,
"id": "38937142",
"id": "4b7616c7",
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
" regent_street_network = pickle.load(handle)"
"# regent_street_network = base.Network(file='regent_street_network.json')"
]
},
{
@@ -278,7 +276,7 @@
"metadata": {},
"outputs": [],
"source": [
"regent_street_network.officer_ids['name'].value_counts()"
"pd.DataFrame(regent_street_network.officer_ids)['name'].value_counts()"
]
},
{
@@ -333,7 +331,7 @@
"source": [
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
"shelton_street_network.perform_hop(1)\n",
"shelton_street_network.maxsize_entities"
"shelton_street_network.maxsize_entities[0]"
]
},
{
@@ -372,7 +370,7 @@
"metadata": {},
"outputs": [],
"source": [
"company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
"company_data = pd.read_csv(\"../assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
]
},
{
@@ -380,7 +378,7 @@
"id": "2273cf39",
"metadata": {},
"source": [
"Now lets try get every company at the very overcrowded 71-75 Shelton Street address:"
"Now let's try to get every company at the very overcrowded 71-75 Shelton Street address (this might take several minutes; alternatively, uncomment the cell below to load a pre-made network):"
]
},
{
@@ -394,7 +392,17 @@
"shelton_street_network.hop.companies_at_address_maxsize = None\n",
"shelton_street_network.hop.officers_at_address_maxsize = None\n",
"shelton_street_network.get_officers_at_address = False\n",
"shelton_street_network.perform_hop(1, company_data= company_data)"
"shelton_street_network.perform_hop(1, company_data = company_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d34c9833",
"metadata": {},
"outputs": [],
"source": [
"# shelton_street_network = base.Network(file='shelton_street_network.json')"
]
},
{
@@ -412,7 +420,7 @@
"metadata": {},
"outputs": [],
"source": [
"shelton_street_network.company_ids"
"len(shelton_street_network.company_ids)"
]
}
],

View File

@@ -35,10 +35,7 @@
"outputs": [],
"source": [
"# # network build from Domain Foundation, company_id = \"11951034\"\n",
"# import pickle\n",
"\n",
"# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
"# network = pickle.load(handle)"
"# network = base.Network(file='domain_corp_network.json')"
]
},
{

View File

@@ -1,9 +1,11 @@
import requests
import time
import os
import config
# Module-level credentials and defaults shared by the request helpers in this module.
access_token = ""
username = access_token
# username = config.config.APIKEY
# Left empty here; the notebooks assign the key afterwards via `api.basic_auth.username`.
username = ""
password = ""
# NOTE(review): get_companies_at_address hard-codes "5000" rather than reading `size` — confirm whether this constant is still used.
size = "5000"
# Single auth object handed to every requests.get call made by make_request.
basic_auth = requests.auth.HTTPBasicAuth(username, password)
@@ -17,25 +19,28 @@ def test():
return False
def make_request(url, input, input_type, response_type):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Fully formed endpoint URL.
        input: Value the lookup was made for. Not used by the request itself;
            kept so callers can keep passing diagnostic context.
        input_type: Human-readable description of `input` (unused, see above).
        response_type: Human-readable description of the requested resource
            (unused, see above).

    Returns:
        The parsed JSON body when the server answers with status 200.
        None when no username is configured on `basic_auth`, when the request
        fails, or when a successful response has a status other than 200.
    """
    if not basic_auth.username:
        print("Authentication required")
        return None
    # Fixed pause before every call — presumably to stay under the API rate limit.
    time.sleep(0.5)
    try:
        response = requests.get(url, auth=basic_auth)
        response.raise_for_status()
        if response.status_code == 200:
            return response.json()
    except requests.exceptions.RequestException:
        # HTTPError, ConnectionError and Timeout all derive from
        # RequestException, so the separate handlers that used to follow this
        # one could never run; a single handler covers every case.
        return None
    return None
def get_company_officers(company_id):
url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers"
@@ -72,14 +77,15 @@ def get_duplicate_officers(officer_id):
url = "https://api.company-information.service.gov.uk/search/officers?q=" + name
response = make_request(url, name, 'officer name', 'officers')
filtered_results = []
if 'items' in response:
for officer in response['items']:
if 'date_of_birth' in officer.keys() and 'date_of_birth' in officer_data.keys():
if officer['date_of_birth'] == officer_data['date_of_birth'] and officer['links']['self'] != officer_self_link:
filtered_results.append(officer)
return filtered_results
else:
return
if response:
if 'items' in response:
for officer in response['items']:
if 'date_of_birth' in officer.keys() and 'date_of_birth' in officer_data.keys():
if officer['date_of_birth'] == officer_data['date_of_birth'] and officer['links']['self'] != officer_self_link:
filtered_results.append(officer)
return filtered_results
else:
return
def get_companies_at_address(address):
url = "https://api.company-information.service.gov.uk/advanced-search/companies?location=" + address + "&size=" + "5000"
@@ -88,12 +94,13 @@ def get_companies_at_address(address):
def get_officers_at_address(address):
    """Search officers by location and keep those whose address matches `address`.

    Args:
        address: Address string. Commas are removed and the remainder is split
            on whitespace; every resulting word must occur (case-sensitive
            substring match) in an officer's 'address_snippet'.

    Returns:
        A list of matching officer dicts (possibly empty) when the response
        contains 'items'; None when the request produced no usable response.
    """
    url = "https://api.company-information.service.gov.uk/search/officers?q=location:" + address
    response = make_request(url, address, 'address', 'officers')
    # make_request yields None on failure, so check before the membership test
    # (testing `'items' in None` raises TypeError).
    if not response or 'items' not in response:
        return None
    words = address.replace(',', '').split()
    return [
        officer
        for officer in response['items']
        if all(word in officer['address_snippet'] for word in words)
    ]

View File

@@ -1,18 +1,14 @@
from sugartrail import api
from sugartrail import processing
import pandas as pd
import IPython
import numpy as np
import math
import warnings
from string import ascii_lowercase as alc
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 150)
import json
from string import ascii_letters as alc
class Network:
def __init__(self, officer_id=None, company_id=None, address=None):
# convert all dataframes to lists of dictionaries:
def __init__(self, officer_id=None, company_id=None, address=None, file=None):
self.addresses = []
self.officer_ids = []
self.company_ids = []
@@ -23,10 +19,26 @@ class Network:
self._address = address
self.n = 0
self.link_type = None
self.initialise()
self.hop = self.Hop()
self.hop_history = []
self.maxsize_entities = []
self.processed_officers = []
self.processed_companies = []
self.processed_addresses = []
self._file = self.load(file)
self.initialise()
@property
def file(self):
    """Return the value currently stored in `_file`."""
    return self._file

@file.setter
def file(self, new_value):
    """Point the network at a saved file and load it.

    Clears the officer/company/address seeds before delegating to `load`,
    which then fills the network from the named file.
    """
    self._file = new_value
    self._officer_id = None
    self._company_id = None
    # The seed address lives in `_address`; this previously cleared
    # `_address_id`, an attribute no other visible code reads, leaving the
    # old address in place.
    self._address = None
    self.load(self._file)
@property
def officer_id(self):
@@ -51,7 +63,7 @@ class Network:
self.initialise()
@property
def address(self):
    """Return the address stored in `_address`."""
    # A property getter is invoked with the instance only; declaring an extra
    # required parameter here makes every read of `.address` raise TypeError.
    return self._address
@address.setter
@@ -63,24 +75,60 @@ class Network:
# change to initialise
def initialise(self):
    """Seed the network with its starting node.

    Does nothing once `self.n` has reached 1. Otherwise the first of
    officer id, company id or address that is set is appended to the
    corresponding list at depth `self.n`. When only `self.file` is set nothing
    is added, and when nothing is set a hint is printed.
    """
    if self.n >= 1:
        return
    if self._officer_id:
        # Fetch once and reuse: the lookup was previously issued twice, once
        # for the check and once for the name.
        appointments = api.get_appointments(self._officer_id)
        if appointments:
            self.officer_ids.append({
                'officer_id': self._officer_id,
                'name': appointments['items'][0]['name'],
                'n': self.n,
                'link_type': None,
                'node_type': None,
                'node': None,
            })
        else:
            print(f"Officer with ID:{str(self._officer_id)} not found")
    elif self._company_id:
        self.company_ids.append({'company_id': self._company_id, 'n': self.n, 'link_type': '', 'node_type': '', 'node': ''})
        # NOTE(review): api.get_company may return nothing on a failed request;
        # confirm processing.flatten copes with that.
        company = api.get_company(self._company_id)
        self.companies.append(dict(processing.flatten(company)))
    elif self._address:
        self.addresses.append({'address': self._address, 'n': self.n, 'link_type': '', 'node_type': '', 'node': ''})
    elif self.file:
        pass
    else:
        print("No input provided. Please provide either officer_id, company_id or address value as input.")
def save(self, filename):
    """Write the network's attributes to ../assets/networks/<filename> as JSON.

    Every instance attribute is serialised except `hop` and any attribute
    literally named `file`.

    Args:
        filename: Name of the file to create inside ../assets/networks/
            (relative to the current working directory).
    """
    # NOTE(review): the instance stores its filename as `_file`, so the 'file'
    # exclusion never matches and `_file` is written out too — confirm intent.
    network_data = {k: v for k, v in self.__dict__.items() if k not in ('hop', 'file')}
    # Serialise before opening so a failure cannot leave a truncated file.
    saved_network = json.dumps(network_data)
    # `with` guarantees the handle is closed; the earlier `f.close` was missing
    # its call parentheses and therefore never closed the file.
    with open(f'../assets/networks/{filename}', 'w') as f:
        f.write(saved_network)
def load(self, filename):
    """Fill this network from a JSON file in ../assets/networks/.

    Args:
        filename: Name of the file inside ../assets/networks/ (relative to the
            current working directory). A falsy value makes this a no-op.

    Raises:
        KeyError: if the file lacks one of the expected top-level keys.

    Returns:
        None in every case.
    """
    # NOTE(review): __init__ stores this return value in `_file`, which is
    # therefore always None — consider whether the filename should be returned.
    if not filename:
        return
    # `with` closes the handle; it was previously opened and never closed.
    with open(f'../assets/networks/{filename}') as f:
        network_data = json.load(f)
    # Same attributes, in the same order, as the original one-by-one assignments.
    for key in (
        'addresses',
        'officer_ids',
        'company_ids',
        'companies',
        'address_history',
        '_officer_id',
        '_company_id',
        '_address',
        'n',
        'link_type',
        'hop_history',
        'maxsize_entities',
        'processed_officers',
        'processed_companies',
        'processed_addresses',
    ):
        setattr(self, key, network_data[key])
def add_company_names(self):
    """Attach a 'company_name' to every entry of `self.company_ids`.

    The name is taken from the first record in `self.companies` whose
    'company_number' equals the entry's 'company_id'. When no such record
    exists, the company is requested via `api.get_company` and the name is set
    only if the response carries one; otherwise the entry is left unchanged.
    """
    # Index the cached records once instead of rescanning the list for every
    # row. setdefault keeps the first record per number, matching the previous
    # first-match behaviour.
    known = {}
    for record in self.companies:
        known.setdefault(record.get('company_number'), record)
    for row in self.company_ids:
        match = known.get(row['company_id'])
        if match is not None:
            row['company_name'] = match['company_name']
            continue
        # Not cached locally: fall back to the API rather than indexing into
        # an empty result list.
        fetched = api.get_company(row['company_id'])
        if fetched and 'company_name' in fetched:
            row['company_name'] = fetched['company_name']
def get_company_from_id(self, company_df=None, company_id=None, print_progress=True):
company_list = []
@@ -91,7 +139,6 @@ class Network:
print("add valid company id")
else:
company_list = [company['company_id'] for company in self.company_ids]
# companies
companies = []
for i, company_id in enumerate(company_list):
IPython.display.clear_output(wait=True)
@@ -207,46 +254,58 @@ class Network:
link_indices = []
for j,item in enumerate(sorted_path):
if item['id'] == search_term:
link_indices.append(alc[j].upper())
link_indices.append(alc[j])
sorted_path[i]["link"] = ','.join(link_indices)
sorted_path[i]["node_index"] = alc[i].upper()
sorted_path[i]["node_index"] = alc[i]
return sorted_path
def perform_hop(self, hops, company_data=None):
# Expand the network outward up to `hops` times. Each pass looks up every address,
# company and officer whose 'n' equals the current self.n; `company_data` is passed
# through unchanged to the address search.
# NOTE(review): this listing carries both pre- and post-change statements with the
# indentation stripped (e.g. two `self.n += 1` lines, and each search call appears
# both unguarded and guarded) — confirm the intended nesting against the real file.
hop_history = []
for hop in range(hops):
# check if previous hop completed, if any processed items then its still mid-processing:
selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))]
# selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address']
selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))]
# selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id']
selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))]
# selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id']
# Nothing exists at the current depth, so there is nothing left to expand.
if not selected_addresses and not selected_companies and not selected_officers:
print("Edge of network reached.")
break
else:
self.n += 1
# NOTE(review): this appends the live attribute dict of self.hop, not a copy, so
# later changes to the hop settings also alter earlier history entries — confirm intent.
hop_history.append(self.hop.__dict__)
# self.hop_history = self.hop_history.append(self.hop.__dict__, ignore_index=True)
for i,address in enumerate(selected_addresses):
self.hop.search_address(self, address, company_data)
# Skip addresses already recorded in self.processed_addresses.
if address not in self.processed_addresses:
self.hop.search_address(self, address, company_data)
self.processed_addresses.append(address)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(i+1) + "/" + str(len(selected_addresses)) + " addresses.")
for j,company in enumerate(selected_companies):
self.hop.search_company_id(self,company)
# Skip companies already recorded in self.processed_companies.
if company not in self.processed_companies:
self.hop.search_company_id(self,company)
self.processed_companies.append(company)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(j+1) + "/" + str(len(selected_companies)) + " companies.")
for k,officer in enumerate(selected_officers):
self.hop.search_officer_id(self,officer)
# Skip officers already recorded in self.processed_officers.
if officer not in self.processed_officers:
self.hop.search_officer_id(self,officer)
self.processed_officers.append(officer)
IPython.display.clear_output(wait=True)
print("Hop number: " + str(hop))
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
self.hop_history.append(hop_history)
# De-duplicate each list: an entry is dropped when an equal entry appears later,
# so the last occurrence of each duplicate is the one kept.
self.officer_ids = [i for n, i in enumerate(self.officer_ids) if i not in self.officer_ids[n + 1:]]
self.company_ids = [i for n, i in enumerate(self.company_ids) if i not in self.company_ids[n + 1:]]
self.maxsize_entities = [i for n, i in enumerate(self.maxsize_entities) if i not in self.maxsize_entities[n + 1:]]
self.addresses = [i for n, i in enumerate(self.addresses) if i not in self.addresses[n + 1:]]
self.address_history = [i for n, i in enumerate(self.address_history) if i not in self.address_history[n + 1:]]
self.companies = [i for n, i in enumerate(self.companies) if i not in self.companies[n + 1:]]
# Reset the per-hop bookkeeping lists.
self.processed_officers = []
self.processed_companies = []
self.processed_addresses = []
self.n += 1
hop_history.append(self.hop.__dict__)
self.hop_history.extend(hop_history)
class Hop:
def __init__(self):
@@ -273,22 +332,23 @@ class Network:
officers = officers['items']
network.node_type = "Company"
network.node = company_id
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
if officers:
for officer in officers:
if processing.normalise_address(officer['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
network.link_type = "Officer"
new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if 'address' in officer:
if processing.normalise_address(officer['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
network.link_type = "Officer"
new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['officer']['appointments'].split('/')[2], 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if self.get_psc_correspondance_address:
psc = api.get_psc(company_id)
if psc:
@@ -296,7 +356,7 @@ class Network:
if "address" in person:
network.link_type = "Person of Significant Control Address"
if processing.normalise_address(person['address']) not in lower_n_addresses:
new_address = {'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
new_address = {'address': processing.normalise_address(person['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
if self.get_company_address_history:
@@ -305,34 +365,35 @@ class Network:
for address in address_history:
network.link_type = "Historic Address"
if address['address'] not in lower_n_addresses:
new_address = {'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
new_address = {'address': address['address'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(dict({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}))
new_addresses.append(dict({'address': address['address'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}))
# network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
def search_officer_id(self, network, officer_id):
new_addresses = []
new_companies = []
new_officers = []
network.node_type = "Person"
network.node = officer_id
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n+1, network.company_ids))]
appointments = api.get_appointments(officer_id)
if appointments:
if self.officer_appointments_maxsize == None or len(appointments['items']) < int(self.officer_appointments_maxsize or 0):
for appointment in appointments['items']:
if processing.normalise_address(appointment['address']) not in lower_n_addresses:
network.link_type = "Appointment Address"
new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
if appointment['appointed_to']['company_number'] not in lower_n_companies:
network.link_type = "Appointment"
new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(appointments['items']) > int(self.officer_appointments_maxsize):
@@ -342,7 +403,7 @@ class Network:
if correspondance_address:
if processing.normalise_address(correspondance_address['items'][0]['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
if self.get_officer_duplicates:
@@ -352,7 +413,7 @@ class Network:
for duplicate in duplicate_officers:
network.link_type = "Duplicate Officer"
if duplicate['links']['self'].split('/')[2] not in lower_n_officers:
new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}
new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n+1, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
elif len(duplicate_officers) > int(self.officer_duplicates_maxsize):
@@ -366,9 +427,9 @@ class Network:
new_officers = []
network.node_type = "Address"
network.node = address
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n+1, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n+1, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n+1, network.company_ids))]
if self.get_companies_at_address:
companies = {}
if company_data is not None:
@@ -376,25 +437,27 @@ class Network:
else:
companies = api.get_companies_at_address(address)
if companies:
if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0):
for company in companies['items']:
network.link_type = "Company at Address"
if company['company_number'] not in lower_n_companies:
new_company = {'company_id': company['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(companies['items']) > int(self.companies_at_address_maxsize):
network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])}))
if 'items' in companies:
if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0):
for company in companies['items']:
network.link_type = "Company at Address"
if company['company_number'] not in lower_n_companies:
new_company = {'company_id': company['company_number'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(companies['items']) > int(self.companies_at_address_maxsize):
network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])}))
if self.get_officers_at_address:
officers = api.get_officers_at_address(address)
if officers:
if self.officers_at_address_maxsize == None or len(officers) < int(self.officers_at_address_maxsize or 0):
for officer in officers:
network.link_type = "Officer at Address"
if officer['links']['self'].split('/')[2] not in lower_n_officers:
new_officer = {'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# Both keys are read below, so both must be present. The previous condition,
# `'links' and 'title' in officer`, only tested for 'title' because a
# non-empty string literal is always truthy.
if 'links' in officer and 'title' in officer:
    network.link_type = "Officer at Address"
    # The officer id is taken from the third '/'-separated segment of the self link.
    officer_id = officer['links']['self'].split('/')[2]
    # Skip officers that are already listed in lower_n_officers.
    if officer_id not in lower_n_officers:
        new_officer = {'officer_id': officer_id, 'name': officer['title'], 'n':network.n+1, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
        if new_officer not in new_officers:
            new_officers.append(new_officer)
elif len(officers) > int(self.officers_at_address_maxsize):
network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)}))
network.officer_ids.extend(new_officers)

View File

@@ -2,7 +2,6 @@ from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea
from ipyleaflet import Map, Marker, MarkerCluster, AwesomeIcon, AntPath, Popup
from datetime import datetime
import functools
from string import ascii_lowercase as alc
import math
def build_map(network, clear_widget=True):
@@ -14,9 +13,10 @@ def build_map(network, clear_widget=True):
def get_address_path(network, company_id):
# company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history))
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
address_path = []
for index, row in enumerate(company_address_history):
if 'lat' not in row or 'lon' not in row:
for index, row in enumerate(company_address_history_sorted):
if not row['lat'] or not row['lon']:
pass
else:
address_path.insert(0,[row['lat'], row['lon']])
@@ -27,8 +27,14 @@ def locations_from_origin_path(path, network):
for node in path:
if node['type'] == 'Company':
###
last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0]
# last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
# Pick the first address row, ordered by start_date descending, that has
# both coordinates set.
company_address_history = [d for d in network.address_history if d.get('company_number') == node['id']]
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
last_company_address_row = next(
    (address_row for address_row in company_address_history_sorted
     if address_row.get('lat') and address_row.get('lon')),
    {},
)
# .get() leaves lat/lon as None when no row with coordinates exists, so the
# `if not lat or not lon` check that follows handles that case instead of
# a KeyError being raised on the empty dict.
lat = last_company_address_row.get('lat')
lon = last_company_address_row.get('lon')
if not lat or not lon:
@@ -102,28 +108,29 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
origin_trail=origin_trail
ms = []
for index, row in enumerate(network.address_history):
path = ""
locations_from_origin = ""
message = HTML()
marker_color = "green"
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
# company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name']
company_status = company['company_status']
if company_status == "active":
if row['end_date'] != None:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
ms.append(marker)
# Build a map marker for this address_history row; rows whose lat or lon is
# falsy are not given a marker.
if row['lat'] and row['lon']:
path = ""
locations_from_origin = ""
message = HTML()
# Marker colour starts as green; red is chosen for an "active" company whose
# row has a truthy end_date.
# NOTE(review): indentation is not visible in this view, so confirm which
# `if` the `else` (black) below pairs with.
marker_color = "green"
# Look up the company record matching this row's company_number; the [0]
# raises IndexError when network.companies has no matching entry.
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
# company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name']
company_status = company['company_status']
if company_status == "active":
if row['end_date']:
marker_color = "red"
else:
marker_color = "black"
address = row['address']
# Path for this company from network.find_path, and the map locations
# derived from that path by locations_from_origin_path.
path = network.find_path(str(row['company_number']))
locations_from_origin = locations_from_origin_path(path, network)
# Popup content: company name, a horizontal rule, then the address.
message.value = str(company_name) + "<hr>" + str(address)
icon = AwesomeIcon(
marker_color=marker_color
)
address_path = get_address_path(network,str(row['company_number']))
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
# Clicking the marker calls on_button_clicked with the trails, paths and
# locations pre-bound through functools.partial.
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
ms.append(marker)
return ms

View File

@@ -1,6 +1,6 @@
from sugartrail import api
import requests
# import pandas as pd
import pandas as pd
import random
import urllib
import regex as re
@@ -29,13 +29,6 @@ def get_companies_from_address_database(address, company_data):
companies['registered_office_address'] = [{'address_line_1': row['address_line_1'], 'address_line_2': row['address_line_2'], 'locality': row['locality'], 'postal_code': row['postal_code'], 'country': row['country']} for i,row in companies.iterrows()]
return companies.to_dict('records')
# def load_company_data(company_data_filepath):
# try:
# company_data = pd.read_csv(company_data_filepath)
# return company_data
# except:
# return
def get_nearby_postcode(postcode_string):
url = "http://api.postcodes.io/postcodes/" + postcode_string[:-1] + "/autocomplete"
response = requests.get(url).json()