mostly removed pandas dependency from base

This commit is contained in:
seangreaves
2023-01-04 21:03:58 +00:00
parent c9f620e5d9
commit da539ce46f
3 changed files with 133 additions and 110 deletions

View File

@@ -12,20 +12,21 @@ pd.set_option('display.max_rows', 150)
class Network:
def __init__(self, officer_id=None, company_id=None, address=None):
self.addresses = pd.DataFrame(columns=['address','lat','lon','n','link_type','node_type','node'])
self.officer_ids = pd.DataFrame(columns=['officer_id','name','n','link_type','node_type','node'])
self.company_ids = pd.DataFrame(columns=['company_id','n','link_type','node_type','node',])
self.companies = pd.DataFrame(columns=['company_number'])
self.address_history = pd.DataFrame(columns=['company_number', 'address', 'start_date', 'end_date', 'lat', 'lon'])
# convert all dataframes to lists of dictionaries:
self.addresses = []
self.officer_ids = []
self.company_ids = []
self.companies = []
self.address_history = []
self._officer_id = officer_id
self._company_id = company_id
self._address = address
self.n = 0
self.link_type = None
self.initialise_dataframe()
self.initialise()
self.hop = self.Hop()
self.hop_history = pd.DataFrame()
self.maxsize_entities = pd.DataFrame(columns=['node','type', 'maxsize_type', 'size'])
self.hop_history = []
self.maxsize_entities = []
@property
def officer_id(self):
@@ -36,7 +37,7 @@ class Network:
self._officer_id = new_value
self._company_id = None
self._address_id = None
self.initialise_dataframe()
self.initialise()
@property
def company_id(self):
@@ -47,7 +48,7 @@ class Network:
self._company_id = new_value
self._officer_id = None
self._address_id = None
self.initialise_dataframe()
self.initialise()
@property
def address(self):
@@ -58,49 +59,45 @@ class Network:
self._address = new_value
self._company_id = None
self._officer_id = None
self.initialise_dataframe()
self.initialise()
def initialise_dataframe(self):
self.company_ids = self.company_ids.iloc[0:0]
self.officer_ids = self.officer_ids.iloc[0:0]
self.addresses = self.addresses.iloc[0:0]
# change to initialise
def initialise(self):
if self._officer_id:
if api.get_appointments(self._officer_id):
self.officer_ids = pd.DataFrame([{'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}])
self.officer_ids.append(dict({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}))
else:
print(f"Officer with ID:{str(self._officer_id)} not found")
elif self.company_id:
self.company_ids = pd.DataFrame([{'company_id': self._company_id, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}])
self.company_ids.append(dict({'company_id': self._company_id, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
company = api.get_company(self._company_id)
# company['n'] = self.n
# company['link_type'] = self.link_type
self.companies = pd.DataFrame(pd.json_normalize(company))
# self.companies = pd.DataFrame([company])
self.companies.append(dict(processing.flatten(company)))
elif self._address:
self.addresses = pd.DataFrame.from_dict([{'address': self._address, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None,}])
self.addresses.append(dict({'address': self._address, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''}))
else:
print("No input provided. Please provide either officer_id, company_id or address value as input.")
def add_company_names(self):
self.company_ids['name'] = ''
for i, row in self.company_ids.iterrows():
self.company_ids['name'][i] = self.companies.loc[self.companies['company_number'] == self.company_ids['company_id'][i]]['company_name'].unique()[0]
self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']]
for i, row in enumerate(self.company_ids):
self.company_ids[i]['company_name'] = list(filter(lambda d: d.get('company_number') == row['company_id'], self.companies))[0]['company_name']
# self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']]
def get_company_from_id(self, company_df=None, company_id=None, print_progress=True):
company_list = []
if company_id:
if company_id in self.company_ids['company_id'].unique():
if company_id in [company['company_id'] for company in self.company_ids]:
company_list = [company_id]
else:
print("add valid company id")
else:
company_list = self.company_ids['company_id'].unique()
company_list = [company['company_id'] for company in self.company_ids]
# companies
companies = []
for i, company_id in enumerate(company_list):
IPython.display.clear_output(wait=True)
if print_progress:
print("Processed " + str(i+1) + "/" + str(len(company_list)) + " companies.")
if company_id not in self.companies['company_number'].unique():
if company_id not in [company['company_number'] for company in self.companies]:
if company_df is not None:
try:
company = company_df[company_df[" CompanyNumber"] == str(company_id)]["CompanyName"].item()
@@ -121,43 +118,49 @@ class Network:
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
companies.append(company)
# add companies to dataframe
self.companies = self.companies.append(companies, ignore_index=True)
self.companies.extend(companies)
def run_map_preprocessing(self):
    """Run the preparation steps needed before the network is mapped.

    Calls, in order, ``get_company_from_id``, ``add_company_names`` and
    ``get_coords``, then removes duplicate entries from
    ``self.address_history``.

    Returns:
        None. ``self.address_history`` is replaced by a de-duplicated list.
    """
    self.get_company_from_id()
    self.add_company_names()
    self.get_coords()
    # De-duplicate while keeping first-seen order. A set would scramble the
    # list, and the order of the history entries is meaningful to code that
    # walks the list sequentially or takes its first matching element.
    # Two entries count as duplicates only when their (key, value) pairs
    # match in the same key order, exactly as with the previous tuple-based
    # comparison.
    unique_items = dict.fromkeys(
        tuple(entry.items()) for entry in self.address_history
    )
    self.address_history = [dict(items) for items in unique_items]
    return
def get_coords(self):
for i, row in self.addresses.iterrows():
for i, row in enumerate(self.addresses):
IPython.display.clear_output(wait=True)
print("Processed " + str(i+1) + "/" + str(len(self.addresses)) + " addresses.")
if row.isnull()['lat'] and row.isnull()['lon']:
if 'lat' not in row or 'lon' not in row:
coords = processing.get_coords_from_address(row['address'])
if coords:
self.addresses['lat'][i] = coords['lat']
self.addresses['lon'][i] = coords['lon']
self.addresses[i]['lat'] = coords['lat']
self.addresses[i]['lon'] = coords['lon']
historic_addresses = list(filter(lambda d: d.get('address') == row['address'], self.address_history))
for j, historic_address in enumerate(self.address_history):
if historic_address['address'] == row['address']:
self.address_history[j]['lon'] = coords['lon']
self.address_history[j]['lat'] = coords['lat']
else:
self.addresses[i]['lat'] = ""
self.addresses[i]['lon'] = ""
print("No coords found: " + row['address'])
historic_indices = self.address_history.index[self.address_history["address"]==row['address']].tolist()
for j in historic_indices:
self.address_history["lon"][j] = self.addresses['lon'][i]
self.address_history["lat"][j] = self.addresses['lat'][i]
def find_path(self, select_company):
network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
# network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
network_link_type_rows = list(filter(lambda d: d.get('company_id') == select_company, self.company_ids))
path = []
company_info = self.get_company_from_id(company_id=select_company, print_progress=False)
for i, row in network_link_type_rows.iterrows():
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": self.companies[self.companies["company_number"] == select_company]['company_name'].item(), "node_type": row['link_type'], "link_id": row['node']})
for i, row in enumerate(network_link_type_rows):
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": row['company_name'], "node_type": row['link_type'], "link_id": row['node']})
search_terms = [{'n': row['n']-1, 'node_type':row['node_type'], 'node':row['node']}]
for j in range(row['n']-1,-1,-1):
for term in search_terms:
if term['n'] == j:
if term['node_type'] == "Address":
select_rows = self.addresses.loc[(self.addresses['address'] == term['node']) & (self.addresses['n'] == j)]
for k, select_row in select_rows.iterrows():
###
select_rows = list(filter(lambda d: d.get('address') == term['node'] and d.get('n') == j, self.addresses))
for k, select_row in enumerate(select_rows):
if select_row['n'] == 0:
origin = {'hop': j, "type": "Address", "id": select_row['address'], "node": select_row['address'], "node_type": "", "link_id": ""}
if origin not in path:
@@ -169,22 +172,22 @@ class Network:
path.insert(0, item)
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
elif term['node_type'] == "Company":
select_rows = self.company_ids.loc[(self.company_ids['company_id'] == term['node']) & (self.company_ids['n'] == j)]
for l, select_row in select_rows.iterrows():
select_rows = list(filter(lambda d: d.get('company_id') == term['node'] and d.get('n') == j, self.company_ids))
for l, select_row in enumerate(select_rows):
self.get_company_from_id(company_id=select_row['company_id'], print_progress=False)
if select_row['n'] == 0:
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": "", "link_id": ""}
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": "", "link_id": ""}
if origin not in path:
path.insert(0, origin)
break
else:
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": select_row['link_type'], "link_id": select_row['node']}
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": select_row['link_type'], "link_id": select_row['node']}
if item not in path:
path.insert(0, item)
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
elif term['node_type'] == "Person":
select_rows = self.officer_ids.loc[(self.officer_ids['officer_id'] == term['node']) & (self.officer_ids['n'] == j)]
for m, select_row in select_rows.iterrows():
select_rows = list(filter(lambda d: d.get('officer_id') == term['node'] and d.get('n') == j, self.officer_ids))
for m, select_row in enumerate(select_rows):
if select_row['link_type'] == 0:
origin = {'hop': j, "type": "Person", "id": select_row["officer_id"], "node": select_row['name'], "node_type": "", "link_id": ""}
if origin not in path:
@@ -212,11 +215,14 @@ class Network:
def perform_hop(self, hops, company_data=None):
hop_history = []
for hop in range(hops):
selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address']
selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id']
selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id']
if len(selected_addresses.index) == 0 and len(selected_companies.index) == 0 and len(selected_officers.index) == 0:
print("link_type of network reached.")
selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))]
# selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address']
selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))]
# selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id']
selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))]
# selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id']
if not selected_addresses and not selected_companies and not selected_officers:
print("Edge of network reached.")
break
else:
self.n += 1
@@ -240,7 +246,7 @@ class Network:
print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.")
print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.")
print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.")
self.hop_history = self.hop_history.append(hop_history)
self.hop_history.append(hop_history)
class Hop:
def __init__(self):
@@ -267,15 +273,17 @@ class Network:
officers = officers['items']
network.node_type = "Company"
network.node = company_id
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
if officers:
for officer in officers:
if processing.normalise_address(officer['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
if processing.normalise_address(officer['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if officer['links']['officer']['appointments'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique():
if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers:
network.link_type = "Officer"
new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
@@ -287,27 +295,22 @@ class Network:
for person in psc['items']:
if "address" in person:
network.link_type = "Person of Significant Control Address"
if processing.normalise_address(person['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
if processing.normalise_address(person['address']) not in lower_n_addresses:
new_address = {'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if self.get_company_address_history:
address_history = processing.build_address_history(company_id)
network.address_history = network.address_history.append(address_history, ignore_index=True)
network.address_history.extend(address_history)
for address in address_history:
network.link_type = "Historic Address"
if address['address'] not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
if address['address'] not in lower_n_addresses:
new_address = {'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node})
new_addresses.append(dict({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}))
# network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
network.addresses = network.addresses.append(new_addresses, ignore_index=True)
network.officer_ids = network.officer_ids.append(new_officers, ignore_index=True)
# network.addresses = network.addresses.drop_duplicates().reset_index(drop=True)
# network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True)
# network.address_history = network.address_history.drop_duplicates().reset_index(drop=True)
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
def search_officer_id(self, network, officer_id):
new_addresses = []
@@ -315,58 +318,57 @@ class Network:
new_officers = []
network.node_type = "Person"
network.node = officer_id
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
appointments = api.get_appointments(officer_id)
if appointments:
if self.officer_appointments_maxsize == None or len(appointments['items']) < int(self.officer_appointments_maxsize or 0):
for appointment in appointments['items']:
if processing.normalise_address(appointment['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
if processing.normalise_address(appointment['address']) not in lower_n_addresses:
network.link_type = "Appointment Address"
new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if appointment['appointed_to']['company_number'] not in network.company_ids[network.company_ids['n'] < network.n]['company_id'].unique():
if appointment['appointed_to']['company_number'] not in lower_n_companies:
network.link_type = "Appointment"
# network.company_ids = network.company_ids.append({'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(appointments['items']) > int(self.officer_appointments_maxsize):
network.maxsize_entities = network.maxsize_entities.append({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Appointments', 'size': len(appointments['items'])}, ignore_index=True)
network.maxsize_entities.append(dict({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Appointments', 'size': len(appointments['items'])}))
if self.get_officer_correspondance_address:
correspondance_address = api.get_correspondance_address(officer_id)
if correspondance_address:
if processing.normalise_address(correspondance_address['items'][0]['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique():
if processing.normalise_address(correspondance_address['items'][0]['address']) not in lower_n_addresses:
network.link_type = "Officer Corresponance Address"
new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_address not in new_addresses:
new_addresses.append(new_address)
# network.addresses = network.addresses.append({'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
if self.get_officer_duplicates:
duplicate_officers = api.get_duplicate_officers(officer_id)
if duplicate_officers:
if self.officer_duplicates_maxsize == None or len(duplicate_officers) < int(self.officer_duplicates_maxsize or 0):
for duplicate in duplicate_officers:
network.link_type = "Duplicate Officer"
if duplicate['links']['self'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique():
if duplicate['links']['self'].split('/')[2] not in lower_n_officers:
new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# network.officer_ids = network.officer_ids.append({'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
elif len(duplicate_officers) > int(self.officer_duplicates_maxsize):
network.maxsize_entities = network.maxsize_entities.append({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Duplicates', 'size': len(duplicate_officers)}, ignore_index=True)
network.addresses = network.addresses.append(new_addresses)
network.officer_ids = network.officer_ids.append(new_officers, ignore_index=True)
network.company_ids = network.company_ids.append(new_companies, ignore_index=True)
# network.addresses = network.addresses.drop_duplicates().reset_index(drop=True)
# network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True)
# network.company_ids = network.company_ids.drop_duplicates().reset_index(drop=True)
network.maxsize_entities.append(dict({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Duplicates', 'size': len(duplicate_officers)}))
network.addresses.extend(new_addresses)
network.officer_ids.extend(new_officers)
network.company_ids.extend(new_companies)
def search_address(self, network, address, company_data):
new_companies = []
new_officers = []
network.node_type = "Address"
network.node = address
lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))]
lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))]
lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))]
if self.get_companies_at_address:
companies = {}
if company_data is not None:
@@ -377,26 +379,23 @@ class Network:
if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0):
for company in companies['items']:
network.link_type = "Company at Address"
if company['company_number'] not in network.company_ids[network.company_ids['n'] < network.n]['company_id'].unique():
if company['company_number'] not in lower_n_companies:
new_company = {'company_id': company['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_company not in new_companies:
new_companies.append(new_company)
elif len(companies['items']) > int(self.companies_at_address_maxsize):
network.maxsize_entities = network.maxsize_entities.append({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])},ignore_index=True)
network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])}))
if self.get_officers_at_address:
officers = api.get_officers_at_address(address)
if officers:
if self.officers_at_address_maxsize == None or len(officers) < int(self.officers_at_address_maxsize or 0):
for officer in officers:
network.link_type = "Officer at Address"
if officer['links']['self'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique():
if officer['links']['self'].split('/')[2] not in lower_n_officers:
new_officer = {'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}
if new_officer not in new_officers:
new_officers.append(new_officer)
# network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True)
elif len(officers) > int(self.officers_at_address_maxsize):
network.maxsize_entities = network.maxsize_entities.append({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)},ignore_index=True)
network.officer_ids = network.officer_ids.append(new_officers, ignore_index=True)
network.company_ids = network.company_ids.append(new_companies, ignore_index=True)
# network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True)
# network.company_ids = network.company_ids.drop_duplicates().reset_index(drop=True)
network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)}))
network.officer_ids.extend(new_officers)
network.company_ids.extend(new_companies)

View File

@@ -12,10 +12,11 @@ def build_map(network, clear_widget=True):
return m, path_table
def get_address_path(network, company_id):
company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
# company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history))
address_path = []
for index, row in company_address_history.iterrows():
if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])):
for index, row in enumerate(company_address_history):
if 'lat' not in row or 'lon' not in row:
pass
else:
address_path.insert(0,[row['lat'], row['lon']])
@@ -25,21 +26,24 @@ def locations_from_origin_path(path, network):
locations = []
for node in path:
if node['type'] == 'Company':
last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
lat = last_company_address_row['lat'].item()
lon = last_company_address_row['lon'].item()
if math.isnan(float(lat)):
###
last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0]
# last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
lat = last_company_address_row['lat']
lon = last_company_address_row['lon']
if not lat or not lon:
pass
else:
locations.append([float(lat),float(lon)])
locations.append([lat,lon])
elif node['type'] == 'Address':
address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
lat = address_row['lat'].item()
lon = address_row['lon'].item()
if math.isnan(float(lat)) or math.isnan(float(lon)):
address_row = list(filter(lambda d: d.get('address') == node['node'], network.addresses))[0]
# address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
lat = address_row['lat']
lon = address_row['lon']
if not lat or not lon:
pass
else:
locations.append([float(lat),float(lon)])
locations.append([lat,lon])
return locations
def on_button_clicked(address_path, path, location, address_trail, path_table, origin_trail, locations_from_origin, **kwargs):
@@ -97,14 +101,15 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
address_trail=address_trail
origin_trail=origin_trail
ms = []
for index, row in network.address_history.iterrows():
for index, row in enumerate(network.address_history):
path = ""
locations_from_origin = ""
message = HTML()
marker_color = "green"
company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name'].item()
company_status = company['company_status'].item()
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
# company = network.companies.loc[network.companies['company_number'] == row['company_number']]
company_name = company['company_name']
company_status = company['company_status']
if company_status == "active":
if row['end_date'] != None:
marker_color = "red"

View File

@@ -4,6 +4,17 @@ import requests
import random
import urllib
import regex as re
import collections
def flatten(d, parent_key='', sep='.'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with ``sep``,
    e.g. ``{'a': {'b': 1}}`` becomes ``{'a.b': 1}``. Values that are not
    mutable mappings (including lists) are copied through unchanged. A nested
    mapping that is empty contributes no keys to the result.

    Args:
        d: The mapping to flatten.
        parent_key: Prefix prepended (with ``sep``) to every key of ``d``;
            used by the recursion and empty for a top-level call.
        sep: Separator placed between parent and child keys.

    Returns:
        A new flat ``dict``; ``d`` itself is not modified.
    """
    # Local import so the block does not depend on the file's import list:
    # the ``collections.MutableMapping`` alias was removed in Python 3.10 and
    # the ABC is only available from ``collections.abc``.
    from collections.abc import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse, carrying the joined key down as the new prefix.
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
def infer_postcode(address_string):
postcode = re.findall(r'\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b', address_string)
@@ -89,6 +100,8 @@ def build_address_history(company_id):
addresses = []
entry = {}
entry["company_number"] = str(company_id)
entry["lat"] = ""
entry["lon"] = ""
entry["address"] = str(normalise_address(company_info_subset['registered_office_address']))
entry["start_date"] = str(address_changes['items'][0]['date'])
if 'date_of_cessation' in company_info_subset:
@@ -98,6 +111,8 @@ def build_address_history(company_id):
addresses.append(entry)
for i,change in enumerate(address_changes['items']):
entry = {}
entry["lat"] = ""
entry["lon"] = ""
entry["company_number"] = str(company_id)
if 'old_address' in change['description_values']:
entry["address"] = change['description_values']['old_address']
@@ -120,6 +135,8 @@ def build_address_history(company_id):
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
else:
address_history = []
@@ -131,6 +148,8 @@ def build_address_history(company_id):
entry[address_keys[k]] = None
entry["company_number"] = str(company_id)
entry['address'] = normalise_address(entry['address'])
entry["lat"] = ""
entry["lon"] = ""
return [entry]
else:
return []