diff --git a/sugartrail/base.py b/sugartrail/base.py index c0ecea2..8a09c5e 100644 --- a/sugartrail/base.py +++ b/sugartrail/base.py @@ -12,20 +12,21 @@ pd.set_option('display.max_rows', 150) class Network: def __init__(self, officer_id=None, company_id=None, address=None): - self.addresses = pd.DataFrame(columns=['address','lat','lon','n','link_type','node_type','node']) - self.officer_ids = pd.DataFrame(columns=['officer_id','name','n','link_type','node_type','node']) - self.company_ids = pd.DataFrame(columns=['company_id','n','link_type','node_type','node',]) - self.companies = pd.DataFrame(columns=['company_number']) - self.address_history = pd.DataFrame(columns=['company_number', 'address', 'start_date', 'end_date', 'lat', 'lon']) + # convert all dataframes to lists of dictionaries: + self.addresses = [] + self.officer_ids = [] + self.company_ids = [] + self.companies = [] + self.address_history = [] self._officer_id = officer_id self._company_id = company_id self._address = address self.n = 0 self.link_type = None - self.initialise_dataframe() + self.initialise() self.hop = self.Hop() - self.hop_history = pd.DataFrame() - self.maxsize_entities = pd.DataFrame(columns=['node','type', 'maxsize_type', 'size']) + self.hop_history = [] + self.maxsize_entities = [] @property def officer_id(self): @@ -36,7 +37,7 @@ class Network: self._officer_id = new_value self._company_id = None self._address_id = None - self.initialise_dataframe() + self.initialise() @property def company_id(self): @@ -47,7 +48,7 @@ class Network: self._company_id = new_value self._officer_id = None self._address_id = None - self.initialise_dataframe() + self.initialise() @property def address(self): @@ -58,49 +59,45 @@ class Network: self._address = new_value self._company_id = None self._officer_id = None - self.initialise_dataframe() + self.initialise() - def initialise_dataframe(self): - self.company_ids = self.company_ids.iloc[0:0] - self.officer_ids = self.officer_ids.iloc[0:0] - 
self.addresses = self.addresses.iloc[0:0] + def initialise(self): # clear the seed lists first so re-seeding replaces the old network + self.company_ids, self.officer_ids, self.addresses = [], [], [] if self._officer_id: if api.get_appointments(self._officer_id): - self.officer_ids = pd.DataFrame([{'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}]) + self.officer_ids.append(dict({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None})) + else: + print(f"Officer with ID:{str(self._officer_id)} not found") elif self.company_id: - self.company_ids = pd.DataFrame([{'company_id': self._company_id, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}]) + self.company_ids.append(dict({'company_id': self._company_id, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''})) company = api.get_company(self._company_id) - # company['n'] = self.n - # company['link_type'] = self.link_type - self.companies = pd.DataFrame(pd.json_normalize(company)) - # self.companies = pd.DataFrame([company]) + self.companies = [dict(processing.flatten(company))] elif self._address: - self.addresses = pd.DataFrame.from_dict([{'address': self._address, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None,}]) + self.addresses.append(dict({'address': self._address, 'n':self.n, 'link_type': '', 'node_type': '', 'node': ''})) else: print("No input provided. 
Please provide either officer_id, company_id or address value as input.") def add_company_names(self): - self.company_ids['name'] = '' - for i, row in self.company_ids.iterrows(): - self.company_ids['name'][i] = self.companies.loc[self.companies['company_number'] == self.company_ids['company_id'][i]]['company_name'].unique()[0] - self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']] + for i, row in enumerate(self.company_ids): + self.company_ids[i]['company_name'] = list(filter(lambda d: d.get('company_number') == row['company_id'], self.companies))[0]['company_name'] + # self.company_ids = self.company_ids[['company_id', 'name', 'n', 'link_type', 'node_type', 'node']] def get_company_from_id(self, company_df=None, company_id=None, print_progress=True): company_list = [] if company_id: - if company_id in self.company_ids['company_id'].unique(): + if company_id in [company['company_id'] for company in self.company_ids]: company_list = [company_id] else: print("add valid company id") else: - company_list = self.company_ids['company_id'].unique() + company_list = [company['company_id'] for company in self.company_ids] # companies companies = [] for i, company_id in enumerate(company_list): IPython.display.clear_output(wait=True) if print_progress: print("Processed " + str(i+1) + "/" + str(len(company_list)) + " companies.") - if company_id not in self.companies['company_number'].unique(): + if company_id not in [company['company_number'] for company in self.companies]: if company_df is not None: try: company = company_df[company_df[" CompanyNumber"] == str(company_id)]["CompanyName"].item() @@ -121,43 +118,49 @@ class Network: # self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True) companies.append(company) # add companies to dataframe - self.companies = self.companies.append(companies, ignore_index=True) + self.companies.extend(companies) def run_map_preprocessing(self): 
self.get_company_from_id() self.add_company_names() self.get_coords() + self.address_history = [dict(t) for t in dict.fromkeys(tuple(d.items()) for d in self.address_history)] return def get_coords(self): - for i, row in self.addresses.iterrows(): + for i, row in enumerate(self.addresses): IPython.display.clear_output(wait=True) print("Processed " + str(i+1) + "/" + str(len(self.addresses)) + " addresses.") - if row.isnull()['lat'] and row.isnull()['lon']: + if 'lat' not in row or 'lon' not in row: coords = processing.get_coords_from_address(row['address']) if coords: - self.addresses['lat'][i] = coords['lat'] - self.addresses['lon'][i] = coords['lon'] + self.addresses[i]['lat'] = coords['lat'] + self.addresses[i]['lon'] = coords['lon'] + # copy the coordinates onto every history entry recorded for this address + for j, historic_address in enumerate(self.address_history): + if historic_address['address'] == row['address']: + self.address_history[j]['lon'] = coords['lon'] + self.address_history[j]['lat'] = coords['lat'] else: + self.addresses[i]['lat'] = "" + self.addresses[i]['lon'] = "" print("No coords found: " + row['address']) - historic_indices = self.address_history.index[self.address_history["address"]==row['address']].tolist() - for j in historic_indices: - self.address_history["lon"][j] = self.addresses['lon'][i] - self.address_history["lat"][j] = self.addresses['lat'][i] def find_path(self, select_company): - network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company] + # network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company] + network_link_type_rows = list(filter(lambda d: d.get('company_id') == select_company, self.company_ids)) path = [] company_info = self.get_company_from_id(company_id=select_company, print_progress=False) - for i, row in network_link_type_rows.iterrows(): - path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": 
self.companies[self.companies["company_number"] == select_company]['company_name'].item(), "node_type": row['link_type'], "link_id": row['node']}) + for i, row in enumerate(network_link_type_rows): + path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": row['company_name'], "node_type": row['link_type'], "link_id": row['node']}) search_terms = [{'n': row['n']-1, 'node_type':row['node_type'], 'node':row['node']}] for j in range(row['n']-1,-1,-1): for term in search_terms: if term['n'] == j: if term['node_type'] == "Address": - select_rows = self.addresses.loc[(self.addresses['address'] == term['node']) & (self.addresses['n'] == j)] - for k, select_row in select_rows.iterrows(): + ### + select_rows = list(filter(lambda d: d.get('address') == term['node'] and d.get('n') == j, self.addresses)) + for k, select_row in enumerate(select_rows): if select_row['n'] == 0: origin = {'hop': j, "type": "Address", "id": select_row['address'], "node": select_row['address'], "node_type": "", "link_id": ""} if origin not in path: @@ -169,22 +172,22 @@ class Network: path.insert(0, item) search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']}) elif term['node_type'] == "Company": - select_rows = self.company_ids.loc[(self.company_ids['company_id'] == term['node']) & (self.company_ids['n'] == j)] - for l, select_row in select_rows.iterrows(): + select_rows = list(filter(lambda d: d.get('company_id') == term['node'] and d.get('n') == j, self.company_ids)) + for l, select_row in enumerate(select_rows): self.get_company_from_id(company_id=select_row['company_id'], print_progress=False) if select_row['n'] == 0: - origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": "", "link_id": ""} + origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": 
select_row['company_name'], "node_type": "", "link_id": ""} if origin not in path: path.insert(0, origin) break else: - item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": select_row['link_type'], "link_id": select_row['node']} + item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": select_row['link_type'], "link_id": select_row['node']} if item not in path: path.insert(0, item) search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']}) elif term['node_type'] == "Person": - select_rows = self.officer_ids.loc[(self.officer_ids['officer_id'] == term['node']) & (self.officer_ids['n'] == j)] - for m, select_row in select_rows.iterrows(): + select_rows = list(filter(lambda d: d.get('officer_id') == term['node'] and d.get('n') == j, self.officer_ids)) + for m, select_row in enumerate(select_rows): if select_row['link_type'] == 0: origin = {'hop': j, "type": "Person", "id": select_row["officer_id"], "node": select_row['name'], "node_type": "", "link_id": ""} if origin not in path: @@ -212,11 +215,14 @@ class Network: def perform_hop(self, hops, company_data=None): hop_history = [] for hop in range(hops): - selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address'] - selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id'] - selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id'] - if len(selected_addresses.index) == 0 and len(selected_companies.index) == 0 and len(selected_officers.index) == 0: - print("link_type of network reached.") + selected_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') == self.n, self.addresses))] + # selected_addresses = self.addresses.loc[self.addresses['n'] == self.n]['address'] + 
selected_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') == self.n, self.company_ids))] + # selected_companies = self.company_ids.loc[self.company_ids['n'] == self.n]['company_id'] + selected_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') == self.n, self.officer_ids))] + # selected_officers = self.officer_ids.loc[self.officer_ids['n'] == self.n]['officer_id'] + if not selected_addresses and not selected_companies and not selected_officers: + print("Edge of network reached.") break else: self.n += 1 @@ -240,7 +246,7 @@ class Network: print("Processed " + str(len(selected_addresses)) + "/" + str(len(selected_addresses)) + " addresses.") print("Processed " + str(len(selected_companies)) + "/" + str(len(selected_companies)) + " companies.") print("Processed " + str(k+1) + "/" + str(len(selected_officers)) + " officers.") - self.hop_history = self.hop_history.append(hop_history) + self.hop_history.extend(hop_history) class Hop: def __init__(self): @@ -267,15 +273,17 @@ class Network: officers = officers['items'] network.node_type = "Company" network.node = company_id + lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))] + lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))] if officers: for officer in officers: - if processing.normalise_address(officer['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique(): + if processing.normalise_address(officer['address']) not in lower_n_addresses: network.link_type = "Officer Corresponance Address" new_address = {'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_address not in new_addresses: new_addresses.append(new_address) # network.addresses = 
network.addresses.append({'address': processing.normalise_address(officer['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) - if officer['links']['officer']['appointments'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique(): + if officer['links']['officer']['appointments'].split('/')[2] not in lower_n_officers: network.link_type = "Officer" new_officer = {'officer_id': str(officer['links']['officer']['appointments'].split('/')[2]), 'name': processing.normalise_name(officer['name']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_officer not in new_officers: @@ -287,27 +295,22 @@ class Network: for person in psc['items']: if "address" in person: network.link_type = "Person of Significant Control Address" - if processing.normalise_address(person['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique(): + if processing.normalise_address(person['address']) not in lower_n_addresses: new_address = {'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_address not in new_addresses: new_addresses.append(new_address) - # network.addresses = network.addresses.append({'address': processing.normalise_address(person['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) if self.get_company_address_history: address_history = processing.build_address_history(company_id) - network.address_history = network.address_history.append(address_history, ignore_index=True) + network.address_history.extend(address_history) for address in address_history: network.link_type = "Historic Address" - if address['address'] not in network.addresses[network.addresses['n'] < 
network.n]['address'].unique(): + if address['address'] not in lower_n_addresses: new_address = {'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_address not in new_addresses: - new_addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}) + new_addresses.append(dict({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node})) # network.addresses = network.addresses.append({'address': address['address'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) - network.addresses = network.addresses.append(new_addresses, ignore_index=True) - network.officer_ids = network.officer_ids.append(new_officers, ignore_index=True) - # network.addresses = network.addresses.drop_duplicates().reset_index(drop=True) - # network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True) - # network.address_history = network.address_history.drop_duplicates().reset_index(drop=True) - + network.addresses.extend(new_addresses) + network.officer_ids.extend(new_officers) def search_officer_id(self, network, officer_id): new_addresses = [] @@ -315,58 +318,57 @@ class Network: new_officers = [] network.node_type = "Person" network.node = officer_id + lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))] + lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))] + lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))] appointments = api.get_appointments(officer_id) if appointments: if self.officer_appointments_maxsize == None or len(appointments['items']) 
< int(self.officer_appointments_maxsize or 0): for appointment in appointments['items']: - if processing.normalise_address(appointment['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique(): + if processing.normalise_address(appointment['address']) not in lower_n_addresses: network.link_type = "Appointment Address" new_address = {'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_address not in new_addresses: new_addresses.append(new_address) - # network.addresses = network.addresses.append({'address': processing.normalise_address(appointment['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) - if appointment['appointed_to']['company_number'] not in network.company_ids[network.company_ids['n'] < network.n]['company_id'].unique(): + if appointment['appointed_to']['company_number'] not in lower_n_companies: network.link_type = "Appointment" - # network.company_ids = network.company_ids.append({'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) new_company = {'company_id': appointment['appointed_to']['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_company not in new_companies: new_companies.append(new_company) elif len(appointments['items']) > int(self.officer_appointments_maxsize): - network.maxsize_entities = network.maxsize_entities.append({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Appointments', 'size': len(appointments['items'])}, ignore_index=True) + network.maxsize_entities.append(dict({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Appointments', 'size': len(appointments['items'])})) if 
self.get_officer_correspondance_address: correspondance_address = api.get_correspondance_address(officer_id) if correspondance_address: - if processing.normalise_address(correspondance_address['items'][0]['address']) not in network.addresses[network.addresses['n'] < network.n]['address'].unique(): + if processing.normalise_address(correspondance_address['items'][0]['address']) not in lower_n_addresses: network.link_type = "Officer Corresponance Address" new_address = {'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_address not in new_addresses: new_addresses.append(new_address) - # network.addresses = network.addresses.append({'address': processing.normalise_address(correspondance_address['items'][0]['address']), 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) if self.get_officer_duplicates: duplicate_officers = api.get_duplicate_officers(officer_id) if duplicate_officers: if self.officer_duplicates_maxsize == None or len(duplicate_officers) < int(self.officer_duplicates_maxsize or 0): for duplicate in duplicate_officers: network.link_type = "Duplicate Officer" - if duplicate['links']['self'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique(): + if duplicate['links']['self'].split('/')[2] not in lower_n_officers: new_officer = {'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node} if new_officer not in new_officers: new_officers.append(new_officer) - # network.officer_ids = network.officer_ids.append({'officer_id': duplicate['links']['self'].split('/')[2], 'name': duplicate['title'], 'n':network.n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': 
network.node}, ignore_index=True) elif len(duplicate_officers) > int(self.officer_duplicates_maxsize): - network.maxsize_entities = network.maxsize_entities.append({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Duplicates', 'size': len(duplicate_officers)}, ignore_index=True) - network.addresses = network.addresses.append(new_addresses) - network.officer_ids = network.officer_ids.append(new_officers, ignore_index=True) - network.company_ids = network.company_ids.append(new_companies, ignore_index=True) - # network.addresses = network.addresses.drop_duplicates().reset_index(drop=True) - # network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True) - # network.company_ids = network.company_ids.drop_duplicates().reset_index(drop=True) + network.maxsize_entities.append(dict({'node':officer_id,'type': 'Officer', 'maxsize_type': 'Duplicates', 'size': len(duplicate_officers)})) + network.addresses.extend(new_addresses) + network.officer_ids.extend(new_officers) + network.company_ids.extend(new_companies) def search_address(self, network, address, company_data): new_companies = [] new_officers = [] network.node_type = "Address" network.node = address + lower_n_addresses = [address['address'] for address in list(filter(lambda d: d.get('n') < network.n, network.addresses))] + lower_n_officers = [officer['officer_id'] for officer in list(filter(lambda d: d.get('n') < network.n, network.officer_ids))] + lower_n_companies = [company['company_id'] for company in list(filter(lambda d: d.get('n') < network.n, network.company_ids))] if self.get_companies_at_address: companies = {} if company_data is not None: @@ -377,26 +379,23 @@ class Network: if self.companies_at_address_maxsize == None or len(companies['items']) < int(self.companies_at_address_maxsize or 0): for company in companies['items']: network.link_type = "Company at Address" - if company['company_number'] not in network.company_ids[network.company_ids['n'] < 
network.n]['company_id'].unique(): + if company['company_number'] not in lower_n_companies: new_company = {'company_id': company['company_number'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_company not in new_companies: new_companies.append(new_company) elif len(companies['items']) > int(self.companies_at_address_maxsize): - network.maxsize_entities = network.maxsize_entities.append({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])},ignore_index=True) + network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Companies', 'size': len(companies['items'])})) if self.get_officers_at_address: officers = api.get_officers_at_address(address) if officers: if self.officers_at_address_maxsize == None or len(officers) < int(self.officers_at_address_maxsize or 0): for officer in officers: network.link_type = "Officer at Address" - if officer['links']['self'].split('/')[2] not in network.officer_ids[network.officer_ids['n'] < network.n]['officer_id'].unique(): + if officer['links']['self'].split('/')[2] not in lower_n_officers: new_officer = {'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node} if new_officer not in new_officers: new_officers.append(new_officer) - # network.officer_ids = network.officer_ids.append({'officer_id': officer['links']['self'].split('/')[2], 'name': officer['title'], 'n':network.n, 'link_type':network.link_type, 'node_type': network.node_type, 'node': network.node}, ignore_index=True) elif len(officers) > int(self.officers_at_address_maxsize): - network.maxsize_entities = network.maxsize_entities.append({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)},ignore_index=True) - network.officer_ids = network.officer_ids.append(new_officers, 
ignore_index=True) - network.company_ids = network.company_ids.append(new_companies, ignore_index=True) - # network.officer_ids = network.officer_ids.drop_duplicates().reset_index(drop=True) - # network.company_ids = network.company_ids.drop_duplicates().reset_index(drop=True) + network.maxsize_entities.append(dict({'node':address,'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)})) + network.officer_ids.extend(new_officers) + network.company_ids.extend(new_companies) diff --git a/sugartrail/mapview.py b/sugartrail/mapview.py index bf38773..1512e64 100644 --- a/sugartrail/mapview.py +++ b/sugartrail/mapview.py @@ -12,10 +12,11 @@ def build_map(network, clear_widget=True): return m, path_table def get_address_path(network, company_id): - company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id] + # company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id] + company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history)) address_path = [] - for index, row in company_address_history.iterrows(): - if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])): + for index, row in enumerate(company_address_history): + if not row.get('lat') or not row.get('lon'): pass else: address_path.insert(0,[row['lat'], row['lon']]) @@ -25,21 +26,24 @@ def locations_from_origin_path(path, network): locations = [] for node in path: if node['type'] == 'Company': - last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1] - lat = last_company_address_row['lat'].item() - lon = last_company_address_row['lon'].item() - if math.isnan(float(lat)): + ### + last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0] + # last_company_address_row = 
network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1] + lat = last_company_address_row['lat'] + lon = last_company_address_row['lon'] + if not lat or not lon: pass else: - locations.append([float(lat),float(lon)]) + locations.append([lat,lon]) elif node['type'] == 'Address': - address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1] - lat = address_row['lat'].item() - lon = address_row['lon'].item() - if math.isnan(float(lat)) or math.isnan(float(lon)): + address_row = list(filter(lambda d: d.get('address') == node['node'], network.addresses))[0] + # address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1] + lat = address_row['lat'] + lon = address_row['lon'] + if not lat or not lon: pass else: - locations.append([float(lat),float(lon)]) + locations.append([lat,lon]) return locations def on_button_clicked(address_path, path, location, address_trail, path_table, origin_trail, locations_from_origin, **kwargs): @@ -97,14 +101,15 @@ def get_marker_data(network,address_trail, origin_trail, path_table): address_trail=address_trail origin_trail=origin_trail ms = [] - for index, row in network.address_history.iterrows(): + for index, row in enumerate(network.address_history): path = "" locations_from_origin = "" message = HTML() marker_color = "green" - company = network.companies.loc[network.companies['company_number'] == row['company_number']] - company_name = company['company_name'].item() - company_status = company['company_status'].item() + company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0] + # company = network.companies.loc[network.companies['company_number'] == row['company_number']] + company_name = company['company_name'] + company_status = company['company_status'] if company_status == "active": if row['end_date'] != None: marker_color = "red" diff --git a/sugartrail/processing.py 
b/sugartrail/processing.py index ffe4dae..8fe27a0 100644 --- a/sugartrail/processing.py +++ b/sugartrail/processing.py @@ -4,6 +4,17 @@ import requests import random import urllib import regex as re +import collections.abc + +def flatten(d, parent_key='', sep='.'): + items = [] + for k, v in d.items(): + new_key = parent_key + sep + k if parent_key else k + if isinstance(v, collections.abc.MutableMapping): # the ABC aliases were removed from `collections` in Python 3.10 + items.extend(flatten(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) def infer_postcode(address_string): postcode = re.findall(r'\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b', address_string) @@ -89,6 +100,8 @@ def build_address_history(company_id): addresses = [] entry = {} entry["company_number"] = str(company_id) + entry["lat"] = "" + entry["lon"] = "" entry["address"] = str(normalise_address(company_info_subset['registered_office_address'])) entry["start_date"] = str(address_changes['items'][0]['date']) if 'date_of_cessation' in company_info_subset: @@ -98,6 +111,8 @@ def build_address_history(company_id): addresses.append(entry) for i,change in enumerate(address_changes['items']): entry = {} + entry["lat"] = "" + entry["lon"] = "" entry["company_number"] = str(company_id) if 'old_address' in change['description_values']: entry["address"] = change['description_values']['old_address'] @@ -120,6 +135,8 @@ def build_address_history(company_id): entry[address_keys[k]] = None entry["company_number"] = str(company_id) entry['address'] = normalise_address(entry['address']) + entry["lat"] = "" + entry["lon"] = "" return [entry] else: address_history = [] @@ -131,6 +148,8 @@ def build_address_history(company_id): entry[address_keys[k]] = None entry["company_number"] = str(company_id) entry['address'] = normalise_address(entry['address']) + entry["lat"] = "" + entry["lon"] = "" return [entry] else: return []