mirror of
https://github.com/bellingcat/sugartrail.git
synced 2026-06-11 04:58:29 +03:00
3
.gitignore
vendored
3
.gitignore
vendored
@@ -48,3 +48,6 @@ coverage.xml
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# API Keys
|
||||
config/config.py
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 89 KiB After Width: | Height: | Size: 86 KiB |
BIN
assets/networks/.DS_Store
vendored
Normal file
BIN
assets/networks/.DS_Store
vendored
Normal file
Binary file not shown.
1
assets/networks/domain_corp_network.json
Normal file
1
assets/networks/domain_corp_network.json
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
1
assets/networks/kingdom_of_sweets_network.json
Normal file
1
assets/networks/kingdom_of_sweets_network.json
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
1
assets/networks/regent_street_network.json
Normal file
1
assets/networks/regent_street_network.json
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
1
assets/networks/shelton_street_network.json
Normal file
1
assets/networks/shelton_street_network.json
Normal file
File diff suppressed because one or more lines are too long
1
assets/networks/western_crown_network.json
Normal file
1
assets/networks/western_crown_network.json
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
1
config/__init__.py
Normal file
1
config/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from . import config
|
||||
@@ -10,7 +10,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "f17ebdd2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -18,7 +18,8 @@
|
||||
"from sugartrail import mapview, api, base\n",
|
||||
"import ipywidgets as widgets\n",
|
||||
"from IPython.display import display\n",
|
||||
"import requests"
|
||||
"import requests\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -222,13 +223,13 @@
|
||||
" accordion_data.selected_index=0\n",
|
||||
" build_map_button.disabled = True\n",
|
||||
" with tab.children[0]:\n",
|
||||
" display(network.company_ids)\n",
|
||||
" display(pd.DataFrame(network.company_ids))\n",
|
||||
" with tab.children[1]:\n",
|
||||
" display(network.addresses)\n",
|
||||
" display(pd.DataFrame(network.addresses))\n",
|
||||
" with tab.children[2]:\n",
|
||||
" display(network.officer_ids)\n",
|
||||
" display(pd.DataFrame(network.officer_ids))\n",
|
||||
" with tab.children[3]:\n",
|
||||
" display(network.companies) \n",
|
||||
" display(pd.DataFrame(network.companies)) \n",
|
||||
"\n",
|
||||
"display(build_map_button, map_container)"
|
||||
]
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
"source": [
|
||||
"from sugartrail import api, mapview, base\n",
|
||||
"from ipywidgets import VBox, HBox\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"api.basic_auth.username = \"\""
|
||||
]
|
||||
@@ -291,7 +292,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.company_ids"
|
||||
"pd.DataFrame(network.company_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -309,7 +310,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.officer_ids"
|
||||
"pd.DataFrame(network.officer_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -329,7 +330,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.addresses"
|
||||
"pd.DataFrame(network.addresses)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -357,7 +358,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.hop_history"
|
||||
"pd.DataFrame(network.hop_history)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -411,7 +412,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.address_history"
|
||||
"pd.DataFrame(network.address_history)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -421,7 +422,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.companies "
|
||||
"pd.DataFrame(network.companies)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -475,9 +476,9 @@
|
||||
"id": "fd5d9a0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pick N Mix London Limited (E) is a 'company at address' for 3rd Floor 13 Charles Ii Street (C) which is a 'historic address' for Kingdom of Sweets Ltd (A).\n",
|
||||
"Pick N Mix London Limited (e) is a 'company at address' for 3rd Floor 13 Charles Ii Street (c) which is a 'historic address' for Kingdom of Sweets Ltd (a).\n",
|
||||
"\n",
|
||||
"Additionally, Pick N Mix London Limited (D) is an appointment of (B) who is an officer of Kingdom of Sweets Ltd (A). "
|
||||
"Additionally, Pick N Mix London Limited (d) is an appointment of (b) who is an officer of Kingdom of Sweets Ltd (a). "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -493,7 +494,7 @@
|
||||
"id": "a68e26ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The network object can be saved with 'pickle' and reloaded when needed:"
|
||||
"The network object can be saved to `../assets/networks/` as a JSON file:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -503,10 +504,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pickle\n",
|
||||
"\n",
|
||||
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'wb') as handle:\n",
|
||||
" pickle.dump(network, handle)"
|
||||
"network.save('kingdom_of_sweets_network.json')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7632e2a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can load the network by creating a new network and passing the filename: "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -516,8 +522,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../assets/networks/kingdom_of_sweets_network.pickle', 'rb') as handle:\n",
|
||||
" network = pickle.load(handle)"
|
||||
"network = base.Network(file='kingdom_of_sweets_network.json')"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -163,7 +163,7 @@
|
||||
"id": "866bc18e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Lets go big and perform 6 hops. It's likely to take some time to gather all the data +1 hour. If you don't want to wait, you can also use uncomment the block below to load a pre-made network instance, in which case, jump to the 'generate map' cell :"
|
||||
"Let's go big and perform 6 hops. Gathering all the data is likely to take more than 2 hours. If you don't want to wait, you can uncomment the block below to load a pre-made network instance and then jump to the 'generate map' cell:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -173,9 +173,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import pickle\n",
|
||||
"# with open('../assets/networks/western_crown_network.pickle', 'rb') as handle:\n",
|
||||
"# western_crown_network = pickle.load(handle)"
|
||||
"# western_crown_network = base.Network(file='western_crown_network.json')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -231,7 +229,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.DataFrame(western_crown_network.find_path('10643744'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
|
||||
"pd.DataFrame(western_crown_network.find_path('10289650'))[['node_index', 'node', 'hop', 'node_type', 'link']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.addresses"
|
||||
"pd.DataFrame(network.addresses)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,7 +73,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.addresses['address'].unique()"
|
||||
"pd.DataFrame(network.addresses)['address'].unique()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -91,7 +91,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.maxsize_entities"
|
||||
"pd.DataFrame(network.maxsize_entities)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -99,7 +99,7 @@
|
||||
"id": "5ad7b443",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Because we set a limit of 500 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `companies_id`."
|
||||
"Because we set a limit of 50 companies on the maxsize of companies returned via `companies_at_address_maxsize`, these companies will not be added to `company_ids`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -127,7 +127,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(network.company_ids['company_id'].unique())"
|
||||
"len(network.company_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +146,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"network.maxsize_entities['node'][0]"
|
||||
"network.maxsize_entities[2]['node']"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -230,7 +230,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"regent_street_network.company_ids"
|
||||
"pd.DataFrame(regent_street_network.company_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -254,13 +254,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38937142",
|
||||
"id": "4b7616c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pickle\n",
|
||||
"with open('../assets/networks/regent_street_network.pickle', 'rb') as handle:\n",
|
||||
" regent_street_network = pickle.load(handle)"
|
||||
"# regent_street_network = base.Network(file='regent_street_network.json')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -278,7 +276,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"regent_street_network.officer_ids['name'].value_counts()"
|
||||
"pd.DataFrame(regent_street_network.officer_ids)['name'].value_counts()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -333,7 +331,7 @@
|
||||
"source": [
|
||||
"shelton_street_network = base.Network(address=\"71-75, Shelton Street, Covent Garden, London, WC2H 9JQ\")\n",
|
||||
"shelton_street_network.perform_hop(1)\n",
|
||||
"shelton_street_network.maxsize_entities"
|
||||
"shelton_street_network.maxsize_entities[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -372,7 +370,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"company_data = pd.read_csv(\"assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
|
||||
"company_data = pd.read_csv(\"../assets/company_data/BasicCompanyDataAsOneFile-2022-11-01.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -380,7 +378,7 @@
|
||||
"id": "2273cf39",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now lets try get every company at the very overcrowded 71-75 Shelton Street address:"
|
||||
"Now let's try to get every company at the very overcrowded 71-75 Shelton Street address (this might take several minutes; alternatively, uncomment the cell below to load a pre-made network):"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -394,7 +392,17 @@
|
||||
"shelton_street_network.hop.companies_at_address_maxsize = None\n",
|
||||
"shelton_street_network.hop.officers_at_address_maxsize = None\n",
|
||||
"shelton_street_network.get_officers_at_address = False\n",
|
||||
"shelton_street_network.perform_hop(1, company_data= company_data)"
|
||||
"shelton_street_network.perform_hop(1, company_data = company_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d34c9833",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# shelton_street_network = base.Network(file='shelton_street_network.json')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -412,7 +420,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"shelton_street_network.company_ids"
|
||||
"len(shelton_street_network.company_ids)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -35,10 +35,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # network build from Domain Foundation, company_id = \"11951034\"\n",
|
||||
"# import pickle\n",
|
||||
"\n",
|
||||
"# with open('../assets/networks/domain_corp_network.pickle', 'rb') as handle:\n",
|
||||
"# network = pickle.load(handle)"
|
||||
"# network = base.Network(file='domain_corp_network.json')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import requests
|
||||
import time
|
||||
import os
|
||||
import config
|
||||
|
||||
access_token = ""
|
||||
username = access_token
|
||||
# username = config.config.APIKEY
|
||||
username = ""
|
||||
password = ""
|
||||
size = "5000"
|
||||
basic_auth = requests.auth.HTTPBasicAuth(username, password)
|
||||
@@ -17,25 +19,28 @@ def test():
|
||||
return False
|
||||
|
||||
def make_request(url, input, input_type, response_type):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Fully-formed request URL.
        input: Value the lookup is being made for. Currently unused; kept so
            existing positional callers keep working.
        input_type: Label for the kind of ``input`` (e.g. ``'address'``).
            Currently unused.
        response_type: Label for the kind of data requested (e.g.
            ``'officers'``). Currently unused.

    Returns:
        The parsed JSON payload when the response status is 200. ``None``
        when no API username is configured, when the request fails, or when
        the status is anything other than 200.
    """
    if not basic_auth.username:
        print("Authentication required")
        return None

    # Pause before every request; presumably to stay under the API rate
    # limit -- confirm against the service's documented limits.
    time.sleep(0.5)
    try:
        response = requests.get(url, auth=basic_auth)
        response.raise_for_status()
        if response.status_code == 200:
            return response.json()
    except requests.exceptions.RequestException:
        # HTTPError, ConnectionError and Timeout all derive from
        # RequestException, so this single handler covers every case the
        # separate (previously unreachable) handlers were written for.
        return None
    return None
|
||||
|
||||
def get_company_officers(company_id):
|
||||
url = "https://api.company-information.service.gov.uk/company/" + company_id + "/officers"
|
||||
@@ -89,12 +94,13 @@ def get_companies_at_address(address):
|
||||
def get_officers_at_address(address):
|
||||
url = "https://api.company-information.service.gov.uk/search/officers?q=location:" + address
|
||||
response = make_request(url, address, 'address', 'officers')
|
||||
if 'items' in response:
|
||||
officers = []
|
||||
word_list = []
|
||||
for word in address.replace(',','').split():
|
||||
word_list.append(word)
|
||||
for officer in response['items']:
|
||||
if all(word in officer['address_snippet'] for word in word_list):
|
||||
officers.append(officer)
|
||||
return officers
|
||||
if response:
|
||||
if 'items' in response:
|
||||
officers = []
|
||||
word_list = []
|
||||
for word in address.replace(',','').split():
|
||||
word_list.append(word)
|
||||
for officer in response['items']:
|
||||
if all(word in officer['address_snippet'] for word in word_list):
|
||||
officers.append(officer)
|
||||
return officers
|
||||
|
||||
@@ -1,31 +1,44 @@
|
||||
from sugartrail import api
|
||||
from sugartrail import processing
|
||||
import pandas as pd
|
||||
import IPython
|
||||
import numpy as np
|
||||
import math
|
||||
import warnings
|
||||
from string import ascii_lowercase as alc
|
||||
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||
pd.set_option('display.max_columns', 500)
|
||||
pd.set_option('display.max_rows', 150)
|
||||
import json
|
||||
from string import ascii_letters as alc
|
||||
|
||||
class Network:
|
||||
def __init__(self, officer_id=None, company_id=None, address=None, file=None):
    """Create a network seeded from an officer, a company, an address or a saved file.

    Args:
        officer_id: Officer id to start the network from.
        company_id: Company id to start the network from.
        address: Address string to start the network from.
        file: Name of a saved network file, passed to ``load``.
    """
    # Node tables, each a list of plain dicts.
    self.addresses = []
    self.officer_ids = []
    self.company_ids = []
    self.companies = []
    self.address_history = []

    # Seed values; ``initialise`` uses the first one that is set.
    self._officer_id = officer_id
    self._company_id = company_id
    self._address = address

    self.n = 0  # compared against each row's 'n' when selecting nodes to hop from
    self.link_type = None
    self.hop = self.Hop()
    self.hop_history = []
    self.maxsize_entities = []

    # Items already searched, so they are not searched again.
    self.processed_officers = []
    self.processed_companies = []
    self.processed_addresses = []

    # Remember the filename itself: ``load`` returns nothing, so storing
    # its result would leave ``file`` permanently None.
    self._file = file
    self.load(file)
    self.initialise()
|
||||
|
||||
@property
def file(self):
    """Name of the saved-network file associated with this network."""
    return self._file

@file.setter
def file(self, new_value):
    """Point the network at a saved file and load its contents.

    Clears every seed value (officer, company, address) before calling
    ``load`` with the new filename.
    """
    self._file = new_value
    self._officer_id = None
    self._company_id = None
    # ``_address`` is the attribute the rest of the class reads.
    self._address = None
    # Retained because the sibling setters also reset this name.
    self._address_id = None
    self.load(self._file)
|
||||
|
||||
@property
|
||||
def officer_id(self):
|
||||
@@ -36,7 +49,7 @@ class Network:
|
||||
self._officer_id = new_value
|
||||
self._company_id = None
|
||||
self._address_id = None
|
||||
self.initialise_dataframe()
|
||||
self.initialise()
|
||||
|
||||
@property
|
||||
def company_id(self):
|
||||
@@ -47,10 +60,10 @@ class Network:
|
||||
self._company_id = new_value
|
||||
self._officer_id = None
|
||||
self._address_id = None
|
||||
self.initialise_dataframe()
|
||||
self.initialise()
|
||||
|
||||
@property
def address(self):
    """Return the address this network was seeded with, or ``None``."""
    # A property getter receives only ``self``; an extra parameter would make
    # every read of ``network.address`` raise TypeError.
    return self._address
|
||||
|
||||
@address.setter
|
||||
@@ -58,98 +71,143 @@ class Network:
|
||||
self._address = new_value
|
||||
self._company_id = None
|
||||
self._officer_id = None
|
||||
self.initialise_dataframe()
|
||||
self.initialise()
|
||||
|
||||
def initialise_dataframe(self):
|
||||
self.company_ids = self.company_ids.iloc[0:0]
|
||||
self.officer_ids = self.officer_ids.iloc[0:0]
|
||||
self.addresses = self.addresses.iloc[0:0]
|
||||
if self._officer_id:
|
||||
if api.get_appointments(self._officer_id):
|
||||
self.officer_ids = self.officer_ids.append({'officer_id': self._officer_id, 'name': api.get_appointments(self._officer_id)['items'][0]['name'], 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}, ignore_index=True)
|
||||
elif self.company_id:
|
||||
self.company_ids = self.company_ids.append({'company_id': self._company_id, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None}, ignore_index=True)
|
||||
company = api.get_company(self._company_id)
|
||||
# company['n'] = self.n
|
||||
company['link_type'] = self.link_type
|
||||
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
elif self._address:
|
||||
self.addresses = self.addresses.append({'address': self._address, 'n':self.n, 'link_type': None, 'node_type': None, 'node': None,}, ignore_index=True)
|
||||
else:
|
||||
print("No input provided. Please provide either officer_id, company_id or address value as input.")
|
||||
# change to initialise
|
||||
def initialise(self):
    """Add the seed node to the network while no hops have been made.

    Does nothing once ``self.n`` is 1 or more. Otherwise the first seed that
    is set wins, in the order officer id, company id, address; a network
    backed by a file needs no seeding. With no input at all a hint is
    printed.
    """
    if self.n >= 1:
        return

    if self._officer_id:
        # Fetch once and reuse: each call is a separate network request.
        appointments = api.get_appointments(self._officer_id)
        if appointments:
            self.officer_ids.append({
                'officer_id': self._officer_id,
                'name': appointments['items'][0]['name'],
                'n': self.n,
                'link_type': None,
                'node_type': None,
                'node': None,
            })
        else:
            print(f"Officer with ID:{str(self._officer_id)} not found")
    elif self._company_id:
        self.company_ids.append({
            'company_id': self._company_id,
            'n': self.n,
            'link_type': '',
            'node_type': '',
            'node': '',
        })
        company = api.get_company(self._company_id)
        self.companies.append(dict(processing.flatten(company)))
    elif self._address:
        self.addresses.append({
            'address': self._address,
            'n': self.n,
            'link_type': '',
            'node_type': '',
            'node': '',
        })
    elif self.file:
        # Contents come from the saved file; nothing to seed.
        pass
    else:
        print("No input provided. Please provide either officer_id, company_id or address value as input.")
|
||||
|
||||
def save(self, filename):
    """Write the network's state as JSON to ``../assets/networks/<filename>``.

    Every instance attribute is serialised except the ``hop`` object and the
    source-file bookkeeping attribute.

    Args:
        filename: Name of the file to create inside ``../assets/networks/``
            (resolved relative to the current working directory).
    """
    # 'file' is kept for compatibility; '_file' is the attribute actually
    # stored on the instance.
    excluded = {'hop', 'file', '_file'}
    network_data = {k: v for k, v in self.__dict__.items() if k not in excluded}
    # The context manager guarantees the file is flushed and closed.
    with open(f'../assets/networks/{filename}', 'w') as f:
        f.write(json.dumps(network_data))
|
||||
|
||||
def load(self, filename):
    """Restore the network's state from ``../assets/networks/<filename>``.

    Args:
        filename: Name of a JSON file inside ``../assets/networks/``
            (resolved relative to the current working directory). A falsy
            value makes this a no-op.

    Raises:
        KeyError: If the file lacks one of the expected top-level keys.
    """
    if not filename:
        return

    # Close the handle as soon as the JSON has been parsed.
    with open(f'../assets/networks/{filename}') as f:
        network_data = json.load(f)

    restored_fields = (
        'addresses',
        'officer_ids',
        'company_ids',
        'companies',
        'address_history',
        '_officer_id',
        '_company_id',
        '_address',
        'n',
        'link_type',
        'hop_history',
        'maxsize_entities',
        'processed_officers',
        'processed_companies',
        'processed_addresses',
    )
    for field in restored_fields:
        setattr(self, field, network_data[field])
|
||||
|
||||
def add_company_names(self):
    """Attach a ``company_name`` to every entry in ``self.company_ids``.

    The name is taken from the matching record in ``self.companies`` (first
    match by ``company_number``). When no record is held locally the company
    is fetched through ``api.get_company``; entries whose name cannot be
    found either way are left without a ``company_name`` key.
    """
    # Index the known companies once instead of rescanning the whole list
    # for every id; setdefault keeps the first record per company number.
    known = {}
    for record in self.companies:
        known.setdefault(record.get('company_number'), record)

    for entry in self.company_ids:
        record = known.get(entry['company_id'])
        if record is not None:
            entry['company_name'] = record['company_name']
            continue
        fetched = api.get_company(entry['company_id'])
        if fetched and 'company_name' in fetched:
            entry['company_name'] = fetched['company_name']
|
||||
|
||||
def get_company_from_id(self, company_df=None, company_id=None, print_progress=True):
|
||||
company_list = []
|
||||
if company_id:
|
||||
if company_id in self.company_ids['company_id'].unique():
|
||||
if company_id in [company['company_id'] for company in self.company_ids]:
|
||||
company_list = [company_id]
|
||||
else:
|
||||
print("add valid company id")
|
||||
else:
|
||||
company_list = self.company_ids['company_id'].unique()
|
||||
company_list = [company['company_id'] for company in self.company_ids]
|
||||
companies = []
|
||||
for i, company_id in enumerate(company_list):
|
||||
IPython.display.clear_output(wait=True)
|
||||
if print_progress:
|
||||
print("Processed " + str(i+1) + "/" + str(len(company_list)) + " companies.")
|
||||
if company_id not in self.companies['company_number'].unique():
|
||||
if company_id not in [company['company_number'] for company in self.companies]:
|
||||
if company_df is not None:
|
||||
try:
|
||||
company = company_df[company_df[" CompanyNumber"] == str(company_id)]["CompanyName"].item()
|
||||
if company:
|
||||
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
companies.append(company)
|
||||
except:
|
||||
try:
|
||||
company = api.get_company(company_id)
|
||||
if company:
|
||||
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
companies.append(company)
|
||||
except:
|
||||
print(f"Failed to get data for {company_id}")
|
||||
else:
|
||||
company = api.get_company(company_id)
|
||||
if company:
|
||||
self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
# self.companies = self.companies.append(pd.json_normalize(company), ignore_index=True)
|
||||
companies.append(company)
|
||||
# add companies to dataframe
|
||||
self.companies.extend(companies)
|
||||
|
||||
def run_map_preprocessing(self):
    """Prepare the network's data for mapping.

    Runs ``get_company_from_id``, ``add_company_names`` and ``get_coords``
    in that order, then removes exact-duplicate rows from
    ``self.address_history``.
    """
    self.get_company_from_id()
    self.add_company_names()
    self.get_coords()

    # Keep the first occurrence of each row so the resulting order is
    # stable; de-duplicating through a bare set would scramble it.
    seen = set()
    unique_rows = []
    for row in self.address_history:
        key = tuple(row.items())
        if key not in seen:
            seen.add(key)
            unique_rows.append(dict(key))
    self.address_history = unique_rows
    return
|
||||
|
||||
def get_coords(self):
|
||||
for i, row in self.addresses.iterrows():
|
||||
for i, row in enumerate(self.addresses):
|
||||
IPython.display.clear_output(wait=True)
|
||||
print("Processed " + str(i+1) + "/" + str(len(self.addresses)) + " addresses.")
|
||||
if row.isnull()['lat'] and row.isnull()['lon']:
|
||||
if 'lat' not in row or 'lon' not in row:
|
||||
coords = processing.get_coords_from_address(row['address'])
|
||||
if coords:
|
||||
self.addresses['lat'][i] = coords['lat']
|
||||
self.addresses['lon'][i] = coords['lon']
|
||||
self.addresses[i]['lat'] = coords['lat']
|
||||
self.addresses[i]['lon'] = coords['lon']
|
||||
historic_addresses = list(filter(lambda d: d.get('address') == row['address'], self.address_history))
|
||||
for j, historic_address in enumerate(self.address_history):
|
||||
if historic_address['address'] == row['address']:
|
||||
self.address_history[j]['lon'] = coords['lon']
|
||||
self.address_history[j]['lat'] = coords['lat']
|
||||
else:
|
||||
self.addresses[i]['lat'] = ""
|
||||
self.addresses[i]['lon'] = ""
|
||||
print("No coords found: " + row['address'])
|
||||
historic_indices = self.address_history.index[self.address_history["address"]==row['address']].tolist()
|
||||
for j in historic_indices:
|
||||
self.address_history["lon"][j] = self.addresses['lon'][i]
|
||||
self.address_history["lat"][j] = self.addresses['lat'][i]
|
||||
|
||||
def find_path(self, select_company):
|
||||
network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
|
||||
# network_link_type_rows = self.company_ids.loc[self.company_ids['company_id'] == select_company]
|
||||
network_link_type_rows = list(filter(lambda d: d.get('company_id') == select_company, self.company_ids))
|
||||
path = []
|
||||
company_info = self.get_company_from_id(company_id=select_company, print_progress=False)
|
||||
for i, row in network_link_type_rows.iterrows():
|
||||
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": self.companies[self.companies["company_number"] == select_company]['company_name'].item(), "node_type": row['link_type'], "link_id": row['node']})
|
||||
for i, row in enumerate(network_link_type_rows):
|
||||
path.insert(0, {'hop': row['n'], "type": "Company", "id": select_company, "node": row['company_name'], "node_type": row['link_type'], "link_id": row['node']})
|
||||
search_terms = [{'n': row['n']-1, 'node_type':row['node_type'], 'node':row['node']}]
|
||||
for j in range(row['n']-1,-1,-1):
|
||||
for term in search_terms:
|
||||
if term['n'] == j:
|
||||
if term['node_type'] == "Address":
|
||||
select_rows = self.addresses.loc[(self.addresses['address'] == term['node']) & (self.addresses['n'] == j)]
|
||||
for k, select_row in select_rows.iterrows():
|
||||
###
|
||||
select_rows = list(filter(lambda d: d.get('address') == term['node'] and d.get('n') == j, self.addresses))
|
||||
for k, select_row in enumerate(select_rows):
|
||||
if select_row['n'] == 0:
|
||||
origin = {'hop': j, "type": "Address", "id": select_row['address'], "node": select_row['address'], "node_type": "", "link_id": ""}
|
||||
if origin not in path:
|
||||
@@ -161,22 +219,22 @@ class Network:
|
||||
path.insert(0, item)
|
||||
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
|
||||
elif term['node_type'] == "Company":
|
||||
select_rows = self.company_ids.loc[(self.company_ids['company_id'] == term['node']) & (self.company_ids['n'] == j)]
|
||||
for l, select_row in select_rows.iterrows():
|
||||
select_rows = list(filter(lambda d: d.get('company_id') == term['node'] and d.get('n') == j, self.company_ids))
|
||||
for l, select_row in enumerate(select_rows):
|
||||
self.get_company_from_id(company_id=select_row['company_id'], print_progress=False)
|
||||
if select_row['n'] == 0:
|
||||
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": "", "link_id": ""}
|
||||
origin = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": "", "link_id": ""}
|
||||
if origin not in path:
|
||||
path.insert(0, origin)
|
||||
break
|
||||
else:
|
||||
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": self.companies[self.companies["company_number"] == select_row['company_id']]['company_name'].item(), "node_type": select_row['link_type'], "link_id": select_row['node']}
|
||||
item = {'hop': j, "type": "Company", "id": select_row['company_id'], "node": select_row['company_name'], "node_type": select_row['link_type'], "link_id": select_row['node']}
|
||||
if item not in path:
|
||||
path.insert(0, item)
|
||||
search_terms.append({'n': j-1, 'node_type':select_row['node_type'], 'node':select_row['node']})
|
||||
elif term['node_type'] == "Person":
|
||||
select_rows = self.officer_ids.loc[(self.officer_ids['officer_id'] == term['node']) & (self.officer_ids['n'] == j)]
|
||||
for m, select_row in select_rows.iterrows():
|
||||
select_rows = list(filter(lambda d: d.get('officer_id') == term['node'] and d.get('n') == j, self.officer_ids))
|
||||
for m, select_row in enumerate(select_rows):
|
||||
if select_row['link_type'] == 0:
|
||||
origin = {'hop': j, "type": "Person", "id": select_row["officer_id"], "node": select_row['name'], "node_type": "", "link_id": ""}
|
||||
if origin not in path:
|
||||
@@ -196,40 +254,58 @@ class Network:
|
||||
link_indices = []
|
||||
for j,item in enumerate(sorted_path):
|
||||
if item['id'] == search_term:
|
||||
link_indices.append(alc[j].upper())
|
||||
link_indices.append(alc[j])
|
||||
sorted_path[i]["link"] = ','.join(link_indices)
|
||||
sorted_path[i]["node_index"] = alc[i].upper()
|
||||
sorted_path[i]["node_index"] = alc[i]
|
||||
return sorted_path
|
||||
|
||||
def perform_hop(self, hops, company_data=None):
    """Expand the network outwards by up to ``hops`` further hops.

    On each hop, every address, company and officer recorded at the
    current depth ``self.n`` is passed to the matching ``self.hop.search_*``
    method. Items already listed in ``self.processed_*`` are skipped, so
    work that was interrupted part-way through a hop is not repeated.
    After a hop completes, duplicate records are dropped, the
    ``processed_*`` lists are reset, ``self.n`` is incremented and a
    snapshot of the hop settings is recorded.

    Args:
        hops: Maximum number of hops to perform.
        company_data: Optional value forwarded unchanged to
            ``self.hop.search_address``.

    Returns:
        None. The network is updated in place.
    """
    def _values_at_current_depth(records, key):
        # Values of ``key`` for the records sitting at depth ``self.n``.
        return [record[key] for record in records if record.get('n') == self.n]

    def _drop_earlier_duplicates(records):
        # When equal records repeat, only the last occurrence is kept.
        return [record for pos, record in enumerate(records) if record not in records[pos + 1:]]

    hop_history = []
    for hop in range(hops):
        selected_addresses = _values_at_current_depth(self.addresses, 'address')
        selected_companies = _values_at_current_depth(self.company_ids, 'company_id')
        selected_officers = _values_at_current_depth(self.officer_ids, 'officer_id')
        if not (selected_addresses or selected_companies or selected_officers):
            print("Edge of network reached.")
            break

        address_total = str(len(selected_addresses))
        company_total = str(len(selected_companies))
        officer_total = str(len(selected_officers))

        for i, address in enumerate(selected_addresses):
            if address not in self.processed_addresses:
                self.hop.search_address(self, address, company_data)
                self.processed_addresses.append(address)
            IPython.display.clear_output(wait=True)
            print("Hop number: " + str(hop))
            print("Processed " + str(i + 1) + "/" + address_total + " addresses.")

        for j, company in enumerate(selected_companies):
            if company not in self.processed_companies:
                self.hop.search_company_id(self, company)
                self.processed_companies.append(company)
            IPython.display.clear_output(wait=True)
            print("Hop number: " + str(hop))
            print("Processed " + address_total + "/" + address_total + " addresses.")
            print("Processed " + str(j + 1) + "/" + company_total + " companies.")

        for k, officer in enumerate(selected_officers):
            if officer not in self.processed_officers:
                self.hop.search_officer_id(self, officer)
                self.processed_officers.append(officer)
            IPython.display.clear_output(wait=True)
            print("Hop number: " + str(hop))
            print("Processed " + address_total + "/" + address_total + " addresses.")
            print("Processed " + company_total + "/" + company_total + " companies.")
            print("Processed " + str(k + 1) + "/" + officer_total + " officers.")

        self.officer_ids = _drop_earlier_duplicates(self.officer_ids)
        self.company_ids = _drop_earlier_duplicates(self.company_ids)
        self.maxsize_entities = _drop_earlier_duplicates(self.maxsize_entities)
        self.addresses = _drop_earlier_duplicates(self.addresses)
        self.address_history = _drop_earlier_duplicates(self.address_history)
        self.companies = _drop_earlier_duplicates(self.companies)

        # The hop finished, so nothing is mid-processing any more.
        self.processed_officers = []
        self.processed_companies = []
        self.processed_addresses = []
        self.n += 1
        # Store a copy: appending the live ``__dict__`` would make every
        # history row alias the same object, so later changes to the hop
        # settings would silently rewrite the recorded history.
        hop_history.append(dict(self.hop.__dict__))

    # NOTE(review): recorded once after the loop (also when the edge of the
    # network stops the loop early) - confirm this matches the intended placement.
    self.hop_history.extend(hop_history)
|
||||
|
||||
class Hop:
|
||||
def __init__(self):
|
||||
@@ -248,104 +324,141 @@ class Network:
|
||||
|
||||
def search_company_id(self, network, company_id):
    """Queue the addresses and officers linked to ``company_id`` for the next hop.

    Which sources are consulted depends on this hop's
    ``get_company_officers``, ``get_psc_correspondance_address`` and
    ``get_company_address_history`` flags. New records are tagged with
    depth ``network.n + 1`` and appended to ``network.addresses`` and
    ``network.officer_ids``. Values already recorded at a depth below
    ``network.n + 1`` are skipped. Address-history rows are also appended
    to ``network.address_history``.

    Args:
        network: The network being expanded; updated in place.
        company_id: Company number to look up.

    Returns:
        None.
    """
    def _stage(bucket, record):
        # Queue a record at most once per call.
        if record not in bucket:
            bucket.append(record)

    officers = []
    new_addresses = []
    new_officers = []
    if self.get_company_officers:
        response = api.get_company_officers(company_id)
        # A falsy response, or one without 'items', means "no officers".
        if response and 'items' in response:
            officers = response['items']

    network.node_type = "Company"
    network.node = company_id
    next_n = network.n + 1
    lower_n_addresses = [rec['address'] for rec in network.addresses if rec.get('n') < next_n]
    lower_n_officers = [rec['officer_id'] for rec in network.officer_ids if rec.get('n') < next_n]

    if officers:
        for officer in officers:
            if 'address' in officer:
                officer_address = processing.normalise_address(officer['address'])
                if officer_address not in lower_n_addresses:
                    network.link_type = "Officer Corresponance Address"
                    _stage(new_addresses, {'address': officer_address, 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
            # Skip entries lacking the appointments link instead of raising
            # KeyError, mirroring the key checks done in search_address.
            appointments_link = officer.get('links', {}).get('officer', {}).get('appointments')
            if not appointments_link:
                continue
            officer_id = str(appointments_link.split('/')[2])
            if officer_id not in lower_n_officers:
                network.link_type = "Officer"
                _stage(new_officers, {'officer_id': officer_id, 'name': processing.normalise_name(officer['name']), 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})

    if self.get_psc_correspondance_address:
        psc = api.get_psc(company_id)
        if psc and 'items' in psc:
            for person in psc['items']:
                if "address" in person:
                    network.link_type = "Person of Significant Control Address"
                    psc_address = processing.normalise_address(person['address'])
                    if psc_address not in lower_n_addresses:
                        _stage(new_addresses, {'address': psc_address, 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})

    if self.get_company_address_history:
        # Treat a missing history as empty so extend() cannot fail on None.
        address_history = processing.build_address_history(company_id) or []
        network.address_history.extend(address_history)
        for address in address_history:
            network.link_type = "Historic Address"
            if address['address'] not in lower_n_addresses:
                _stage(new_addresses, {'address': address['address'], 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})

    network.addresses.extend(new_addresses)
    network.officer_ids.extend(new_officers)
|
||||
|
||||
|
||||
def search_officer_id(self, network, officer_id):
    """Queue the addresses, companies and officers linked to ``officer_id``.

    The officer's appointments are always looked up. The correspondence
    address and duplicate-officer lookups run only when this hop's
    ``get_officer_correspondance_address`` / ``get_officer_duplicates``
    flags are set. New records are tagged with depth ``network.n + 1`` and
    appended to ``network.addresses``, ``network.officer_ids`` and
    ``network.company_ids``. Values already recorded at a depth below
    ``network.n + 1`` are skipped. Result sets larger than the configured
    ``*_maxsize`` are not expanded; they are logged in
    ``network.maxsize_entities`` instead.

    Args:
        network: The network being expanded; updated in place.
        officer_id: Identifier of the officer to look up.

    Returns:
        None.
    """
    def _stage(bucket, record):
        # Queue a record at most once per call.
        if record not in bucket:
            bucket.append(record)

    new_addresses = []
    new_companies = []
    new_officers = []
    network.node_type = "Person"
    network.node = officer_id
    next_n = network.n + 1
    lower_n_addresses = [rec['address'] for rec in network.addresses if rec.get('n') < next_n]
    lower_n_officers = [rec['officer_id'] for rec in network.officer_ids if rec.get('n') < next_n]
    lower_n_companies = [rec['company_id'] for rec in network.company_ids if rec.get('n') < next_n]

    appointments = api.get_appointments(officer_id)
    if appointments:
        appointment_items = appointments['items']
        max_appointments = self.officer_appointments_maxsize
        # NOTE(review): a count exactly equal to the limit matches neither
        # branch below, so it is neither expanded nor logged - confirm intended.
        if max_appointments is None or len(appointment_items) < int(max_appointments or 0):
            for appointment in appointment_items:
                # Guarded like the officer address in search_company_id.
                if 'address' in appointment:
                    appointment_address = processing.normalise_address(appointment['address'])
                    if appointment_address not in lower_n_addresses:
                        network.link_type = "Appointment Address"
                        _stage(new_addresses, {'address': appointment_address, 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
                company_number = appointment['appointed_to']['company_number']
                if company_number not in lower_n_companies:
                    network.link_type = "Appointment"
                    _stage(new_companies, {'company_id': company_number, 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
        elif len(appointment_items) > int(max_appointments):
            network.maxsize_entities.append({'node': officer_id, 'type': 'Officer', 'maxsize_type': 'Appointments', 'size': len(appointment_items)})

    if self.get_officer_correspondance_address:
        correspondance_address = api.get_correspondance_address(officer_id)
        # Only the first item is used; an empty or missing 'items' list
        # previously raised IndexError/KeyError.
        if correspondance_address and correspondance_address.get('items'):
            first_address = processing.normalise_address(correspondance_address['items'][0]['address'])
            if first_address not in lower_n_addresses:
                network.link_type = "Officer Corresponance Address"
                _stage(new_addresses, {'address': first_address, 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})

    if self.get_officer_duplicates:
        duplicate_officers = api.get_duplicate_officers(officer_id)
        if duplicate_officers:
            max_duplicates = self.officer_duplicates_maxsize
            if max_duplicates is None or len(duplicate_officers) < int(max_duplicates or 0):
                for duplicate in duplicate_officers:
                    network.link_type = "Duplicate Officer"
                    duplicate_id = duplicate['links']['self'].split('/')[2]
                    if duplicate_id not in lower_n_officers:
                        _stage(new_officers, {'officer_id': duplicate_id, 'name': duplicate['title'], 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
            elif len(duplicate_officers) > int(max_duplicates):
                network.maxsize_entities.append({'node': officer_id, 'type': 'Officer', 'maxsize_type': 'Duplicates', 'size': len(duplicate_officers)})

    network.addresses.extend(new_addresses)
    network.officer_ids.extend(new_officers)
    network.company_ids.extend(new_companies)
|
||||
|
||||
def search_address(self, network, address, company_data):
    """Queue the companies and officers found at ``address`` for the next hop.

    Companies are looked up when ``self.get_companies_at_address`` is set:
    from ``company_data`` via
    ``processing.get_companies_from_address_database`` when it is not
    None, otherwise via ``api.get_companies_at_address``. Officers are
    looked up via ``api.get_officers_at_address`` when
    ``self.get_officers_at_address`` is set. New records are tagged with
    depth ``network.n + 1`` and appended to ``network.company_ids`` and
    ``network.officer_ids``. Ids already recorded at a depth below
    ``network.n + 1`` are skipped. Result sets larger than the configured
    ``*_maxsize`` are not expanded; they are logged in
    ``network.maxsize_entities`` instead.

    Args:
        network: The network being expanded; updated in place.
        address: The address being searched.
        company_data: Optional company dataset; when None the API is used.

    Returns:
        None.
    """
    def _stage(bucket, record):
        # Queue a record at most once per call.
        if record not in bucket:
            bucket.append(record)

    new_companies = []
    new_officers = []
    network.node_type = "Address"
    network.node = address
    next_n = network.n + 1
    lower_n_officers = [rec['officer_id'] for rec in network.officer_ids if rec.get('n') < next_n]
    lower_n_companies = [rec['company_id'] for rec in network.company_ids if rec.get('n') < next_n]

    if self.get_companies_at_address:
        if company_data is not None:
            companies = {'items': processing.get_companies_from_address_database(address, company_data)}
        else:
            companies = api.get_companies_at_address(address)
        # Check truthiness first: ``'items' in None`` raises TypeError when
        # the lookup returns nothing.
        if companies and 'items' in companies:
            company_items = companies['items']
            max_companies = self.companies_at_address_maxsize
            # NOTE(review): a count exactly equal to the limit matches neither
            # branch below, so it is neither expanded nor logged - confirm intended.
            if max_companies is None or len(company_items) < int(max_companies or 0):
                for company in company_items:
                    network.link_type = "Company at Address"
                    if company['company_number'] not in lower_n_companies:
                        _stage(new_companies, {'company_id': company['company_number'], 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
            elif len(company_items) > int(max_companies):
                network.maxsize_entities.append({'node': address, 'type': 'Address', 'maxsize_type': 'Companies', 'size': len(company_items)})

    if self.get_officers_at_address:
        officers = api.get_officers_at_address(address)
        if officers:
            max_officers = self.officers_at_address_maxsize
            if max_officers is None or len(officers) < int(max_officers or 0):
                for officer in officers:
                    # Both keys are required. The previous
                    # ``'links' and 'title' in officer`` only tested 'title'.
                    if 'links' in officer and 'title' in officer:
                        network.link_type = "Officer at Address"
                        found_officer_id = officer['links']['self'].split('/')[2]
                        if found_officer_id not in lower_n_officers:
                            _stage(new_officers, {'officer_id': found_officer_id, 'name': officer['title'], 'n': next_n, 'link_type': network.link_type, 'node_type': network.node_type, 'node': network.node})
            elif len(officers) > int(max_officers):
                network.maxsize_entities.append({'node': address, 'type': 'Address', 'maxsize_type': 'Officers', 'size': len(officers)})

    network.officer_ids.extend(new_officers)
    network.company_ids.extend(new_companies)
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from ipywidgets import HTML, Widget, Layout, Output, VBox, HBox, Textarea
|
||||
from ipyleaflet import Map, Marker, MarkerCluster, AwesomeIcon, AntPath, Popup
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import functools
|
||||
from string import ascii_lowercase as alc
|
||||
import math
|
||||
|
||||
def build_map(network, clear_widget=True):
|
||||
@@ -13,10 +11,12 @@ def build_map(network, clear_widget=True):
|
||||
return m, path_table
|
||||
|
||||
def get_address_path(network, company_id):
|
||||
company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
|
||||
# company_address_history = network.address_history.loc[network.address_history['company_number'] == company_id]
|
||||
company_address_history = list(filter(lambda d: d.get('company_number') == company_id, network.address_history))
|
||||
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
|
||||
address_path = []
|
||||
for index, row in company_address_history.iterrows():
|
||||
if math.isnan(float(row['lat'])) or math.isnan(float(row['lon'])):
|
||||
for index, row in enumerate(company_address_history_sorted):
|
||||
if not row['lat'] or not row['lon']:
|
||||
pass
|
||||
else:
|
||||
address_path.insert(0,[row['lat'], row['lon']])
|
||||
@@ -26,21 +26,30 @@ def locations_from_origin_path(path, network):
|
||||
locations = []
|
||||
for node in path:
|
||||
if node['type'] == 'Company':
|
||||
last_company_address_row = network.address_history.loc[network.address_history['company_number'] == node['id']].iloc[:1]
|
||||
lat = last_company_address_row['lat'].item()
|
||||
lon = last_company_address_row['lon'].item()
|
||||
if math.isnan(float(lat)):
|
||||
###
|
||||
company_address_history = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))
|
||||
company_address_history_sorted = sorted(company_address_history, key=lambda d: d['start_date'], reverse=True)
|
||||
last_company_address_row = {}
|
||||
for address_row in company_address_history_sorted:
|
||||
if address_row['lat'] and address_row['lon']:
|
||||
last_company_address_row = address_row
|
||||
break
|
||||
# last_company_address_row = list(filter(lambda d: d.get('company_number') == node['id'], network.address_history))[0]
|
||||
lat = last_company_address_row['lat']
|
||||
lon = last_company_address_row['lon']
|
||||
if not lat or not lon:
|
||||
pass
|
||||
else:
|
||||
locations.append([float(lat),float(lon)])
|
||||
locations.append([lat,lon])
|
||||
elif node['type'] == 'Address':
|
||||
address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
|
||||
lat = address_row['lat'].item()
|
||||
lon = address_row['lon'].item()
|
||||
if math.isnan(float(lat)) or math.isnan(float(lon)):
|
||||
address_row = list(filter(lambda d: d.get('address') == node['node'], network.addresses))[0]
|
||||
# address_row = network.addresses.loc[network.addresses['address'] == node['node']].iloc[:1]
|
||||
lat = address_row['lat']
|
||||
lon = address_row['lon']
|
||||
if not lat or not lon:
|
||||
pass
|
||||
else:
|
||||
locations.append([float(lat),float(lon)])
|
||||
locations.append([lat,lon])
|
||||
return locations
|
||||
|
||||
def on_button_clicked(address_path, path, location, address_trail, path_table, origin_trail, locations_from_origin, **kwargs):
|
||||
@@ -98,28 +107,30 @@ def get_marker_data(network,address_trail, origin_trail, path_table):
|
||||
address_trail=address_trail
|
||||
origin_trail=origin_trail
|
||||
ms = []
|
||||
for index, row in network.address_history.iterrows():
|
||||
path = ""
|
||||
locations_from_origin = ""
|
||||
message = HTML()
|
||||
marker_color = "green"
|
||||
company = network.companies.loc[network.companies['company_number'] == row['company_number']]
|
||||
company_name = company['company_name'].item()
|
||||
company_status = company['company_status'].item()
|
||||
if company_status == "active":
|
||||
if row['end_date'] != None:
|
||||
marker_color = "red"
|
||||
else:
|
||||
marker_color = "black"
|
||||
address = row['address']
|
||||
path = network.find_path(str(row['company_number']))
|
||||
locations_from_origin = locations_from_origin_path(path, network)
|
||||
message.value = str(company_name) + "<hr>" + str(address)
|
||||
icon = AwesomeIcon(
|
||||
marker_color=marker_color
|
||||
)
|
||||
address_path = get_address_path(network,str(row['company_number']))
|
||||
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
|
||||
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
|
||||
ms.append(marker)
|
||||
for index, row in enumerate(network.address_history):
|
||||
if row['lat'] and row['lon']:
|
||||
path = ""
|
||||
locations_from_origin = ""
|
||||
message = HTML()
|
||||
marker_color = "green"
|
||||
company = list(filter(lambda d: d.get('company_number') == row['company_number'], network.companies))[0]
|
||||
# company = network.companies.loc[network.companies['company_number'] == row['company_number']]
|
||||
company_name = company['company_name']
|
||||
company_status = company['company_status']
|
||||
if company_status == "active":
|
||||
if row['end_date']:
|
||||
marker_color = "red"
|
||||
else:
|
||||
marker_color = "black"
|
||||
address = row['address']
|
||||
path = network.find_path(str(row['company_number']))
|
||||
locations_from_origin = locations_from_origin_path(path, network)
|
||||
message.value = str(company_name) + "<hr>" + str(address)
|
||||
icon = AwesomeIcon(
|
||||
marker_color=marker_color
|
||||
)
|
||||
address_path = get_address_path(network,str(row['company_number']))
|
||||
marker = Marker(icon=icon, opacity=1, location=(row['lat'], row['lon']), draggable=False, popup=message, title="Address")
|
||||
marker.on_click(functools.partial(on_button_clicked, address_path=address_path, address_trail=address_trail, path_table=path_table, origin_trail=origin_trail, path=path, location=(row['lat'], row['lon']), locations_from_origin = locations_from_origin))
|
||||
ms.append(marker)
|
||||
return ms
|
||||
|
||||
@@ -4,6 +4,17 @@ import pandas as pd
|
||||
import random
|
||||
import urllib
|
||||
import regex as re
|
||||
import collections
|
||||
|
||||
def flatten(d, parent_key='', sep='.'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with ``sep``,
    e.g. ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``. A nested mapping that
    is empty contributes no keys.

    Args:
        d: Mapping to flatten; keys are concatenated with ``sep`` as strings.
        parent_key: Prefix applied to every key (used by the recursion).
        sep: Separator placed between key segments.

    Returns:
        A new flat ``dict``; ``d`` is not modified.
    """
    # ``collections.MutableMapping`` was removed in Python 3.10; the ABC
    # lives in ``collections.abc``.
    from collections.abc import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat
|
||||
|
||||
def infer_postcode(address_string):
|
||||
postcode = re.findall(r'\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b', address_string)
|
||||
@@ -18,13 +29,6 @@ def get_companies_from_address_database(address, company_data):
|
||||
companies['registered_office_address'] = [{'address_line_1': row['address_line_1'], 'address_line_2': row['address_line_2'], 'locality': row['locality'], 'postal_code': row['postal_code'], 'country': row['country']} for i,row in companies.iterrows()]
|
||||
return companies.to_dict('records')
|
||||
|
||||
def load_company_data(company_data_filepath):
    """Load the company dataset CSV into a pandas DataFrame.

    Args:
        company_data_filepath: Path (or file-like object) handed to
            ``pandas.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when the file cannot be read.
    """
    try:
        return pd.read_csv(company_data_filepath)
    except Exception:
        # Best-effort load: callers get None rather than an error. Unlike a
        # bare ``except``, KeyboardInterrupt and SystemExit still propagate.
        return None
|
||||
|
||||
def get_nearby_postcode(postcode_string):
|
||||
url = "http://api.postcodes.io/postcodes/" + postcode_string[:-1] + "/autocomplete"
|
||||
response = requests.get(url).json()
|
||||
@@ -89,6 +93,8 @@ def build_address_history(company_id):
|
||||
addresses = []
|
||||
entry = {}
|
||||
entry["company_number"] = str(company_id)
|
||||
entry["lat"] = ""
|
||||
entry["lon"] = ""
|
||||
entry["address"] = str(normalise_address(company_info_subset['registered_office_address']))
|
||||
entry["start_date"] = str(address_changes['items'][0]['date'])
|
||||
if 'date_of_cessation' in company_info_subset:
|
||||
@@ -98,6 +104,8 @@ def build_address_history(company_id):
|
||||
addresses.append(entry)
|
||||
for i,change in enumerate(address_changes['items']):
|
||||
entry = {}
|
||||
entry["lat"] = ""
|
||||
entry["lon"] = ""
|
||||
entry["company_number"] = str(company_id)
|
||||
if 'old_address' in change['description_values']:
|
||||
entry["address"] = change['description_values']['old_address']
|
||||
@@ -120,6 +128,8 @@ def build_address_history(company_id):
|
||||
entry[address_keys[k]] = None
|
||||
entry["company_number"] = str(company_id)
|
||||
entry['address'] = normalise_address(entry['address'])
|
||||
entry["lat"] = ""
|
||||
entry["lon"] = ""
|
||||
return [entry]
|
||||
else:
|
||||
address_history = []
|
||||
@@ -131,6 +141,8 @@ def build_address_history(company_id):
|
||||
entry[address_keys[k]] = None
|
||||
entry["company_number"] = str(company_id)
|
||||
entry['address'] = normalise_address(entry['address'])
|
||||
entry["lat"] = ""
|
||||
entry["lon"] = ""
|
||||
return [entry]
|
||||
else:
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user