From d21ca3e8cc5cab777fb42b7f4af7295bf3775115 Mon Sep 17 00:00:00 2001 From: seangreaves Date: Thu, 19 Jan 2023 09:54:58 +0000 Subject: [PATCH] implemented more accurate rate limiting --- config/environment.yml | 1 + config/requirements.txt | 216 +++++++++++++++++++++------------------ sugartrail/api.py | 7 +- sugartrail/processing.py | 3 +- 4 files changed, 126 insertions(+), 101 deletions(-) diff --git a/config/environment.yml b/config/environment.yml index 94042f0..1a6ca3f 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -96,6 +96,7 @@ dependencies: - pytz==2022.7 - pyyaml==6.0 - pyzmq==24.0.1 + - ratelimit==2.2.1 - regex==2022.10.31 - requests==2.28.1 - rfc3339-validator==0.1.4 diff --git a/config/requirements.txt b/config/requirements.txt index 616bb0d..d70a3bb 100644 --- a/config/requirements.txt +++ b/config/requirements.txt @@ -1,98 +1,118 @@ -anyio==3.6.2 -appnope==0.1.3 -argon2-cffi==21.3.0 -argon2-cffi-bindings==21.2.0 -arrow==1.2.3 -asttokens==2.2.1 -attrs==22.2.0 -Babel==2.11.0 -backcall==0.2.0 -beautifulsoup4==4.11.1 -bleach==5.0.1 -branca==0.6.0 -certifi==2022.12.7 -cffi==1.15.1 -charset-normalizer==2.1.1 -comm==0.1.2 -debugpy==1.6.5 -decorator==5.1.1 -defusedxml==0.7.1 -entrypoints==0.4 -executing==1.2.0 -fastjsonschema==2.16.2 -fqdn==1.5.1 -idna==3.4 -ipykernel==6.19.4 -ipyleaflet==0.17.2 -ipython==8.8.0 -ipython-genutils==0.2.0 -ipywidgets==8.0.4 -isoduration==20.11.0 -jedi==0.18.2 -Jinja2==3.1.2 -json5==0.9.11 -jsonpointer==2.3 -jsonschema==4.17.3 -jupyter-events==0.5.0 -jupyter-server==1.23.4 -jupyter_client==7.4.1 -jupyter_core==5.1.2 -jupyter_server_terminals==0.4.3 -jupyterlab-pygments==0.2.2 -jupyterlab-widgets==3.0.5 -jupyterlab_server==2.18.0 -MarkupSafe==2.1.1 -matplotlib-inline==0.1.6 -mistune==2.0.4 -nbclassic==0.4.8 -nbclient==0.7.2 -nbconvert==7.2.7 -nbformat==5.7.1 -nest-asyncio==1.5.6 -notebook==6.5.2 -notebook_shim==0.2.2 -numpy==1.24.1 -packaging==22.0 -pandas==1.5.2 -pandocfilters==1.5.0 -parso==0.8.3 -pexpect==4.8.0 -pickleshare==0.7.5 -platformdirs==2.6.2 -prometheus-client==0.15.0 -prompt-toolkit==3.0.36 -psutil==5.9.4 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pycparser==2.21 -Pygments==2.14.0 -pyrsistent==0.19.3 -python-dateutil==2.8.2 -python-json-logger==2.0.4 -pytz==2022.7 -PyYAML==6.0 -pyzmq==24.0.1 -regex==2022.10.31 -requests==2.28.1 -rfc3339-validator==0.1.4 -rfc3986-validator==0.1.1 -Send2Trash==1.8.0 -six==1.16.0 -sniffio==1.3.0 -soupsieve==2.3.2.post1 -stack-data==0.6.2 -terminado==0.17.1 -tinycss2==1.2.1 -tornado==6.2 -traitlets==5.8.0 -traittypes==0.2.1 -uri-template==1.2.0 -urllib3==1.26.13 -voila==0.4.0 -wcwidth==0.2.5 -webcolors==1.12 -webencodings==0.5.1 -websocket-client==1.4.2 -websockets==10.4 -widgetsnbextension==4.0.5 -xyzservices==2022.9.0 +anyio=3.6.2=pypi_0 +appnope=0.1.3=pypi_0 +argon2-cffi=21.3.0=pypi_0 +argon2-cffi-bindings=21.2.0=pypi_0 +arrow=1.2.3=pypi_0 +asttokens=2.2.1=pypi_0 +attrs=22.2.0=pypi_0 +babel=2.11.0=pypi_0 +backcall=0.2.0=pypi_0 +beautifulsoup4=4.11.1=pypi_0 +bleach=5.0.1=pypi_0 +branca=0.6.0=pypi_0 +bzip2=1.0.8=h0d85af4_4 +ca-certificates=2022.12.7=h033912b_0 +certifi=2022.12.7=pypi_0 +cffi=1.15.1=pypi_0 +charset-normalizer=2.1.1=pypi_0 +comm=0.1.2=pypi_0 +debugpy=1.6.5=pypi_0 +decorator=5.1.1=pypi_0 +defusedxml=0.7.1=pypi_0 +entrypoints=0.4=pypi_0 +executing=1.2.0=pypi_0 +fastjsonschema=2.16.2=pypi_0 +fqdn=1.5.1=pypi_0 +idna=3.4=pypi_0 +ipykernel=6.19.4=pypi_0 +ipyleaflet=0.17.2=pypi_0 +ipython=8.8.0=pypi_0 +ipython-genutils=0.2.0=pypi_0 +ipywidgets=8.0.4=pypi_0 +isoduration=20.11.0=pypi_0 +jedi=0.18.2=pypi_0 +jinja2=3.1.2=pypi_0 +json5=0.9.11=pypi_0 +jsonpointer=2.3=pypi_0 +jsonschema=4.17.3=pypi_0 +jupyter-client=7.4.1=pypi_0 +jupyter-core=5.1.2=pypi_0 +jupyter-events=0.5.0=pypi_0 +jupyter-server=1.23.4=pypi_0 +jupyter-server-terminals=0.4.3=pypi_0 +jupyterlab-pygments=0.2.2=pypi_0 +jupyterlab-server=2.18.0=pypi_0 +jupyterlab-widgets=3.0.5=pypi_0 +libcxx=14.0.6=hccf4f1f_0 +libffi=3.3=h046ec9c_2 +libsqlite=3.40.0=ha978bb4_0 +libzlib=1.2.13=hfd90126_4 +markupsafe=2.1.1=pypi_0 +matplotlib-inline=0.1.6=pypi_0 +mistune=2.0.4=pypi_0 +nbclassic=0.4.8=pypi_0 +nbclient=0.7.2=pypi_0 +nbconvert=7.2.7=pypi_0 +nbformat=5.7.1=pypi_0 +ncurses=6.3=h96cf925_1 +nest-asyncio=1.5.6=pypi_0 +notebook=6.5.2=pypi_0 +notebook-shim=0.2.2=pypi_0 +numpy=1.24.1=pypi_0 +openssl=1.1.1s=hfd90126_1 +packaging=22.0=pypi_0 +pandas=1.5.2=pypi_0 +pandocfilters=1.5.0=pypi_0 +parso=0.8.3=pypi_0 +pexpect=4.8.0=pypi_0 +pickleshare=0.7.5=pypi_0 +pip=22.3.1=pyhd8ed1ab_0 +platformdirs=2.6.2=pypi_0 +prometheus-client=0.15.0=pypi_0 +prompt-toolkit=3.0.36=pypi_0 +psutil=5.9.4=pypi_0 +ptyprocess=0.7.0=pypi_0 +pure-eval=0.2.2=pypi_0 +pycparser=2.21=pypi_0 +pygments=2.14.0=pypi_0 +pyrsistent=0.19.3=pypi_0 +python=3.10.4=hdfd78df_0 +python-dateutil=2.8.2=pypi_0 +python-json-logger=2.0.4=pypi_0 +pytz=2022.7=pypi_0 +pyyaml=6.0=pypi_0 +pyzmq=24.0.1=pypi_0 +ratelimit=2.2.1=pypi_0 +readline=8.1.2=h3899abd_0 +regex=2022.10.31=pypi_0 +requests=2.28.1=pypi_0 +rfc3339-validator=0.1.4=pypi_0 +rfc3986-validator=0.1.1=pypi_0 +send2trash=1.8.0=pypi_0 +setuptools=65.6.3=pyhd8ed1ab_0 +six=1.16.0=pypi_0 +sniffio=1.3.0=pypi_0 +soupsieve=2.3.2.post1=pypi_0 +sqlite=3.40.0=h9ae0607_0 +stack-data=0.6.2=pypi_0 +sugartrail=1.0.0=dev_0 +terminado=0.17.1=pypi_0 +tinycss2=1.2.1=pypi_0 +tk=8.6.12=h5dbffcc_0 +tornado=6.2=pypi_0 +traitlets=5.8.0=pypi_0 +traittypes=0.2.1=pypi_0 +tzdata=2022g=h191b570_0 +uri-template=1.2.0=pypi_0 +urllib3=1.26.13=pypi_0 +voila=0.4.0=pypi_0 +wcwidth=0.2.5=pypi_0 +webcolors=1.12=pypi_0 +webencodings=0.5.1=pypi_0 +websocket-client=1.4.2=pypi_0 +websockets=10.4=pypi_0 +wheel=0.38.4=pyhd8ed1ab_0 +widgetsnbextension=4.0.5=pypi_0 +xyzservices=2022.9.0=pypi_0 +xz=5.2.6=h775f41a_0 +zlib=1.2.13=hfd90126_4 diff --git a/sugartrail/api.py b/sugartrail/api.py index 337da61..d2c7d41 100644 --- a/sugartrail/api.py +++ b/sugartrail/api.py @@ -2,6 +2,7 @@ import requests import time import os import functools +from ratelimit import limits, RateLimitException, sleep_and_retry access_token = "" username = "" @@ -28,10 +29,12 @@ def test(): else: return False -# @auth +# Companies House API allows 600 requests every 5 mins +@sleep_and_retry +@limits(calls=120, period=60) def make_request(url, input, input_type, response_type): """Query Companies House API.""" - time.sleep(0.5) + # time.sleep(0.5) try: response = requests.get(url, auth=basic_auth) response.raise_for_status() diff --git a/sugartrail/processing.py b/sugartrail/processing.py index 92ffc8c..01bd311 100644 --- a/sugartrail/processing.py +++ b/sugartrail/processing.py @@ -73,7 +73,8 @@ def get_coords_from_address(address_string): else: print("failed") else: - print("No postcode found for: " + address_string) + # print("No postcode found for: " + address_string) + pass def normalise_name(name): """Move first word (often surname) from the beginning to the end of string."""