From 0398fb6eba4f5247509ec98c5adda9480adde23c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 16 Jun 2023 15:21:04 +0100 Subject: [PATCH] setup OpenUprnClient --- model_data/OpenUprnClient.py | 27 ++++++++++++++++++++++ model_data/Property.py | 10 ++++++++ model_data/app.py | 44 ++++++++++++++++++++++++++++++++---- 3 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 model_data/OpenUprnClient.py diff --git a/model_data/OpenUprnClient.py b/model_data/OpenUprnClient.py new file mode 100644 index 00000000..7ae37a90 --- /dev/null +++ b/model_data/OpenUprnClient.py @@ -0,0 +1,27 @@ +import pandas as pd +from model_data.utils import setup_logger + +logger = setup_logger() + + +class OpenUprnClient: + COLUMN_NAMES = [ + + ] + + def __init__(self, path, urpns): + self.path = path + self.urpns = [int(x) for x in urpns] + self.data = None + + def read(self): + """ + This method is a placeholder while the data sits locally + :return: + """ + logger.info("Reading in open uprn data") + + df = pd.read_csv(self.path) + df = df[df["UPRN"].isin(self.urpns)] + + self.data = df diff --git a/model_data/Property.py b/model_data/Property.py index 6ac017a1..a5058191 100644 --- a/model_data/Property.py +++ b/model_data/Property.py @@ -1,5 +1,6 @@ from epc_api.client import EpcClient from model_data.config import EPC_AUTH_TOKEN +from model_data.OpenUprnClient import OpenUprnClient class Property: @@ -35,3 +36,12 @@ class Property: response["rows"] = newest_response self.data = response["rows"][0] + + def get_coordinates(self, open_oprn_client: OpenUprnClient): + """ + This method utilises the OpenUprnClient to get the coordinates of the property + The OpenUprnClient interfaces with the Ordnance Survey Open UPRN database to extract + property coordinates. This database holds lookups between UPRN and coordinates.
+ :param open_oprn_client: + :return: + """ diff --git a/model_data/app.py b/model_data/app.py index e656e9ca..a3a45e9e 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -1,5 +1,6 @@ import pandas as pd from tqdm import tqdm +import os from model_data.temp_inputs import input_data from model_data.Property import Property @@ -7,6 +8,7 @@ from model_data.config import EPC_AUTH_TOKEN from epc_api.client import EpcClient from model_data.downloader import pagenated_epc_download from model_data.EpcClean import EpcClean +from model_data.OpenUprnClient import OpenUprnClient def handler(): @@ -22,6 +24,16 @@ def handler(): for p in input_properties: p.search_address_epc() + urpns = [p.data['uprn'] for p in input_properties] + + open_urpn_client = OpenUprnClient( + path=os.path.abspath( + os.path.dirname(__file__) + ) + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv", + urpns=urpns + ) + open_urpn_client.read() + local_authorities = {p.data['local-authority'] for p in input_properties} data = [] @@ -40,7 +52,6 @@ def handler(): cleaner.clean() import pickle - import os with open(os.path.abspath(os.path.dirname(__file__)) + "/data.pkl", "rb") as f: data = pickle.load(f) @@ -91,9 +102,8 @@ def handler(): # # df.to_dict("records") - ## Land registry + # Land registry from model_data.LandRegistryClient import LandRegistryClient - import os land_registry_client = LandRegistryClient( paths=[ @@ -110,8 +120,7 @@ def handler(): ) from dbfread import DBF - import os - from tqdm import tqdm + import pandas as pd borehole_file = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.dbf" table = DBF(borehole_file) @@ -125,3 +134,28 @@ def handler(): borehole_shape_file = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.shp" boreholes = gpd.read_file(borehole_shape_file) + + from geopy.geocoders import Nominatim + from geopy.extra.rate_limiter import RateLimiter + + geolocator = 
Nominatim(user_agent="myGeocoder") + + # The rate limiter will ensure you're not making too many requests too quickly + geocode = RateLimiter(geolocator.geocode, min_delay_seconds=0.5) + + for address_config in input_data: + location = geocode(" ".join([address_config["address1"], address_config["postcode"]])) + geocode("28 distillery wharf, regtta lane, hammersmith, london, w6 9bf") + + print((location.latitude, location.longitude)) + + from geopy.geocoders import Nominatim + from geopy.extra.rate_limiter import RateLimiter + + geolocator = Nominatim(user_agent="myGeocoder") + + geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1) + + # Full address + location = geocode(", ".join([p.data["address"], p.data["postcode"]])) + print((location.latitude, location.longitude))