import os
from pprint import pprint

import pandas as pd
from tqdm import tqdm
from epc_api.client import EpcClient

from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.OpenUprnClient import OpenUprnClient

# Absolute directory containing this file; every local data path hangs off it.
_BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Built with plain "/" concatenation (not os.path.join) so the resulting
# strings are exactly the ones the clients were handed before.
_LOCAL_DATA_DIR = _BASE_DIR + "/model_data/local_data"

# Land Registry price-paid CSV files, in the order they are given to the client.
_LAND_REGISTRY_FILES = (
    "pp-monthly-update-new-version.csv",
    "pp-2022 (1).csv",
    "pp-2021.csv",
    "pp-2020.csv",
    "pp-2019.csv",
    "pp-2018.csv",
    "pp-2017-part1.csv",
    "pp-2017-part2.csv",
)

LAND_REGISTRY_PATHS = [
    _LOCAL_DATA_DIR + "/" + file_name for file_name in _LAND_REGISTRY_FILES
]


def handler():
    """Run the data-gathering pipeline for the properties in ``input_data``.

    Steps, in order:

    1. Build a ``Property`` for every entry of ``input_data`` and call
       ``search_address_epc()`` on each.
    2. Read the OS Open UPRN file, passing the UPRNs of those properties,
       and call ``get_coordinates()`` on each property.
    3. Download EPC records for every distinct ``local-authority`` value
       (``page_size=5000``, ``n_pages=10``) and run ``EpcClean`` over them.
    4. Read the Land Registry files using address metadata taken from the
       downloaded EPC records.
    5. Read the borehole file and, as an example only, compute the distance
       between the first property and the first borehole record.

    Returns:
        None. The intermediate results are currently only held in locals;
        the coordinates of the first property are pretty-printed.

    Raises:
        IndexError: if ``input_data`` is empty, because the first property
            is accessed by index.
    """
    # To begin with, the input data is a list of dictionaries; eventually we
    # would read this in from a file instead.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = [
        Property(
            postcode=config["postcode"],
            address1=config["address1"],
            epc_client=epc_client,
        )
        for config in input_data
    ]

    for prop in input_properties:
        prop.search_address_epc()

    uprns = [prop.data["uprn"] for prop in input_properties]

    # NOTE(review): the directory is dated 202306 but the file 202305 -
    # confirm this is the intended file.
    open_uprn_client = OpenUprnClient(
        path=_LOCAL_DATA_DIR + "/osopenuprn_202306_csv/osopenuprn_202305.csv",
        uprns=uprns,
    )
    open_uprn_client.read()

    # We're using Ordnance Survey Open UPRN data to find the coordinates of
    # each address, which we will then be able to use at a later stage.
    for prop in input_properties:
        prop.get_coordinates(open_uprn_client)

    pprint(input_properties[0].coordinates)

    # A set, so each local authority is downloaded only once.
    local_authorities = {prop.data["local-authority"] for prop in input_properties}

    data = []
    for local_authority in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": local_authority},
                page_size=5000,
                n_pages=10,
            )
        )

    cleaner = EpcClean(data)
    cleaner.clean()

    # Example cleaned data.
    # Why do we need this stuff?
    # https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
    # These frames are not consumed yet; they are kept as the starting point
    # for the later modelling steps.
    floors = pd.DataFrame(cleaner.cleaned["floor-description"])
    hotwater = pd.DataFrame(cleaner.cleaned["hotwater-description"])
    mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])

    # The postcode and address1-3 fields are upper-cased; "address" and
    # "uprn" are passed through unchanged.
    address_meta = [
        {
            "postcode": record["postcode"].upper(),
            "address1": record["address1"].upper(),
            "address2": record["address2"].upper(),
            "address3": record["address3"].upper(),
            "address": record["address"],
            "uprn": record["uprn"],
        }
        for record in data
    ]

    # Land registry
    land_registry_client = LandRegistryClient(
        paths=LAND_REGISTRY_PATHS,
        addresses=address_meta,
    )
    land_registry_client.read()

    # Borehole
    borehole_client = BoreholeClient(
        path=_LOCAL_DATA_DIR + "/borehole/borehole.dbf"
    )
    borehole_client.read()

    # There are ~1.4 million entries in this dataset, so we first want to
    # reduce the number of entries if possible before comparing against our
    # properties to infer the distance from each property to its nearest
    # borehole.

    # Let's take a sample.
    sample_borehole = borehole_client.data[0]
    sample_property = input_properties[0]

    # For each property, find the nearest borehole.
    # This is just an example, looking at the distance from one property to
    # one borehole.
    dist_m, dist_km = borehole_client.distance_between_bng_coords(
        x1_bng=sample_property.coordinates["x_coordinate"],
        y1_bng=sample_property.coordinates["y_coordinate"],
        x2_bng=sample_borehole["X"],
        y2_bng=sample_borehole["Y"],
    )