Model/model_data/app.py
Khalim Conn-Kowlessar dd5b8db1d5 tidy up for demo
2023-06-19 10:58:56 +01:00

123 lines
4.4 KiB
Python

import pandas as pd
from pprint import pprint
from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.OpenUprnClient import OpenUprnClient
# Directory prefix shared by every local data file, anchored at the directory
# that contains this module.
_LOCAL_DATA_PREFIX = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/"

# CSV files handed to LandRegistryClient, in the order they are listed here
# (monthly update first, then yearly files from 2022 back to 2017).
LAND_REGISTRY_PATHS = [
    _LOCAL_DATA_PREFIX + csv_name
    for csv_name in (
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    )
]
def handler():
    """Run the demo data-gathering pipeline for the properties in ``input_data``.

    Steps, in order:

    1. Build a ``Property`` for every input record and run its EPC address
       search.
    2. Load the OS Open UPRN extract for the properties' UPRNs and ask each
       property to resolve its coordinates from it.
    3. Download EPC records for every local authority the input properties
       belong to and run them through ``EpcClean``.
    4. Load Land Registry data for the addresses found in the EPC download.
    5. Load the borehole dataset and, as a worked example, measure the
       distance between the first property and the first borehole record.

    Takes no arguments and returns ``None``. If ``input_data`` is empty the
    function returns early, because the sample lookups below index the first
    property and there would be nothing to process.
    """
    # All local data files live under this module's directory.
    base_dir = os.path.abspath(os.path.dirname(__file__))

    # To begin with, the input data is a list of dictionaries; eventually this
    # would be read in from a file.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    input_properties = [
        Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
        for config in input_data
    ]
    if not input_properties:
        # Avoid loading the large local datasets only to fail with an
        # IndexError on input_properties[0] further down.
        return

    for p in input_properties:
        p.search_address_epc()

    # p.data is read here straight after the search, so the search is
    # presumably what populates it -- confirm against Property.
    uprns = [p.data['uprn'] for p in input_properties]

    open_uprn_client = OpenUprnClient(
        path=base_dir + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv",
        uprns=uprns,
    )
    open_uprn_client.read()

    # What's going on here?
    # We're using Ordnance Survey Open UPRN data to find the coordinates of
    # each address, which we will then be able to use at a later stage.
    for p in input_properties:
        p.get_coordinates(open_uprn_client)

    pprint(input_properties[0].coordinates)

    # A set, so each local authority is downloaded only once.
    local_authorities = {p.data['local-authority'] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    cleaner = EpcClean(data)
    cleaner.clean()

    # Example cleaned data.
    # Why do we need this stuff?
    # https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
    # NOTE(review): these frames are not consumed further down; they are kept
    # as inspection points for the demo.
    floors = pd.DataFrame(cleaner.cleaned['floor-description'])
    hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description'])
    mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])

    # Address fields are upper-cased here, presumably to match the Land
    # Registry files -- confirm against LandRegistryClient.
    address_meta = [
        {
            "postcode": x["postcode"].upper(),
            "address1": x["address1"].upper(),
            "address2": x["address2"].upper(),
            "address3": x["address3"].upper(),
            "address": x["address"],
            "uprn": x["uprn"],
        }
        for x in data
    ]

    # Land registry
    land_registry_client = LandRegistryClient(
        paths=LAND_REGISTRY_PATHS,
        addresses=address_meta,
    )
    land_registry_client.read()

    # Borehole
    borehole_client = BoreholeClient(
        path=base_dir + "/model_data/local_data/borehole/borehole.dbf"
    )
    borehole_client.read()

    # There are ~1.4 million entries in this dataset, so we first want to
    # reduce the number of entries if possible before comparing our properties
    # against them to infer the distance from each property to its nearest
    # borehole.

    # Let's take a sample. The original local was named `property`, which
    # shadowed the builtin; it is renamed here.
    sample_borehole = borehole_client.data[0]
    sample_property = input_properties[0]

    # Eventually: for each property, find the nearest borehole.
    # This is just an example, looking at the distance from one property to
    # one borehole; the result is not used yet.
    dist_m, dist_km = borehole_client.distance_between_bng_coords(
        x1_bng=sample_property.coordinates["x_coordinate"],
        y1_bng=sample_property.coordinates["y_coordinate"],
        x2_bng=sample_borehole["X"],
        y2_bng=sample_borehole["Y"],
    )