Model/model_data/app.py
2023-06-26 18:30:13 +01:00

192 lines
6.7 KiB
Python

from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.ConservationAreaClient import ConservationAreaClient
from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.OpenUprnClient import OpenUprnClient
from model_data.analysis.UvalueEstimations import UvalueEstimations
# Land Registry CSV extracts handed to LandRegistryClient. Every path is
# anchored at this module's directory; the monthly update is listed first,
# followed by the yearly files from 2022 back to 2017 (2017 comes in two parts).
LAND_REGISTRY_PATHS = [
    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/" + csv_name
    for csv_name in (
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    )
]
def handler():
    """Assemble the per-property dataset and write it to ``properties_dataset.csv``.

    The steps run in this order:

    1. Build a ``Property`` for every entry in ``input_data`` and look up its
       EPC record and year built.
    2. Read the OS Open UPRN file and attach coordinates to each property.
    3. Read the conservation-area sources and flag each property.
    4. Download EPC records for every local authority the input properties
       belong to (``page_size=5000``, ``n_pages=10`` per authority).
    5. Clean the downloaded records together with the input properties.
    6. Read the Land Registry and borehole data.
    7. Derive building components and U-value estimates.
    8. Write one CSV row per input property, including the distance to the
       nearest borehole.

    Takes no arguments and returns ``None``; the result is the CSV file
    written to the current working directory.
    """
    # Kept at function scope, as the original did, so importing this module
    # does not pull in pandas or the recommendations package.
    import pandas as pd
    from model_data.recommendations.FloorRecommendations import FloorRecommendations

    local_data_dir = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data"

    # To begin with, the input data is a list of dictionaries; eventually it
    # would be read from a file instead.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    input_properties = [
        Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
        for config in input_data
    ]
    for p in input_properties:
        p.search_address_epc()
        p.set_year_built()

    # We're using Ordnance Survey Open UPRN data to find the coordinates of
    # each address, which are needed again at a later stage.
    uprns = [p.data['uprn'] for p in input_properties]
    open_uprn_client = OpenUprnClient(
        path=local_data_dir + "/osopenuprn_202306_csv/osopenuprn_202305.csv",
        uprns=uprns,
    )
    open_uprn_client.read()
    for p in input_properties:
        p.get_coordinates(open_uprn_client)

    # Check whether each property is in a conservation area.
    conservation_area_client = ConservationAreaClient(
        historic_england_path=local_data_dir + "/Historic_Eng_Conservation_Areas/Conservation_Areas.shp",
        gov_path=local_data_dir + "/gov-conservation-area.geojson",
    )
    conservation_area_client.read()
    for p in input_properties:
        p.set_is_in_conservation_area(conservation_area_client)

    # Download EPC records once per distinct local authority.
    local_authorities = {p.data['local-authority'] for p in input_properties}
    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    # Incorporate input data into cleaning
    cleaner = EpcClean(data + [p.data for p in input_properties])
    cleaner.clean()

    address_meta = [
        {
            "postcode": x["postcode"].upper(),
            "address1": x["address1"].upper(),
            "address2": x["address2"].upper(),
            "address3": x["address3"].upper(),
            "address": x["address"],
            "uprn": x["uprn"],
        }
        for x in data
    ]

    # Land registry
    # NOTE(review): the client is read but nothing below consumes it yet.
    land_registry_client = LandRegistryClient(
        paths=LAND_REGISTRY_PATHS,
        addresses=address_meta,
    )
    land_registry_client.read()

    # Borehole
    borehole_client = BoreholeClient(path=local_data_dir + "/borehole/borehole.dbf")
    borehole_client.read()

    # Now, for our input properties, identify the components of the building,
    # based on the cleaning done above.
    for p in input_properties:
        p.get_components(cleaner)

    uvalue_estimates = UvalueEstimations(data=data)
    uvalue_estimates.get_estimates(cleaner=cleaner)

    # Given the components, we want to identify upgrade options. For now this
    # is exercised on a single sample property only; the guard keeps short
    # input lists from raising IndexError on the hard-coded position.
    # NOTE(review): the result of recommend() is not used further in this function.
    sample_index = 4
    if len(input_properties) > sample_index:
        floor_recommendations = FloorRecommendations(
            property_instance=input_properties[sample_index],
            uvalue_estimates=uvalue_estimates,
        )
        floor_recommendations.recommend()

    # TODO: We need to deduce a U-value for "Good" energy efficiency, i.e. what
    # "Good" means for a description such as
    # 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'.
    # To do this, filter on the local authority code and property type where
    # we have U-values for the wall, and take a median.

    # Build the output rows directly rather than joining a second frame back
    # on "uprn": this keeps exactly one row per input property and does not
    # depend on both sides of a merge sharing the same uprn dtype.
    properties_dataset = pd.DataFrame(
        [
            {
                **p.data,
                "in_conservation_area": p.in_conservation_area,
                **p.coordinates,
                "meters_to_nearest_borehole": _meters_to_nearest_borehole(p, borehole_client),
            }
            for p in input_properties
        ]
    )
    properties_dataset.to_csv("properties_dataset.csv")


def _meters_to_nearest_borehole(prop, borehole_client):
    """Return the smallest distance from *prop* to any borehole, or ``None`` if there are none.

    The distance is the first element of the pair returned by
    ``borehole_client.distance_between_bng_coords``; the calling code labels
    that value as meters.
    """
    x_bng = prop.coordinates['x_coordinate']
    y_bng = prop.coordinates['y_coordinate']
    boreholes = borehole_client.data
    return min(
        (
            borehole_client.distance_between_bng_coords(
                x1_bng=x_bng,
                y1_bng=y_bng,
                x2_bng=float(borehole['EASTING']),
                y2_bng=float(borehole['NORTHING']),
            )[0]
            for borehole in tqdm(boreholes, total=len(boreholes))
        ),
        # An empty borehole table yields a missing value instead of ValueError.
        default=None,
    )