minor changes to add property type as a breakdown in downloader

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-30 09:32:58 +01:00
parent abed2ce2de
commit b922d5a9b7

View file

@ -1,3 +1,4 @@
import pandas as pd
from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
@ -69,19 +70,23 @@ def handler():
p.set_is_in_conservation_area(conservation_area_client)
local_authorities = {p.data['local-authority'] for p in input_properties}
# TODO: Do this at a constituency level
constituencies = {p.data["constituency"] for p in input_properties}
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
# TODO: Create a more balanced sample where we grab more properties across different property
# types, as e.g. we're pulling many more flats than houses
# We pull properties from local authorities, by property type. This will allow us to build
# a dataset of up to 10k properties per local authority/property type combination
data = []
for la in tqdm(local_authorities):
data.extend(
pagenated_epc_download(
client=epc_client,
params={"local-authority": la},
page_size=5000,
n_pages=10,
for pt in property_types:
data.extend(
pagenated_epc_download(
client=epc_client,
params={"local-authority": la, "property-type": pt},
page_size=5000,
n_pages=10,
)
)
)
# Incorporate input data into cleaning
cleaner = EpcClean(data + [p.data for p in input_properties])
@ -120,85 +125,147 @@ def handler():
uvalue_estimates = UvalueEstimations(data=data)
uvalue_estimates.get_estimates(cleaner=cleaner)
input_properties[4].data["address1"]
input_properties[4].data["postcode"]
floors_df["address1"].values[4]
floors_df["original_description"].values[4]
# all_data = {
# "input_properties": input_properties,
# "cleaner": cleaner,
# "uvalue_estimates": uvalue_estimates,
# "land_registry_client": land_registry_client,
# "borehole_client": borehole_client,
# "conservation_area_client": conservation_area_client,
# "open_uprn_client": open_uprn_client,
# "data": data
# }
df = pd.DataFrame(
[
x.data for x in input_properties
]
)
df["property-type"].unique()
# import pickle
# with open("all_data.pkl", "wb") as f:
# pickle.dump(all_data, f)
from model_data.recommendations.WallRecommendations import WallRecommendations
all_res = []
for p in input_properties:
inst = WallRecommendations(property_instance=p, uvalue_estimates=uvalue_estimates)
inst.recommend()
n_recs = len(inst.recommendations)
all_res.append(n_recs)
# input_properties[4].data["address1"]
# input_properties[4].data["postcode"]
# floors_df["address1"].values[4]
# floors_df["original_description"].values[4]
#
# df = pd.DataFrame(
# [
# x.data for x in input_properties
# ]
# )
# df["property-type"].unique()
#
# from model_data.recommendations.WallRecommendations import WallRecommendations
# all_res = []
# for p in input_properties:
# inst = WallRecommendations(property_instance=p, uvalue_estimates=uvalue_estimates)
# inst.recommend()
# n_recs = len(inst.recommendations)
# all_res.append(n_recs)
#
# self = WallRecommendations(property_instance=input_properties[2], uvalue_estimates=uvalue_estimates)
# input_properties[6].walls
# self.recommend()
# df = pd.DataFrame(self.recommendations[0]["parts"])
# recommendations = pd.DataFrame(self.recommendations)
#
# from model_data.recommendations.FloorRecommendations import FloorRecommendations
# self = FloorRecommendations(property_instance=input_properties[4], uvalue_estimates=uvalue_estimates)
# self.recommendations
# self.recommend()
# self.recommendations
#
# # We need to deduce a U-value for "Good" energy efficiency
#
# mainheating = pd.DataFrame(
# [{"address1": p.address1, "postcode": p.postcode, **p.main_heating} for p in input_properties])
# hotwater = pd.DataFrame([{"address1": p.address1, **p.hotwater} for p in input_properties])
#
# mainheating[["address1", "postcode"]]
#
# # TODO: I want to know what "Good" efficiency means for the description
# # 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'
# # so to do this, filter on the local authority code and property type, where we have U
# # values for the wall and take a median!
#
# p = input_properties[6]
# df = pd.DataFrame(data)
#
# res = []
# for p in input_properties:
# distances = []
# for borehole in tqdm(borehole_client.data, total=len(borehole_client.data)):
# dist_meeters, _ = borehole_client.distance_between_bng_coords(
# x1_bng=p.coordinates['x_coordinate'],
# y1_bng=p.coordinates['y_coordinate'],
# x2_bng=float(borehole['EASTING']),
# y2_bng=float(borehole['NORTHING'])
# )
# distances.append(dist_meeters)
#
# res.append(
# {
# "uprn": int(p.data["uprn"]),
# "meters_to_nearest_borehole": min(distances)
# }
#
# )
# res = pd.DataFrame(res)
#
# properties_dataset = [
# {
# **p.data,
# "in_conservation_area": p.in_conservation_area,
# **p.coordinates,
#
# } for p in input_properties
# ]
#
# properties_dataset = pd.DataFrame(properties_dataset)
# properties_dataset = properties_dataset.merge(res, on="uprn", how="left")
#
# properties_dataset.to_csv("properties_dataset.csv")
self = WallRecommendations(property_instance=input_properties[2], uvalue_estimates=uvalue_estimates)
input_properties[6].walls
self.recommend()
df = pd.DataFrame(self.recommendations[0]["parts"])
recommendations = pd.DataFrame(self.recommendations)
from model_data.recommendations.FloorRecommendations import FloorRecommendations
self = FloorRecommendations(property_instance=input_properties[4], uvalue_estimates=uvalue_estimates)
self.recommendations
self.recommend()
self.recommendations
# We need to deduce a U-value for "Good" energy efficiency
mainheating = pd.DataFrame(
[{"address1": p.address1, "postcode": p.postcode, **p.main_heating} for p in input_properties])
hotwater = pd.DataFrame([{"address1": p.address1, **p.hotwater} for p in input_properties])
mainheating[["address1", "postcode"]]
# TODO: I want to know what "Good" efficiency means for the description
# 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'
# so to do this, filter on the local authority code and property type, where we have U
# values for the wall and take a median!
p = input_properties[6]
# We test estimating gain
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
df = pd.DataFrame(data)
res = []
for p in input_properties:
distances = []
for borehole in tqdm(borehole_client.data, total=len(borehole_client.data)):
dist_meeters, _ = borehole_client.distance_between_bng_coords(
x1_bng=p.coordinates['x_coordinate'],
y1_bng=p.coordinates['y_coordinate'],
x2_bng=float(borehole['EASTING']),
y2_bng=float(borehole['NORTHING'])
)
distances.append(dist_meeters)
res.append(
{
"uprn": int(p.data["uprn"]),
"meters_to_nearest_borehole": min(distances)
}
)
res = pd.DataFrame(res)
properties_dataset = [
{
**p.data,
"in_conservation_area": p.in_conservation_area,
**p.coordinates,
} for p in input_properties
# We want to estimate for making improvements on different property components
response = "environment-impact-current"
base_features = [
"property-type",
"built-form",
# "construction-age-band",
"number-habitable-rooms",
]
properties_dataset = pd.DataFrame(properties_dataset)
properties_dataset = properties_dataset.merge(res, on="uprn", how="left")
component_features = [
"walls-description",
"floor-description",
]
properties_dataset.to_csv("properties_dataset.csv")
model_data = df[[response] + component_features + base_features]
model_data = model_data.reset_index()
model_data["idx"] = model_data.index.copy()
summary = (
model_data
.groupby(component_features + base_features)
.agg({response: 'median', "idx": 'size'})
.reset_index()
)
summary = summary.sort_values("walls-description")
example = summary[
(summary["walls-description"].isin(
[
"Solid brick, as built, no insulation (assumed)",
"Solid brick, as built, partial insulation (assumed)",
"Solid brick, as built, insulated (assumed)",
]
)) &
(summary["property-type"] == "House") &
(summary["built-form"] == "Detached") &
# (summary["construction-age-band"] == "England and Wales: 1976-1982")
(summary["number-habitable-rooms"] == "4")
]