mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
167 lines
6.6 KiB
Python
167 lines
6.6 KiB
Python
import pandas as pd
|
|
|
|
|
|
# (label, inclusive lower bound, exclusive upper bound) for each half-band, highest band first.
_SAP_BANDS = (
    ("High_A", 96, float("inf")),
    ("Low_A", 92, 96),
    ("High_B", 86, 92),
    ("Low_B", 81, 86),
    ("High_C", 74.5, 81),
    ("Low_C", 69, 74.5),
    ("High_D", 61.5, 69),
    ("Low_D", 55, 61.5),
    ("High_E", 46.5, 55),
    ("Low_E", 39, 46.5),
    ("High_F", 29.5, 39),
    ("Low_F", 21, 29.5),
    ("High_G", 10.5, 21),
    ("Low_G", 1, 10.5),
)


def get_band(sap_score_number):
    """Return the half-band label (e.g. "High_C") that contains a SAP score.

    Each band covers ``lower <= score < upper``. Scores that fall in no band
    (below 1, infinite, or NaN, which fails every comparison) give ``None``.
    """
    matching_labels = (
        label
        for label, floor, ceiling in _SAP_BANDS
        if floor <= sap_score_number < ceiling
    )
    # Bands do not overlap, so the first match (if any) is the only match.
    return next(matching_labels, None)
|
|
|
|
|
|
def classify_floor_area(floor_area):
    """Return the floor-area band label for a total floor area.

    Bands are "0-72", "73-97", "98-199" and "200+"; each upper bound is
    inclusive, so e.g. 72 is "0-72" and anything above 72 up to 97 is "73-97".

    Returns ``None`` when the floor area is missing (``None``/NaN). Without
    this guard a NaN fails every ``<=`` comparison and would be reported as
    "200+", silently placing properties with no recorded area in the largest
    band.
    """
    if pd.isna(floor_area):
        return None

    if floor_area <= 72:
        return "0-72"

    if floor_area <= 97:
        return "73-97"

    if floor_area <= 199:
        return "98-199"

    return "200+"
|
|
|
|
|
|
# Read the "Standardised Asset List" sheet of the customer workbook.
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
    sheet_name="Standardised Asset List",
)

# Derive the two banding columns used to define archetypes: each source column is passed value-by-value
# through its classifier.
for derived_column, source_column, classifier in (
    ("starting_sap_band", "epc_sap_score_on_register", get_band),
    ("floor_area_band", "epc_total_floor_area", classify_floor_area),
):
    asset_list[derived_column] = asset_list[source_column].apply(classifier)
|
|
|
|
# Objective:
|
|
# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
|
|
#
|
|
# Therefore:
|
|
# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
|
|
# to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
|
|
# qualify
|
|
# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
|
|
# archetypes
|
|
#
|
|
# Driving Factors:
|
|
# 1) Floor area band & starting SAP band - this will determine how much funding is produced
|
|
# 2) Heating system - this will determine if the property needs a heating upgrade or not
|
|
|
|
|
|
# Only properties with a registered SAP score of 68 or less are considered.
in_scope_properties = asset_list[asset_list["epc_sap_score_on_register"] <= 68]

# An archetype is a unique (floor area band, starting SAP band, heating system) combination. Count the
# distinct properties in each one and list the most common archetypes first.
archetypes = (
    in_scope_properties
    .groupby(["floor_area_band", "starting_sap_band", "landlord_heating_system"])["landlord_property_id"]
    .nunique()
    .reset_index()
    .rename(columns={"landlord_property_id": "n_properties"})
    .sort_values("n_properties", ascending=False)
)

# Coverage so far: number and percentage of in-scope properties accounted for by this archetype and all
# more common ones.
archetypes["running_total"] = archetypes["n_properties"].cumsum()
archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100

# Every heating system other than "boiler - other fuel" is flagged as electric.
archetypes["is_electric"] = archetypes["landlord_heating_system"].ne("boiler - other fuel")
heating_systems_to_upgrade = ["boiler - other fuel", "electric storage heaters"]
archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(heating_systems_to_upgrade)

# Renumber rows 0..n-1 in order of prevalence.
archetypes = archetypes.reset_index(drop=True)
|
|
|
|
# Give every archetype its identifier (its row position in `archetypes`) BEFORE deriving the electric and oil
# subsets. Previously the id was assigned only after both subsets had been built, so looking up
# archetypes["archetype_id"] for the electric subset raised a KeyError and the oil subset never carried an id
# at all (its rows could not be matched back to any property).
archetypes["archetype_id"] = archetypes.index

is_oil_heated = archetypes["landlord_heating_system"] == "boiler - other fuel"

# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
electric_heated_archetypes = archetypes[~is_oil_heated].copy().reset_index(drop=True)

# Recompute the coverage columns over the non-oil archetypes only.
electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
electric_heated_archetypes["cumulative_percentage"] = (
    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
)

# The main properties that need validation surveys are properties that require a heating upgrade.
# archetype_id is inherited from `archetypes`, so no merge is needed to attach it.
electric_heated_archetypes = electric_heated_archetypes[
    electric_heated_archetypes["needs_heating_upgrade"]
].reset_index(drop=True)

oil_archetypes = archetypes[is_oil_heated].copy().reset_index(drop=True)

# Tag each property with the id of the archetype it belongs to. Properties whose combination of bands and
# heating system is not in `archetypes` keep a missing archetype_id (left join).
asset_list = asset_list.merge(
    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
    how="left",
    on=["starting_sap_band", "floor_area_band", "landlord_heating_system"],
)
|
|
|
|
# Properties belonging to one of the electric archetypes selected for a heating upgrade.
in_selected_archetype = asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
properties_for_verification = asset_list[in_selected_archetype].copy()

# Postal region: the part of the postcode before the first space, with surrounding whitespace removed.
postcode_first_part = properties_for_verification["domna_postcode"].str.split(" ").str[0]
properties_for_verification["postal_region"] = postcode_first_part.str.strip()

# Age of the EPC in whole days, measured from the inspection date to the moment the script runs.
inspection_dates = pd.to_datetime(properties_for_verification["epc_inspection_date"])
properties_for_verification["epc_age"] = (pd.Timestamp.now() - inspection_dates).dt.days
|
|
|
|
# We also survey 2 oil heater properties, so we take the 2 most prevalent oil archetypes (the archetype
# table is ordered by number of properties, so these are its first two oil rows).
archetypes_for_survey = pd.concat([electric_heated_archetypes, oil_archetypes.head(2)])

# Pick one property per surveyed archetype. A property with no EPC inspection date is preferred
# (presumably these are the estimated EPCs - confirm); otherwise take the one with the oldest EPC.
sample = []
for _, config in archetypes_for_survey.iterrows():
    in_archetype = (
        (asset_list["archetype_id"] == config["archetype_id"])
        & (asset_list["floor_area_band"] == config["floor_area_band"])
        & (asset_list["starting_sap_band"] == config["starting_sap_band"])
    )
    properties = asset_list[in_archetype]

    lacks_inspection_date = properties["epc_inspection_date"].isna()
    if lacks_inspection_date.any():
        candidates = properties[lacks_inspection_date]
    else:
        # Earliest inspection date first, i.e. the oldest EPC.
        candidates = properties.sort_values("epc_inspection_date", ascending=True)

    sample.extend(candidates.head(1).to_dict("records"))

# Keep only the columns needed to identify and locate each sampled property.
sample_columns = [
    "landlord_property_id",
    "epc_inspection_date",
    "epc_sap_score_on_register",
    "starting_sap_band",
    "floor_area_band",
    "landlord_heating_system",
    "domna_postcode",
    "domna_full_address",
    "archetype_id",
]
sample = pd.DataFrame(sample)[sample_columns]
|
|
|
|
# From here on `archetypes` is the property -> archetype lookup, with the id stored as text.
archetypes = asset_list[["landlord_property_id", "archetype_id"]].astype({"archetype_id": str})

filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"

# Write one workbook with two sheets: the property -> archetype lookup and the survey sample.
with pd.ExcelWriter(filename) as writer:
    for sheet_name, frame in (("Archetypes", archetypes), ("Survey Sample", sample)):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
|
|
|
|
# We store this
|
|
|
|
# Questions:
|
|
# 1) If futures are considering changing properties that have oil heating systems, we could include them and
|
|
# we have 39 total archetypes. Otherwise, we have 25 archetypes
|
|
# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
|
|
# using
|
|
|
|
# Recommendations:
|
|
# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
|
|
#
|