added windows glazing remapping

This commit is contained in:
Khalim Conn-Kowlessar 2026-02-03 11:52:59 +00:00
parent 41aed1cffe
commit 6cb20b4828

View file

@ -356,7 +356,6 @@ data["has_sloping_ceiling"] = data["Roof Construction"].apply(
# ------------ Floor Construction ------------
floor_mapping = {
# Solid floor
('Solid', 'AsBuilt'): None, # Mapped
@ -428,10 +427,65 @@ assert data["landlord_floor_description"].isnull().sum() == 0, (
"Some floor descriptions could not be resolved"
)
# ------------ Glazing ------------
# Remap the raw "Glazing" category onto the landlord window columns. The columns we wish to produce are:
#   1) landlord_windows_description
#   2) landlord_windows_efficiency
#   3) landlord_multi_glaze_proportion - maybe don't need to set this, same for glazed type and area
#   4) landlord_glazed_type
#   5) landlord_glazed_area
glazing_columns = [
    "landlord_windows_description",
    "landlord_windows_efficiency",
    "landlord_multi_glaze_proportion",
    "landlord_glazed_type",
    "landlord_glazed_area",
]
glazing_map = {
    # raw value: (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
    # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more,
    # so both are left as None for every category.
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022
    # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate
    # as to how we make updates to the windows data.
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # Triple known data is high performance glazing with Good efficiency (at least)
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
}
# Fail fast on categories we have no mapping for, instead of silently writing nulls into the output columns.
# Rows where "Glazing" itself is missing are tolerated and stay null in all five columns.
unmapped_glazing = sorted(set(data["Glazing"].dropna()) - set(glazing_map), key=str)
assert not unmapped_glazing, (
    f"Some glazing values could not be resolved: {unmapped_glazing}"
)
# Perform the remapping. Building one DataFrame from the list of tuples avoids constructing a pd.Series per
# row, and still yields all five columns when `data` is empty.
glazing_missing = (None,) * len(glazing_columns)
data[glazing_columns] = pd.DataFrame(
    [glazing_map.get(raw_value, glazing_missing) for raw_value in data["Glazing"]],
    index=data.index,
    columns=glazing_columns,
)
# ------------ Heating ------------
# Count how many rows share each (Heating, Boiler Efficiency, Main Fuel) combination.
agg = (
    data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel'])
    .size()
    .reset_index(name='counts')
)
# Exploratory look at a local EPC certificates extract. The value_counts() results below are not assigned,
# so they are only visible when run interactively.
# NOTE(review): this is an absolute path on one developer's machine - confirm before running elsewhere.
epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv")
# Distribution of window descriptions for certificates lodged after 2025-07-01.
epcs.loc[epcs["LODGEMENT_DATE"] > "2025-07-01", "WINDOWS_DESCRIPTION"].value_counts()
# Distribution of glazed area values for the same lodgement window.
epcs.loc[epcs["LODGEMENT_DATE"] > "2025-07-01", "GLAZED_AREA"].value_counts()
# Window efficiency ratings recorded against "Full secondary glazing" in that window.
epcs.loc[
    (epcs["WINDOWS_DESCRIPTION"] == "Full secondary glazing") & (epcs["LODGEMENT_DATE"] > "2025-07-01"),
    "WINDOWS_ENERGY_EFF",
].value_counts()
# ------------ Fuel ------------
# ------------ Heating Controls ------------
# ------------ Floor Area ------------
# TODO: Convert everything to values
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode',
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
# 'Glazing', 'Heating',
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
# 'Total Floor Area (m2)'
# Exploratory check: frequency of each raw "Glazing" category. The result is not assigned, so it is only
# visible when run interactively.
# NOTE(review): the statement appears twice; the second call looks redundant - confirm and remove if so.
data["Glazing"].value_counts()
data["Glazing"].value_counts()