From 6cb20b4828545ced6de8dc8ebe3acfd24500e4de Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Feb 2026 11:52:59 +0000 Subject: [PATCH] added windows glazing remapping --- backend/onboarders/parity.py | 58 ++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index dfba0443..a1faea3c 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -356,7 +356,6 @@ data["has_sloping_ceiling"] = data["Roof Construction"].apply( # ------------ Floor Construction ------------ - floor_mapping = { # Solid floor ('Solid', 'AsBuilt'): None, # Mapped @@ -428,10 +427,65 @@ assert data["landlord_floor_description"].isnull().sum() == 0, ( "Some floor descriptions could not be resolved" ) +# ------------ Glazing ------------ +glazing_map = { + # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area) + # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more + "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022 + # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to + # how we make updates to the windows data. 
+ # Triple known data (the TripleKnownData key) is high performance glazing with Good efficiency (at least) + "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + # This is also classed as high performance glazing + "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) + "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), +} + +data[["landlord_windows_description", + "landlord_windows_efficiency", + "landlord_multi_glaze_proportion", + "landlord_glazed_type", + "landlord_glazed_area"]] = data["Glazing"].map(glazing_map).progress_apply(pd.Series) + +# Perform the remapping. The columns we wish to produce are the following: +# 1) landlord_windows_description +# 2) landlord_windows_efficiency +# 3) landlord_multi_glaze_proportion - maybe don't need to set this, same for glazing type and area + + +# ------------ Heating ------------ + + +agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel']).size().reset_index(name='counts') +epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv") + +epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts() +epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts() + +epcs[ + (epcs["WINDOWS_DESCRIPTION"] == "Full secondary glazing") & (epcs["LODGEMENT_DATE"] > "2025-07-01") + ]["WINDOWS_ENERGY_EFF"].value_counts() + +# ------------ Fuel ------------ + +# ------------ Heating Controls ------------ + +# ------------ Floor Area ------------ # TODO: Convert everything to values # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', -# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', +# 'Glazing', 'Heating', # 'Boiler Efficiency', 'Main Fuel', 
'Controls Adequacy', 'UPRN', # 'Total Floor Area (m2)' + + +data["Glazing"].value_counts() + +data["Glazing"].value_counts()