added windows glazing remapping

2026-07-27 23:35:01 +00:00 · 2026-02-03 11:52:59 +00:00 · 2026-02-03 11:52:59 +00:00 · 6cb20b4828
commit 6cb20b4828
parent 41aed1cffe
1 changed files with 56 additions and 2 deletions
--- a/backend/onboarders/parity.py
+++ b/backend/onboarders/parity.py
@ -356,7 +356,6 @@ data["has_sloping_ceiling"] = data["Roof Construction"].apply(

 # ------------ Floor Construction ------------

-
 floor_mapping = {
    # Solid floor
    ('Solid', 'AsBuilt'): None,  # Mapped
@ -428,10 +427,65 @@ assert data["landlord_floor_description"].isnull().sum() == 0, (
    "Some floor descriptions could not be resolved"
 )

+# ------------ Glazing ------------
+glazing_map = {
+    # Value tuple: (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area).
+    # For SAP 10 assessments, the glazed type and glazed area are no longer populated in the EPC API data.
+    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
+    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
+    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
+    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
+    # For triple glazing with age unknown, the performance is only average, whereas a post-2022 installation is
+    # classed as high performance glazing with good efficiency. We'll need to be careful about how we make updates
+    # to the windows data.
+    # "TripleKnownData" is high performance glazing with (at least) Good efficiency.
+    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
+    # This is also classed as high performance glazing
+    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
+    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
+    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
+    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
+}
+
+data[["landlord_windows_description",
+      "landlord_windows_efficiency",
+      "landlord_multi_glaze_proportion",
+      "landlord_glazed_type",
+      "landlord_glazed_area"]] = data["Glazing"].map(glazing_map).progress_apply(pd.Series)
+
+# Perform the remapping. The columns we wish to produce are the following:
+# 1) landlord_windows_description
+# 2) landlord_windows_efficiency
+# 3) landlord_multi_glaze_proportion - maybe don't need to set this; same for glazing type and area
+
+
+# ------------ Heating ------------
+
+
+agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel']).size().reset_index(name='counts')
+epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv")
+
+epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts()
+epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts()
+
+epcs[
+    (epcs["WINDOWS_DESCRIPTION"] == "Full secondary glazing") & (epcs["LODGEMENT_DATE"] > "2025-07-01")
+    ]["WINDOWS_ENERGY_EFF"].value_counts()
+
+# ------------ Fuel ------------
+
+# ------------ Heating Controls ------------
+
+# ------------ Floor Area ------------
 # TODO: Convert everything to values

 # Variables we want to map
 # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode',
-#        'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
+#         'Glazing', 'Heating',
 #        'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
 #        'Total Floor Area (m2)'
+
+
+data["Glazing"].value_counts()
+
+data["Glazing"].value_counts()