From 28e9f37739ad8fc4b105743044118f2cadf8fb4b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 29 Jan 2026 18:45:42 +0000 Subject: [PATCH 01/33] setting up the wall mapping functions --- asset_list/app.py | 28 +- asset_list/mappings/property_type.py | 4 +- asset_list/mappings/walls.py | 11 +- backend/onboarders/base.py | 0 backend/onboarders/epc_descriptions.py | 247 ++++++++++++++++++ backend/onboarders/mappings/age_band.py | 31 ++- .../mappings/as_built_wall_classifiers.py | 204 +++++++++++++++ backend/onboarders/parity.py | 208 ++++++++++++--- 8 files changed, 672 insertions(+), 61 deletions(-) create mode 100644 backend/onboarders/base.py create mode 100644 backend/onboarders/epc_descriptions.py create mode 100644 backend/onboarders/mappings/as_built_wall_classifiers.py diff --git a/asset_list/app.py b/asset_list/app.py index 01906c5f..969b0184 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,24 +59,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney" - data_filename = "Domna SHF Wave 3 (3).xlsx" - sheet_name = "Domna Wave 3" - postcode_column = 'Postcode' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1"] + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/20260129 SAL" + data_filename = "NCHA ASSET LIST 1.xlsx" + sheet_name = "NCHA ASSET LIST" + postcode_column = 'POSTCODE' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = 'ADDRESS' + address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "Construction Years" - landlord_os_uprn = "UPRN" - landlord_property_type = "Type" - landlord_built_form = "Attachment" - landlord_wall_construction = "Wall type" + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "PROPERTY TYPE" + landlord_built_form = "BUILD FORM" + 
landlord_wall_construction = "wall combined" landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "Row ID" + landlord_property_id = "UPRN" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1f251598..703cb8ef 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -427,6 +427,8 @@ PROPERTY_MAPPING = { 'End Terrace': 'unknown', 'Detached': 'unknown', 'Mid-terrace': 'unknown', - 'MID - TERRACE': 'unknown' + 'MID - TERRACE': 'unknown', + 'COMOFF': 'unknown', + 'LOTS': 'unknown' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 418ae9f8..1bb02a9a 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'System built Internal': 'insulated system built', 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation', - 'Cavity: FilledCavityPlusExternal': 'filled cavity' + 'Cavity: FilledCavityPlusExternal': 'filled cavity', + + 'Cavity, Filled Cavity': 'filled cavity', + 'Solid Brick, As Built': 'solid brick unknown insulation', + 'Cavity, As Built': 'cavity unknown insulation', + 'Sandstone, As Built': 'sandstone or limestone unknown insulation', + 'Timber Frame, As Built': 'timber frame unknown insulation', + 'Solid Brick, Internal Insulation': 'insulated solid brick', + 'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation', + 'Solid Brick, External': 'insulated solid brick' } diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py new file mode 100644 index 00000000..a674e332 --- /dev/null +++ b/backend/onboarders/epc_descriptions.py @@ -0,0 +1,247 @@ +import re 
+from enum import Enum +from typing import Callable, Union, List + + +class EpcConstructionAgeBand(Enum): + before_1900: str = 'England and Wales: before 1900' + from_1900_to_1929: str = 'England and Wales: 1900-1929' + from_1930_to_1949: str = 'England and Wales: 1930-1949' + from_1950_to_1966: str = 'England and Wales: 1950-1966' + from_1967_to_1975: str = 'England and Wales: 1967-1975' + from_1976_to_1982: str = 'England and Wales: 1976-1982' + from_1983_to_1990: str = 'England and Wales: 1983-1990' + from_1991_to_1995: str = 'England and Wales: 1991-1995' + from_1996_to_2002: str = 'England and Wales: 1996-2002' + from_2003_to_2006: str = 'England and Wales: 2003-2006' + from_2007_to_2011: str = 'England and Wales: 2007-2011' + from_2012_onwards: str = 'England and Wales: 2012-onwards' + from_2012_to_2022: str = 'England and Wales: 2012-2022' + from_2023_onwards: str = 'England and Wales: 2023 onwards' + + def start_year(self) -> int: + """ + Extract the starting year of the age band. + """ + value = self.value.lower() + + if 'before' in value: + return 0 + match = re.search(r'(\d{4})', value) + if not match: + raise ValueError(f"Cannot determine start year from '{self.value}'") + + return int(match.group(1)) + + @classmethod + def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]: + """ + Return all age bands whose starting year is >= the given year. 
+ """ + return [ + band + for band in cls + if band.start_year() >= year + ] + + +class EpcWallDescriptions(Enum): + # Cavity wall descriptions + cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)" + cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)" + cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)" + cavity_filled_cavity: str = "Cavity wall, filled cavity" + cavity_internal_insulation: str = "Cavity wall, with internal insulation" + cavity_external_insulation: str = "Cavity wall, with external insulation" + cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation" + cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation" + + # Solid wall descriptions + solid_brick_internal_insulation: str = "Solid brick, with internal insulation" + solid_brick_external_insulation: str = "Solid brick, with external insulation" + solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)' + solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)' + solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)' + + # System + system_external_insulation: str = "System built, with external insulation" + system_internal_insulation: str = "System built, with internal insulation" + system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)" + system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)" + system_insulated_assumed: str = "System built, as built, insulated (assumed)" + + # Timber + timber_frame_internal_insulation: str = "Timber frame, with internal insulation" + timber_frame_external_insulation: str = "Timber frame, with external insulation" + timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)" + 
timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)" + timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)" + + # Granite/whinstone + granite_whinstone_external_insulation: str = "Granite or whin, with external insulation" + granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation" + granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)" + granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)" + granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)" + + # Sandstone/limestone + sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation" + sandstone_limestone_external_insulation: str = "Sandstone, with external insulation" + sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)" + sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)" + sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)" + + # Cob + cob_as_built_average = "Cob, as built" + cob_as_built_good = "Cob, as built" + + +class EpcEfficiency(Enum): + VERY_POOR = "Very Poor" + POOR = "Poor" + AVERAGE = "Average" + GOOD = "Good" + VERY_GOOD = "Very Good" + NA = "N/A" + + +EfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand], EpcEfficiency], +] + + +def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps cavity filled to efficiency based on construction age band. 
+ :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def internal_external_insulation_efficiency( + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Maps: + - cavity unfilled with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + - solid brick with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + - system built with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + + All of these wall types have the same behaviour in elmhurst + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_1983_to_1990, + EpcConstructionAgeBand.from_1991_to_1995, + EpcConstructionAgeBand.from_1996_to_2002, + EpcConstructionAgeBand.from_2003_to_2006, + EpcConstructionAgeBand.from_2007_to_2011, + EpcConstructionAgeBand.from_2012_to_2022, + EpcConstructionAgeBand.from_2023_onwards, + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps: + - timber frame with internal/external wall insulation to efficiency based on construction age band. + - sandstone/limestone with internal/external wall insulation to efficiency based on construction age band. + - granite/whinstone with internal/external wall insulation to efficiency based on construction age band. 
+ :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +WALL_DESCRIPTION_METADATA = { + # Note: all function mappings have been defined based on Elmhurst + # Cavity + # value mappings + EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD, + EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD, + EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD, + # function mappings + EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency, + EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency, + + # Solid brick + # value mappings + EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency, + + # System + # value mappings + EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency, + + # Timber frame + # value mappings + 
EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Granite / whinstone + EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Sandstone / limestone + EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Cob (special case) + EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, +} + + +def resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + rule = WALL_DESCRIPTION_METADATA[description] + + if isinstance(rule, EpcEfficiency): + return rule + + return rule(age_band) diff --git 
a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py index 2487c921..5106a3fe 100644 --- a/backend/onboarders/mappings/age_band.py +++ b/backend/onboarders/mappings/age_band.py @@ -1,14 +1,19 @@ -party_map = { - "Before 1900": 'England and Wales: before 1900', - "1900-1929": 'England and Wales: 1900-1929', - "1930-1949": 'England and Wales: 1930-1949', - "1950-1966": 'England and Wales: 1950-1966', - "1967-1975": 'England and Wales: 1967-1975', - "1976-1982": 'England and Wales: 1976-1982', - "1983-1990": 'England and Wales: 1983-1990', - "1991-1995": 'England and Wales: 1991-1995', - "1996-2002": 'England and Wales: 1996-2002', - "2003-2006": 'England and Wales: 2003-2006', - "2007-2011": 'England and Wales: 2007-2011', - "2012 onwards": 'England and Wales: 2012-2021', +from backend.onboarders.epc_descriptions import EpcConstructionAgeBand + +parity_map = { + "Before 1900": EpcConstructionAgeBand.before_1900.value, + "1900-1929": EpcConstructionAgeBand.from_1900_to_1929.value, + "1930-1949": EpcConstructionAgeBand.from_1930_to_1949.value, + "1950-1966": EpcConstructionAgeBand.from_1950_to_1966.value, + "1967-1975": EpcConstructionAgeBand.from_1967_to_1975.value, + "1976-1982": EpcConstructionAgeBand.from_1976_to_1982.value, + "1983-1990": EpcConstructionAgeBand.from_1983_to_1990.value, + "1991-1995": EpcConstructionAgeBand.from_1991_to_1995.value, + "1996-2002": EpcConstructionAgeBand.from_1996_to_2002.value, + "2003-2006": EpcConstructionAgeBand.from_2003_to_2006.value, + "2007-2011": EpcConstructionAgeBand.from_2007_to_2011.value, + "2012 onwards": EpcConstructionAgeBand.from_2012_onwards.value, + # Newer age bands, under SAP10 + "2012-2022": EpcConstructionAgeBand.from_2012_to_2022.value, + "2023 onwards": EpcConstructionAgeBand.from_2023_onwards.value, } diff --git a/backend/onboarders/mappings/as_built_wall_classifiers.py b/backend/onboarders/mappings/as_built_wall_classifiers.py new file mode 100644 index 00000000..e0ef193f --- 
/dev/null +++ b/backend/onboarders/mappings/as_built_wall_classifiers.py @@ -0,0 +1,204 @@ +def map_cavity_wall_insulation(age_band): + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + 'England and Wales: 1950-1966', + 'England and Wales: 1967-1975' + ]: + return EpcWallDescriptions.cavity_no_insulation_assumed + + if age_band in [ + 'England and Wales: 1976-1982' + ]: + return EpcWallDescriptions.cavity_partial_insulated_assumed + + if age_band in [ + 'England and Wales: 1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.cavity_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") + + +def map_solid_wall_insulation(age_band): + if age_band in [ + 'England and Wales: before 1900', 'England and Wales: 1900-1929', 'England and Wales: 1930-1949', + 'England and Wales: 1967-1975' + ]: + return EpcWallDescriptions.solid_brick_no_insulation_assumed + + if age_band in [ + 'England and Wales: 1976-1982' + ]: + return EpcWallDescriptions.solid_brick_partial_insulated_assumed + + if age_band in [ + 'England and Wales: 1983-1990', 'England and Wales: 1991-1995', 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', 'England and Wales: 2007-2011', 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.solid_brick_insulated_assumed + + +def map_timber_frame_wall_insulation(age_band): + # No insulation (Poor) + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + ]: + return EpcWallDescriptions.timber_frame_no_insulation_assumed + + # Partial insulation (Average) + if age_band in [ + 'England and Wales: 
1950-1966', + 'England and Wales: 1967-1975', + ]: + return EpcWallDescriptions.timber_frame_partial_insulated_assumed + + # Insulated (Good) + if age_band in [ + 'England and Wales: 1976-1982', + 'England and Wales: 1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.timber_frame_insulated_assumed + + # TODO: Unknown / pre-1930 handling + raise NotImplementedError(f"Age band {age_band} not handled for timber frame wall insulation mapping") + + +def map_system_build_wall_insulation(age_band): + # No insulation (Poor) + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + 'England and Wales: 1950-1966', + 'England and Wales: 1967-1975', + ]: + return EpcWallDescriptions.system_no_insulation_assumed + + # Partial insulation (Average) + if age_band in [ + 'England and Wales: 1976-1982', + ]: + return EpcWallDescriptions.system_partial_insulated_assumed + + # Insulated (Good) + if age_band in [ + 'England and Wales: 1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.system_insulated_assumed + + # TODO: Unknown / early system build handling + raise NotImplementedError(f"Age band {age_band} not handled for system build wall insulation mapping") + + +def map_granite_wall_insulation(age_band): + # No insulation (Very Poor) + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + 'England and Wales: 1950-1966', + 'England and Wales: 1967-1975', + ]: + return EpcWallDescriptions.granite_whinstone_no_insulation_assumed + + # Partial insulation 
(Average) + if age_band in [ + 'England and Wales: 1976-1982', + ]: + return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed + + # Insulated (Good) + if age_band in [ + 'England and Wales: 1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.granite_whinestone_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for granite wall insulation mapping") + + +def map_sandstone_wall_insulation(age_band): + # No insulation (Very Poor) + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + 'England and Wales: 1950-1966', + 'England and Wales: 1967-1975', + ]: + return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed + + # Partial insulation (Average) + if age_band in [ + 'England and Wales: 1976-1982', + ]: + return EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed + + # Insulated (Good) + if age_band in [ + 'England and Wales: 1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.sandstone_limestone_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for sandstone wall insulation mapping") + + +def map_cob_wall_insulation(age_band): + # Cob, as built (Average) + if age_band in [ + 'England and Wales: before 1900', + 'England and Wales: 1900-1929', + 'England and Wales: 1930-1949', + 'England and Wales: 1950-1966', + 'England and Wales: 1967-1975', + 'England and Wales: 1976-1982', + ]: + return EpcWallDescriptions.cob_as_built_average + + # Cob, as built (Good) + if age_band in [ + 'England and Wales: 
1983-1990', + 'England and Wales: 1991-1995', + 'England and Wales: 1996-2002', + 'England and Wales: 2003-2006', + 'England and Wales: 2007-2011', + 'England and Wales: 2012-2022', + 'England and Wales: 2023 onwards', + ]: + return EpcWallDescriptions.cob_as_built_good + + raise NotImplementedError(f"Age band {age_band} not handled for cob wall insulation mapping") diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 27244777..ca84fbec 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,8 +1,9 @@ import pandas as pd from etl.epc.DataProcessor import construction_age_bounds_map from backend.onboarders.mappings.property_type import parity_map as property_map -from backend.onboarders.mappings.age_band import party_map as age_band_map +from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map +from onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand def check_nulls(data, original_column, mapped_column): @@ -17,7 +18,6 @@ def check_nulls(data, original_column, mapped_column): # Sample input data - data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", @@ -30,18 +30,6 @@ data = pd.read_excel( # 3) Indicate already installed measures # ------------ construction_age_band ------------ -# Map to EPC age bands -# def construction_date_to_band(year): -# if pd.isnull(year): -# return None -# # Get the year from the date which is numpy datetime format -# for label, ranges in construction_age_bounds_map.items(): -# if ranges["l"] <= year <= ranges["u"]: -# return label -# raise NotImplementedError("year out of bounds") -# -# -# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) data["construction_age_band"] = 
data["Construction Years"].map(age_band_map) @@ -59,30 +47,186 @@ assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapp # ------------ Wall Construction ------------ -data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") -data["Wall Insulation"].value_counts() -data["Wall Construction"].value_counts() +# Unique combindations +wall_mapping = { + # Cavity walls + ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity.value, + ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation.value, + ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation.value, + ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal.value, + ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external.value, + ('Cavity', 'AsBuilt'): None, # To be classified + ('Cavity', 'Unknown'): None, # To be classified -as_built_map = { - "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + # System built walls + ('System', 'External'): EpcWallDescriptions.system_external_insulation.value, + ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation.value, + ('System', 'AsBuilt'): None, # To be classified + ('System', 'Unknown'): None, + + # Timber Frame walls + ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation.value, + ('Timber Frame', 'External'): 
EpcWallDescriptions.timber_frame_external_insulation.value, + ('Timber Frame', 'AsBuilt'): None, # To be classified + ('Timber Frame', 'Unknown'): None, + + # Solid Brick walls + ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation.value, + ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation.value, + ('Solid Brick', 'AsBuilt'): None, # To be classified + ('Solid Brick', 'Unknown'): None, + + # Granite walls + ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation.value, + ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation.value, + ('Granite', 'AsBuilt'): None, + ('Granite', 'Unknown'): None, + + # Sandstone walls + ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation.value, + ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation.value, + ('Sandstone', 'Unknown'): None, + ('Sandstone', 'AsBuilt'): None, + + # Cob walls + ('Cob', 'AsBuilt'): None, } -def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): - if wall_insulation == "AsBuilt": - # Deduce based on wall construction and age band - bands = as_built_map.get(wall_constuction, None) - if bands is None: - raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") +def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.cavity_no_insulation_assumed - # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.cavity_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.cavity_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") + + 
+def map_solid_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.solid_brick_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.solid_brick_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.solid_brick_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for solid wall insulation mapping" + ) + + +def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1950: + return EpcWallDescriptions.timber_frame_no_insulation_assumed + + if age_band.start_year() < 1976: + return EpcWallDescriptions.timber_frame_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1976): + return EpcWallDescriptions.timber_frame_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for timber frame wall insulation mapping" + ) + + +def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.system_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.system_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.system_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for system build wall insulation mapping" + ) + + +def map_granite_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.granite_whinstone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return 
EpcWallDescriptions.granite_whinestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for granite wall insulation mapping" + ) + + +def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.sandstone_limestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for sandstone wall insulation mapping" + ) + + +def map_cob_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1983: + return EpcWallDescriptions.cob_as_built_average + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.cob_as_built_good + + raise NotImplementedError( + f"Age band {age_band.value} not handled for cob wall insulation mapping" + ) + + +AS_BUILT_WALL_CLASSIFIERS = { + "Cavity": map_cavity_wall_insulation, + "Solid Brick": map_solid_wall_insulation, + "Timber Frame": map_timber_frame_wall_insulation, + "System": map_system_build_wall_insulation, + "Granite": map_granite_wall_insulation, + "Sandstone": map_sandstone_wall_insulation, + "Cob": map_cob_wall_insulation, +} + +data["landlord_wall_description"] = ( + data[["Wall Construction", "Wall Insulation"]] + .apply(tuple, axis=1) + .map(wall_mapping) +) + + +def fill_as_built(row): + if row.landlord_wall_description is not None: + return row.landlord_wall_description + + classifier = AS_BUILT_WALL_CLASSIFIERS.get(row["Wall Construction"]) + if classifier is None: + return None + + return classifier(row.construction_age_band) + + +data["landlord_wall_description"] = data.apply(fill_as_built, axis=1) + +for _, x in data.iterrows(): + if 
x["construction_age_band"] == "England and Wales: 2012-2021": + de # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', From 0524d7ab9c30b22270f162cff2a8c6f4eb770f76 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 29 Jan 2026 19:06:27 +0000 Subject: [PATCH 02/33] add fallback to rows without an age band --- backend/onboarders/mappings/age_band.py | 28 +++++------ backend/onboarders/parity.py | 63 ++++++++++++++++--------- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py index 5106a3fe..e49fede8 100644 --- a/backend/onboarders/mappings/age_band.py +++ b/backend/onboarders/mappings/age_band.py @@ -1,19 +1,19 @@ from backend.onboarders.epc_descriptions import EpcConstructionAgeBand parity_map = { - "Before 1900": EpcConstructionAgeBand.before_1900.value, - "1900-1929": EpcConstructionAgeBand.from_1900_to_1929.value, - "1930-1949": EpcConstructionAgeBand.from_1930_to_1949.value, - "1950-1966": EpcConstructionAgeBand.from_1950_to_1966.value, - "1967-1975": EpcConstructionAgeBand.from_1967_to_1975.value, - "1976-1982": EpcConstructionAgeBand.from_1976_to_1982.value, - "1983-1990": EpcConstructionAgeBand.from_1983_to_1990.value, - "1991-1995": EpcConstructionAgeBand.from_1991_to_1995.value, - "1996-2002": EpcConstructionAgeBand.from_1996_to_2002.value, - "2003-2006": EpcConstructionAgeBand.from_2003_to_2006.value, - "2007-2011": EpcConstructionAgeBand.from_2007_to_2011.value, - "2012 onwards": EpcConstructionAgeBand.from_2012_onwards.value, + "Before 1900": EpcConstructionAgeBand.before_1900, + "1900-1929": EpcConstructionAgeBand.from_1900_to_1929, + "1930-1949": EpcConstructionAgeBand.from_1930_to_1949, + "1950-1966": EpcConstructionAgeBand.from_1950_to_1966, + "1967-1975": EpcConstructionAgeBand.from_1967_to_1975, + "1976-1982": EpcConstructionAgeBand.from_1976_to_1982, + "1983-1990": 
EpcConstructionAgeBand.from_1983_to_1990, + "1991-1995": EpcConstructionAgeBand.from_1991_to_1995, + "1996-2002": EpcConstructionAgeBand.from_1996_to_2002, + "2003-2006": EpcConstructionAgeBand.from_2003_to_2006, + "2007-2011": EpcConstructionAgeBand.from_2007_to_2011, + "2012 onwards": EpcConstructionAgeBand.from_2012_onwards, # Newer age bands, under SAP10 - "2012-2022": EpcConstructionAgeBand.from_2012_to_2022.value, - "2023 onwards": EpcConstructionAgeBand.from_2023_onwards.value, + "2012-2022": EpcConstructionAgeBand.from_2012_to_2022, + "2023 onwards": EpcConstructionAgeBand.from_2023_onwards, } diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index ca84fbec..d3ee8485 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,9 +1,11 @@ +from tqdm import tqdm import pandas as pd -from etl.epc.DataProcessor import construction_age_bounds_map from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map -from onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand +from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand + +tqdm.pandas() def check_nulls(data, original_column, mapped_column): @@ -51,41 +53,41 @@ assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapp # Unique combindations wall_mapping = { # Cavity walls - ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity.value, - ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation.value, - ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation.value, - ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal.value, - ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external.value, + ('Cavity', 
'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity, + ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation, + ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation, + ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal, + ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external, ('Cavity', 'AsBuilt'): None, # To be classified ('Cavity', 'Unknown'): None, # To be classified # System built walls - ('System', 'External'): EpcWallDescriptions.system_external_insulation.value, - ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation.value, + ('System', 'External'): EpcWallDescriptions.system_external_insulation, + ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation, ('System', 'AsBuilt'): None, # To be classified ('System', 'Unknown'): None, # Timber Frame walls - ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation.value, - ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation.value, + ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation, + ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation, ('Timber Frame', 'AsBuilt'): None, # To be classified ('Timber Frame', 'Unknown'): None, # Solid Brick walls - ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation.value, - ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation.value, + ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation, + ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation, ('Solid Brick', 'AsBuilt'): None, # To be classified ('Solid Brick', 'Unknown'): None, # Granite walls - ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation.value, - ("Granite", 'Internal'): 
EpcWallDescriptions.granite_whinstone_internal_insulation.value, + ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation, + ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation, ('Granite', 'AsBuilt'): None, ('Granite', 'Unknown'): None, # Sandstone walls - ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation.value, - ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation.value, + ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation, + ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation, ('Sandstone', 'Unknown'): None, ('Sandstone', 'AsBuilt'): None, @@ -204,6 +206,16 @@ AS_BUILT_WALL_CLASSIFIERS = { "Cob": map_cob_wall_insulation, } +UNKNOWN_AGE_FALLBACK = { + "Cavity": EpcWallDescriptions.cavity_no_insulation_assumed, + "Solid Brick": EpcWallDescriptions.solid_brick_no_insulation_assumed, + "Timber Frame": EpcWallDescriptions.timber_frame_no_insulation_assumed, + "System": EpcWallDescriptions.system_no_insulation_assumed, + "Granite": EpcWallDescriptions.granite_whinstone_no_insulation_assumed, + "Sandstone": EpcWallDescriptions.sandstone_limestone_no_insulation_assumed, + "Cob": EpcWallDescriptions.cob_as_built_average, +} + data["landlord_wall_description"] = ( data[["Wall Construction", "Wall Insulation"]] .apply(tuple, axis=1) @@ -212,21 +224,28 @@ data["landlord_wall_description"] = ( def fill_as_built(row): + # Already resolved via direct mapping if row.landlord_wall_description is not None: return row.landlord_wall_description - classifier = AS_BUILT_WALL_CLASSIFIERS.get(row["Wall Construction"]) + wall_type = row["Wall Construction"] + + # Missing construction age → conservative fallback + if pd.isnull(row.construction_age_band): + return UNKNOWN_AGE_FALLBACK.get(wall_type) + + classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type) if classifier is None: return None 
return classifier(row.construction_age_band) -data["landlord_wall_description"] = data.apply(fill_as_built, axis=1) +data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1) -for _, x in data.iterrows(): - if x["construction_age_band"] == "England and Wales: 2012-2021": - de +assert data["landlord_wall_description"].isnull().sum() == 0, ( + "Some wall descriptions could not be resolved" +) # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', From 002dc3695bc1b5d827099c4e2b60a17255bc4a61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 29 Jan 2026 19:21:14 +0000 Subject: [PATCH 03/33] handled walls - roof to come next --- backend/onboarders/epc_descriptions.py | 9 +++++++++ backend/onboarders/parity.py | 14 +++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index a674e332..be704308 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -95,6 +95,15 @@ class EpcWallDescriptions(Enum): cob_as_built_average = "Cob, as built" cob_as_built_good = "Cob, as built" + # unknown descriptions which may get mapped later or handled via fallback + cavity_as_built_unknown = "Cavity wall, as built, unknown insulation" + solid_brick_as_built_unknown = "Solid brick, as built, unknown insulation" + system_as_built_unknown = "System built, as built, unknown insulation" + timber_frame_as_built_unknown = "Timber frame, as built, unknown insulation" + granite_as_built_unknown = "Granite or whin, as built, unknown insulation" + sandstone_as_built_unknown = "Sandstone, as built, unknown insulation" + cob_as_built_unknown = "Cob, as built, unknown insulation" + class EpcEfficiency(Enum): VERY_POOR = "Very Poor" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index d3ee8485..3e17ecce 100644 --- a/backend/onboarders/parity.py +++ 
b/backend/onboarders/parity.py @@ -207,13 +207,13 @@ AS_BUILT_WALL_CLASSIFIERS = { } UNKNOWN_AGE_FALLBACK = { - "Cavity": EpcWallDescriptions.cavity_no_insulation_assumed, - "Solid Brick": EpcWallDescriptions.solid_brick_no_insulation_assumed, - "Timber Frame": EpcWallDescriptions.timber_frame_no_insulation_assumed, - "System": EpcWallDescriptions.system_no_insulation_assumed, - "Granite": EpcWallDescriptions.granite_whinstone_no_insulation_assumed, - "Sandstone": EpcWallDescriptions.sandstone_limestone_no_insulation_assumed, - "Cob": EpcWallDescriptions.cob_as_built_average, + "Cavity": EpcWallDescriptions.cavity_as_built_unknown, + "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, + "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown, + "System": EpcWallDescriptions.system_as_built_unknown, + "Granite": EpcWallDescriptions.granite_as_built_unknown, + "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown, + "Cob": EpcWallDescriptions.cob_as_built_unknown, } data["landlord_wall_description"] = ( From 63c6c32e2204b121f190777ebac21ea9cba5e248 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 12:25:54 +0000 Subject: [PATCH 04/33] working on roof efficiency rules --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/app.py | 68 ++++++ asset_list/mappings/built_form.py | 10 + asset_list/mappings/exising_pv.py | 7 +- asset_list/mappings/heating_systems.py | 6 +- asset_list/mappings/property_type.py | 12 +- asset_list/mappings/roof.py | 9 + backend/onboarders/epc_descriptions.py | 141 +++++++++++- .../mappings/as_built_wall_classifiers.py | 204 ----------------- backend/onboarders/parity.py | 213 +++++++++++++++++- .../onboarders/tests/test_roof_remapping.py | 0 .../onboarders/tests/test_wall_remapping.py | 0 13 files changed, 459 insertions(+), 215 deletions(-) create mode 100644 backend/onboarders/tests/test_roof_remapping.py create mode 100644 backend/onboarders/tests/test_wall_remapping.py diff --git 
a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/app.py b/asset_list/app.py index 969b0184..63dc0601 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,74 @@ def app(): Property UPRN """ + # Fairhive + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Fairhive" + data_filename = "Fairhive Asset list.xlsx" + sheet_name = "Sheet1" + postcode_column = 'POSTCODE' + address1_column = "ADDRESS" + address1_method = None + fulladdress_column = 'ADDRESS' + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "PROPERTY TYPE" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Row ID" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Hyde + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Minor Works" + data_filename = "Hyde Group - Domna Minor Works Programme List.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = 'Address' + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Age" + landlord_os_uprn = None + landlord_property_type = "Property Type" + 
landlord_built_form = "Property Type" + landlord_wall_construction = "Walls" + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = "Renewables" + landlord_property_id = "Organisation Reference" + landlord_sap = "SAP (10)" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/20260129 SAL" data_filename = "NCHA ASSET LIST 1.xlsx" sheet_name = "NCHA ASSET LIST" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index a9defdef..d6466539 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = { '2.EXT.WALL FLAT': 'mid-terrace', '2 EXT. 
WALL FLAT': 'mid-terrace', + 'Maisonette: Detached: Ground Floor': 'detached', + 'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace', + 'Flat: End Terrace: Basement': 'end-terrace', + 'Flat: Mid Terrace: Basement': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace', + 'House: Semi Detached: Top Floor': 'semi-detached', + 'House: End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index e67fafb4..defce35f 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = { 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', 'Solar PV': 'already has PV', - 'SOLAR PV': 'already has PV' + 'SOLAR PV': 'already has PV', + + 'PV: 40% roof area, PV: 2kWp array': 'already has PV', + 'PV: 33% roof area, PV: 2kWp array': 'already has PV', + 'PV: 30% roof area': 'already has PV' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index ffd1b198..272d6279 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -494,6 +494,10 @@ HEATING_MAPPINGS = { 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' 'and sealed to, fireplace opening': 'room heaters', 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', - 'Boiler: G rated Combi': 'gas condensing combi' + 'Boiler: G rated Combi': 'gas condensing combi', + + 'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', + 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', + 'Boiler: A rated Combi, System 2: Boiler: C rated 
Combi': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 703cb8ef..6f808c9a 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -429,6 +429,16 @@ PROPERTY_MAPPING = { 'Mid-terrace': 'unknown', 'MID - TERRACE': 'unknown', 'COMOFF': 'unknown', - 'LOTS': 'unknown' + 'LOTS': 'unknown', + + 'Maisonette: Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette', + 'Flat: End Terrace: Basement': 'flat', + 'Bungalow: EnclosedEndTerrace': 'bungalow', + 'Flat: Mid Terrace: Basement': 'flat', + 'House: Semi Detached: Top Floor': 'house', + 'House: End Terrace: Ground Floor': 'house', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette', + 'Flat: Enclosed Mid Terrace: Basement': 'flat' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 0857b046..cf829a5f 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + 'Flat: 150mm, Flat: Unknown': 'flat insulated', + 'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above', + 'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above', + 'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft', + 'Flat: No Insulation': 'flat uninsulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 175mm': 'pitched insulated', + 'AnotherDwellingAbove: 300mm': 'another dwelling above' + } diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index be704308..37f70df8 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -1,4 
+1,5 @@ import re +from collections.abc import Mapping from enum import Enum from typing import Callable, Union, List @@ -105,6 +106,71 @@ class EpcWallDescriptions(Enum): cob_as_built_unknown = "Cob, as built, unknown insulation" +class EpcRoofDescriptions(Enum): + # Loft + # Known insulation at joists - we have 12, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 270, 300, 350, + # 400+ as options + loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" + loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" + loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" + loft_75mm_insulation: str = "Pitched, 75 mm loft insulation" + loft_100mm_insulation: str = "Pitched, 100 mm loft insulation" + loft_125mm_insulation: str = "Pitched, 125 mm loft insulation" + loft_150mm_insulation: str = "Pitched, 150 mm loft insulation" + loft_175mm_insulation: str = "Pitched, 175 mm loft insulation" + loft_200mm_insulation: str = "Pitched, 200 mm loft insulation" + loft_250mm_insulation: str = "Pitched, 250 mm loft insulation" + loft_270mm_insulation: str = "Pitched, 270 mm loft insulation" + loft_300mm_insulation: str = "Pitched, 300 mm loft insulation" + loft_350mm_insulation: str = "Pitched, 350 mm loft insulation" + loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation" + # Insulated at rafters "Pitched, insulated at rafters" + # Rafters + # 400mm, 350mm = very good + # 200-300mm = good + # 125-175 = average + # 50-100 = poor + # 25 and below= very poor + loft_insulated_at_rafters: str = "Pitched, insulated at rafters" + # another dwelling above + another_dwelling_above: str = "(another dwelling above)" + # flat roof, which if there is observed insulation is just "flat, insulated", however there is a + # different efficiency rating depending on insulation thickness + # categories: + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 
175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + # As built 2023 = Flat, insulated, Very good + # 2003 - 2006, up to 2012-2022 = Flat insulated, Good + # 1983-1990, 1996-2002 = Flat, insulated, Average + # 1976-1982 = Flat, limited insulation, poor + # 1967 - 1975 = Flat, limited insulation, Very Poor + # 1950-1966 and earlier bands = flat, no insulation, very poor + + flat_insulated = "Flat, insulated" + flat_limited_insulation = "Flat, limited insulation" + flat_no_insulation = "Flat, no insulation" + + # Thatched roof descriptions + # With Loft insulation at joists + # Thatched + 12mm = thatched, with additional insulation, average + # Thatched + 25, 50, 100, 150mm = thatched, with additional insulation, good + # Thatched + 175mm+ = thatched, with additional insulation, very good + # With loft insulation at rafters [out of scope atm] + # Unknown insulation + # Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", Average + # 2003-2006, 2012-2022 = "Thatched", Good + # 2023 onwards = "Thatched", Very Good + thatched = "Thatched" # We see this for no insulation, has average performance + thatched_with_additional_insulation: str = "Thatched, with additional insulation" + + # TODO: + # Sloping ceiling + # Pitched, as built + + class EpcEfficiency(Enum): VERY_POOR = "Very Poor" POOR = "Poor" @@ -181,7 +247,12 @@ def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstruct return EpcEfficiency.GOOD -WALL_DESCRIPTION_METADATA = { +WallEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + +WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = { # Note: all function mappings have been defined based on Elmhurst # Cavity # value mappings @@ -248,9 +319,75 @@ def resolve_wall_efficiency( description: EpcWallDescriptions, age_band: EpcConstructionAgeBand, ) -> EpcEfficiency: - rule = 
WALL_DESCRIPTION_METADATA[description] + rule = WALL_DESCRIPTION_EFFICIENCIES[description] if isinstance(rule, EpcEfficiency): return rule return rule(age_band) + + +RoofEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + + +def flat_limited_insulation_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + pass + + +def flat_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + pass + + +def flat_limited_efficiency( + age_band: EpcConstructionAgeBand, + insulation_thickness: int | None, +) -> EpcEfficiency: + """ + If we have an insulation thickness, 12mm results in a very poor rating. 25mm or above results in a poor rating. + If we don't have an insulation thickness, we fall back to age band, where + - 1976 - 1982 = Flat, limited insulation, poor efficiency + - 1967 - 1975 = Flat, limited insulation, Very Poor + :param age_band: + :param insulation_thickness: + :return: + """ + + if insulation_thickness is not None: + if insulation_thickness >= 25: + return EpcEfficiency.POOR + return EpcEfficiency.VERY_POOR + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcEfficiency.POOR + + if age_band == EpcConstructionAgeBand.from_1967_to_1975: + return EpcEfficiency.VERY_POOR + + raise ValueError("Cannot determine flat limited insulation efficiency without insulation thickness or age band") + + +ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { + # Flat roof + EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.flat_limited_insulation: flat_limited_insulation_efficiency, + EpcRoofDescriptions.flat_insulated: flat_insulated_efficiency, + + # Loft: + EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_75mm_insulation: 
EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, +} diff --git a/backend/onboarders/mappings/as_built_wall_classifiers.py b/backend/onboarders/mappings/as_built_wall_classifiers.py index e0ef193f..e69de29b 100644 --- a/backend/onboarders/mappings/as_built_wall_classifiers.py +++ b/backend/onboarders/mappings/as_built_wall_classifiers.py @@ -1,204 +0,0 @@ -def map_cavity_wall_insulation(age_band): - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975' - ]: - return EpcWallDescriptions.cavity_no_insulation_assumed - - if age_band in [ - 'England and Wales: 1976-1982' - ]: - return EpcWallDescriptions.cavity_partial_insulated_assumed - - if age_band in [ - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.cavity_insulated_assumed - - raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") - - -def map_solid_wall_insulation(age_band): - if age_band in [ - 'England and Wales: before 1900', 'England and Wales: 1900-1929', 
'England and Wales: 1930-1949', - 'England and Wales: 1967-1975' - ]: - return EpcWallDescriptions.solid_brick_no_insulation_assumed - - if age_band in [ - 'England and Wales: 1976-1982' - ]: - return EpcWallDescriptions.solid_brick_partial_insulated_assumed - - if age_band in [ - 'England and Wales: 1983-1990', 'England and Wales: 1991-1995', 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', 'England and Wales: 2007-2011', 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.solid_brick_insulated_assumed - - -def map_timber_frame_wall_insulation(age_band): - # No insulation (Poor) - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - ]: - return EpcWallDescriptions.timber_frame_no_insulation_assumed - - # Partial insulation (Average) - if age_band in [ - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975', - ]: - return EpcWallDescriptions.timber_frame_partial_insulated_assumed - - # Insulated (Good) - if age_band in [ - 'England and Wales: 1976-1982', - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.timber_frame_insulated_assumed - - # TODO: Unknown / pre-1930 handling - raise NotImplementedError(f"Age band {age_band} not handled for timber frame wall insulation mapping") - - -def map_system_build_wall_insulation(age_band): - # No insulation (Poor) - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975', - ]: - return EpcWallDescriptions.system_no_insulation_assumed - - # Partial insulation (Average) - if age_band in [ - 'England and Wales: 1976-1982', - ]: - 
return EpcWallDescriptions.system_partial_insulated_assumed - - # Insulated (Good) - if age_band in [ - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.system_insulated_assumed - - # TODO: Unknown / early system build handling - raise NotImplementedError(f"Age band {age_band} not handled for system build wall insulation mapping") - - -def map_granite_wall_insulation(age_band): - # No insulation (Very Poor) - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975', - ]: - return EpcWallDescriptions.granite_whinstone_no_insulation_assumed - - # Partial insulation (Average) - if age_band in [ - 'England and Wales: 1976-1982', - ]: - return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed - - # Insulated (Good) - if age_band in [ - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.granite_whinestone_insulated_assumed - - raise NotImplementedError(f"Age band {age_band} not handled for granite wall insulation mapping") - - -def map_sandstone_wall_insulation(age_band): - # No insulation (Very Poor) - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975', - ]: - return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed - - # Partial insulation (Average) - if age_band in [ - 'England and Wales: 1976-1982', - ]: - return 
EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed - - # Insulated (Good) - if age_band in [ - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.sandstone_limestone_insulated_assumed - - raise NotImplementedError(f"Age band {age_band} not handled for sandstone wall insulation mapping") - - -def map_cob_wall_insulation(age_band): - # Cob, as built (Average) - if age_band in [ - 'England and Wales: before 1900', - 'England and Wales: 1900-1929', - 'England and Wales: 1930-1949', - 'England and Wales: 1950-1966', - 'England and Wales: 1967-1975', - 'England and Wales: 1976-1982', - ]: - return EpcWallDescriptions.cob_as_built_average - - # Cob, as built (Good) - if age_band in [ - 'England and Wales: 1983-1990', - 'England and Wales: 1991-1995', - 'England and Wales: 1996-2002', - 'England and Wales: 2003-2006', - 'England and Wales: 2007-2011', - 'England and Wales: 2012-2022', - 'England and Wales: 2023 onwards', - ]: - return EpcWallDescriptions.cob_as_built_good - - raise NotImplementedError(f"Age band {age_band} not handled for cob wall insulation mapping") diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 3e17ecce..a77e76a8 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -3,7 +3,9 @@ import pandas as pd from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map -from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand +from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ + WALL_DESCRIPTION_EFFICIENCIES 
+from onboarders.epc_descriptions import EpcRoofDescriptions tqdm.pandas() @@ -49,7 +51,6 @@ assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapp # ------------ Wall Construction ------------ - # Unique combindations wall_mapping = { # Cavity walls @@ -241,16 +242,220 @@ def fill_as_built(row): return classifier(row.construction_age_band) +def resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand | None, +) -> EpcEfficiency: + # Unknown / holding descriptions → efficiency unknown + if "unknown insulation" in description.value.lower(): + return EpcEfficiency.NA + + rule = WALL_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + if isinstance(rule, EpcEfficiency): + return rule + + # Rule needs age band but we don't have one + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + return rule(age_band) + + data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1) assert data["landlord_wall_description"].isnull().sum() == 0, ( "Some wall descriptions could not be resolved" ) +data["landlord_wall_efficiency"] = data.progress_apply( + lambda row: resolve_wall_efficiency( + row.landlord_wall_description, + row.construction_age_band, + ), + axis=1, +) +# Sanity check +assert data["landlord_wall_efficiency"].isnull().sum() == 0 + +# ------------ Roof Construction ------------ + +roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_dict("records") + +[ + # Dwelling above + + # Pitched, loft + + {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': nan}, + {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': 'AsBuilt'}, + {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': 'Unknown'}, + + # Flat + {'Roof Construction': 'Flat', 'Roof Insulation': 'AsBuilt'}, + {'Roof Construction': 'Flat', 'Roof Insulation': 'mm100'}, + {'Roof Construction': 'Flat', 'Roof 
Insulation': 'mm150'}, + {'Roof Construction': 'Flat', 'Roof Insulation': nan}, + + {'Roof Construction': 'Flat', 'Roof Insulation': 'Unknown'}, + + # Thatched + {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm150'}, + {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'Unknown'}, + {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm50'}, + {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm300'}, + + # Sloping + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'AsBuilt'}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm150'}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm100'}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': nan}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm50'}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'NoInsulation'}, + {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'Unknown'}, + + # Pitched no loft access + {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': nan}, + {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': 'Unknown'}, + {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': 'AsBuilt'} +] + +roof_mapping = { + # Dwelling above + ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + # Pitched, normal loft access, with a loft thickness + ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalLoftAccess', 'mm150'): 
EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # Pitched, no loft access, with a loft thickness + ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # Flat + ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, + # Flat - limited insulation + ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation, + # Flat insulated + ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm150'): 
EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, + + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + + # {'Roof Construction': 'Flat', 'Roof Insulation': 'mm50'}, + +} + + +def classify_flat_roof(age_band: EpcConstructionAgeBand): + # # flat roof, which if there is observed insulation is just "flat, insulated", however there is a + # # different efficiency rating depending on insulation thickness + # # categories: + # # 12mm = very poor & has limited insulation description + # # 25, 50 = poor & has limited insulation description + # # 75, 100, 125mm = average (Flat, insulated) + # # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # # 270mm+ = very good (Flat, insulated) + # # As built 2023 = Flat, insulated, Very good + # # 2003 - 2006, up to 2012-2022 = Flat insulated, Good + # # 1983-1990, 1996-2002 = Flat, insulated, Average + # # 1976-1982 = Flat, limited insulation, poor + # # 1967 - 1975 = Flat, limited insulation, Very Poor + # # 1950-1966 and earlier bands = flat, no insulation, very poor + raise NotImplementedError("Flat roof classification not implemented yet") + + +def classify_pitched_loft_unknown(age_band: EpcConstructionAgeBand): + raise NotImplementedError("Pitched loft (unknown insulation) not implemented yet") + + +def classify_thatched_roof(age_band: EpcConstructionAgeBand): + raise NotImplementedError("Thatched roof classification not implemented yet") + + +def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand): + raise NotImplementedError("Sloping ceiling 
roof classification not implemented yet") + + +AS_BUILT_ROOF_CLASSIFIERS = { + "Flat": classify_flat_roof, + "PitchedNormalLoftAccess": classify_pitched_loft_unknown, + "PitchedNormalNoLoftAccess": classify_pitched_loft_unknown, + "PitchedThatched": classify_thatched_roof, + "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, +} + + +def fill_roof_as_built(row): + # Already resolved + if row.landlord_roof_description is not None: + return row.landlord_roof_description + + roof_type = row["Roof Construction"] + + classifier = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type) + if classifier is None: + raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") + + if pd.isnull(row.construction_age_band): + raise NotImplementedError( + f"Missing age band for roof classification ({roof_type})" + ) + + return classifier(row.construction_age_band) + + +data["landlord_roof_description"] = ( + data[["Roof Construction", "Roof Insulation"]] + .progress_apply(tuple, axis=1) + .map(roof_mapping) +) + +data["landlord_roof_description"] = data.progress_apply( + fill_roof_as_built, + axis=1, +) + +for _, row in data.iterrows(): + fill_roof_as_built(row) + # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', -# 'Attachment', 'Construction Years', 'Wall Construction', -# 'Wall Insulation', 'Roof Construction', 'Roof Insulation', +# 'Attachment', 'Construction Years', +# 'Roof Construction', 'Roof Insulation', # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', # 'Total Floor Area (m2)' diff --git a/backend/onboarders/tests/test_roof_remapping.py b/backend/onboarders/tests/test_roof_remapping.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/tests/test_wall_remapping.py b/backend/onboarders/tests/test_wall_remapping.py new file mode 100644 index 00000000..e69de29b From eee99d135891b83774d4d3a111b86d5ed3ea37a7 
Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 12:40:18 +0000 Subject: [PATCH 05/33] added classify_flat_roof --- backend/onboarders/base.py | 7 ++++++ backend/onboarders/parity.py | 47 ++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index e69de29b..12ef9c94 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -0,0 +1,7 @@ +class OnboarderBase: + + def read(self): + pass + + def write(self): + pass diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index a77e76a8..a215d444 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -380,22 +380,27 @@ roof_mapping = { } -def classify_flat_roof(age_band: EpcConstructionAgeBand): - # # flat roof, which if there is observed insulation is just "flat, insulated", however there is a - # # different efficiency rating depending on insulation thickness - # # categories: - # # 12mm = very poor & has limited insulation description - # # 25, 50 = poor & has limited insulation description - # # 75, 100, 125mm = average (Flat, insulated) - # # 150, 175, 200, 225, 250mm = good (Flat, insulated) - # # 270mm+ = very good (Flat, insulated) - # # As built 2023 = Flat, insulated, Very good - # # 2003 - 2006, up to 2012-2022 = Flat insulated, Good - # # 1983-1990, 1996-2002 = Flat, insulated, Average - # # 1976-1982 = Flat, limited insulation, poor - # # 1967 - 1975 = Flat, limited insulation, Very Poor - # # 1950-1966 and earlier bands = flat, no insulation, very poor - raise NotImplementedError("Flat roof classification not implemented yet") +def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a flat, as built roof, these are the breakdowns: + + 2023 onwards → Flat, insulated + 2003–2022 → Flat, insulated + 1983–2002 → Flat, insulated + 1976–1982 → Flat, limited insulation + 1967–1975 → Flat, 
limited insulation + 1950–1966 and earlier → Flat, no insulation + """ + + year = age_band.start_year() + + if year >= 1983: + return EpcRoofDescriptions.flat_insulated + + if year >= 1967: + return EpcRoofDescriptions.flat_limited_insulation + + return EpcRoofDescriptions.flat_no_insulation def classify_pitched_loft_unknown(age_band: EpcConstructionAgeBand): @@ -421,7 +426,7 @@ AS_BUILT_ROOF_CLASSIFIERS = { def fill_roof_as_built(row): # Already resolved - if row.landlord_roof_description is not None: + if not pd.isnull(row.landlord_roof_description): return row.landlord_roof_description roof_type = row["Roof Construction"] @@ -435,7 +440,13 @@ def fill_roof_as_built(row): f"Missing age band for roof classification ({roof_type})" ) - return classifier(row.construction_age_band) + output = classifier(row.construction_age_band) + if output is None: + raise NotImplementedError( + f"Roof classification returned None for roof type '{roof_type}'" + ) + + return output data["landlord_roof_description"] = ( From 053142f325076f20fb19574269398cbb5f38f0f5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 13:51:41 +0000 Subject: [PATCH 06/33] mappings for unkown loft and for thatched --- backend/onboarders/epc_descriptions.py | 2 ++ backend/onboarders/parity.py | 45 ++++++++++++-------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index 37f70df8..a4f0be13 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -108,6 +108,8 @@ class EpcWallDescriptions(Enum): class EpcRoofDescriptions(Enum): # Loft + # Assumed options + pitched_insulated_assumed = "Pitched, insulated (assumed)" # Known insulation at joists - we have 12, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 270, 300, 350, # 400+ as options loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" diff --git a/backend/onboarders/parity.py 
b/backend/onboarders/parity.py index a215d444..988ee01e 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,3 +1,4 @@ +from numpy import nan from tqdm import tqdm import pandas as pd from backend.onboarders.mappings.property_type import parity_map as property_map @@ -290,23 +291,9 @@ roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_ # Pitched, loft - {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': nan}, - {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': 'AsBuilt'}, - {'Roof Construction': 'PitchedNormalLoftAccess', 'Roof Insulation': 'Unknown'}, - # Flat - {'Roof Construction': 'Flat', 'Roof Insulation': 'AsBuilt'}, - {'Roof Construction': 'Flat', 'Roof Insulation': 'mm100'}, - {'Roof Construction': 'Flat', 'Roof Insulation': 'mm150'}, - {'Roof Construction': 'Flat', 'Roof Insulation': nan}, - - {'Roof Construction': 'Flat', 'Roof Insulation': 'Unknown'}, # Thatched - {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm150'}, - {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'Unknown'}, - {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm50'}, - {'Roof Construction': 'PitchedThatched', 'Roof Insulation': 'mm300'}, # Sloping {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'AsBuilt'}, @@ -317,10 +304,6 @@ roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_ {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'NoInsulation'}, {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'Unknown'}, - # Pitched no loft access - {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': nan}, - {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': 'Unknown'}, - {'Roof Construction': 'PitchedNormalNoLoftAccess', 'Roof Insulation': 'AsBuilt'} ] roof_mapping = { @@ -353,6 +336,16 @@ roof_mapping = { ('PitchedNormalNoLoftAccess', 
'mm350'): EpcRoofDescriptions.loft_350mm_insulation, ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + # All pitched options with asbuilt or unknown got to EpcRoofDescriptions.pitched_insulated_assumed + # With access + ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + # No access + ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + # Flat ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, # Flat - limited insulation @@ -368,6 +361,10 @@ roof_mapping = { ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, + # Flat - as built or unknown + ('Flat', 'AsBuilt'): None, # To be classified + ('Flat', nan): None, # To be classified + ('Flat', 'Unknown'): None, # To be classified # 12mm = very poor & has limited insulation description # 25, 50 = poor & has limited insulation description @@ -375,7 +372,11 @@ roof_mapping = { # 150, 175, 200, 225, 250mm = good (Flat, insulated) # 270mm+ = very good (Flat, insulated) - # {'Roof Construction': 'Flat', 'Roof Insulation': 'mm50'}, + # Thatched + ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age } @@ -403,10 +404,6 @@ def 
classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: return EpcRoofDescriptions.flat_no_insulation -def classify_pitched_loft_unknown(age_band: EpcConstructionAgeBand): - raise NotImplementedError("Pitched loft (unknown insulation) not implemented yet") - - def classify_thatched_roof(age_band: EpcConstructionAgeBand): raise NotImplementedError("Thatched roof classification not implemented yet") @@ -417,8 +414,6 @@ def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand): AS_BUILT_ROOF_CLASSIFIERS = { "Flat": classify_flat_roof, - "PitchedNormalLoftAccess": classify_pitched_loft_unknown, - "PitchedNormalNoLoftAccess": classify_pitched_loft_unknown, "PitchedThatched": classify_thatched_roof, "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, } From 73692e96ee68738a8d7daaf4b637c8176a9daf08 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:17:18 +0000 Subject: [PATCH 07/33] added sloping ceiling description and notes of efficiency categorisations --- backend/onboarders/epc_descriptions.py | 6 ++++-- backend/onboarders/parity.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index a4f0be13..280b180f 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -168,9 +168,11 @@ class EpcRoofDescriptions(Enum): thatched = "Thatched" # We see this for no insulation, has average performance thatched_with_additional_insulation: str = "Thatched, with additional insulation" - # TODO: # Sloping ceiling - # Pitched, as built + # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped + sloping_pitched_no_insulation: str = "Pithced, no insulation" + sloping_pitched_limited_insulation: str = "Pitched, limited insulation" + sloping_pitched_insulated: str = "Pitched, insulated" class EpcEfficiency(Enum): diff 
--git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 988ee01e..cb9b4f7f 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -304,6 +304,19 @@ roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_ {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'NoInsulation'}, {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'Unknown'}, + # Sloping ceiling: + # With measured insulation + # 12mm insulation: Pitched, limited insulation, Very Poor + # 25mm, 50mm insulation: Pitched, limited insulation, Poor + # 75mm - 125mm, Pitched, insulated, average + # 150mm - 250mm, Pitched, insulated, good + # 270mm+, Pitched, insulated, very good + # As built / unknown: + # before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975, -> Pitched, no insulation, Very Poor + # 1976-1982 -> Pitched, limited insulation, Poor + # 1983-1990, to 1996-2002 Pitched, insulated, Average + # 2003 - 2006, 2012-2022 -> Pitched, insulated, Good + # 2023 onwards -> Pitched, insulated, Very Good ] roof_mapping = { From b2d4037b213f6d0843f4822c909a0fd572a1743d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:22:56 +0000 Subject: [PATCH 08/33] classify_sloping_ceiling_roof added --- backend/onboarders/parity.py | 52 +++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index cb9b4f7f..f2f940ba 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -296,13 +296,6 @@ roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_ # Thatched # Sloping - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'AsBuilt'}, - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm150'}, - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm100'}, - {'Roof Construction': 
'PitchedWithSlopingCeiling', 'Roof Insulation': nan}, - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'mm50'}, - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'NoInsulation'}, - {'Roof Construction': 'PitchedWithSlopingCeiling', 'Roof Insulation': 'Unknown'}, # Sloping ceiling: # With measured insulation @@ -391,6 +384,25 @@ roof_mapping = { ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age + # Sloping: + # Limited (12 very poor, 25-50 poor) + ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good) + ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated, + # As built/unknown + ('PitchedWithSlopingCeiling', 'AsBuilt'): None, # To be classified + ('PitchedWithSlopingCeiling', nan): None, # To be classified + ('PitchedWithSlopingCeiling', 'Unknown'): None, # } 
@@ -404,6 +416,8 @@ def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: 1976–1982 → Flat, limited insulation 1967–1975 → Flat, limited insulation 1950–1966 and earlier → Flat, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions """ year = age_band.start_year() @@ -417,17 +431,31 @@ def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: return EpcRoofDescriptions.flat_no_insulation -def classify_thatched_roof(age_band: EpcConstructionAgeBand): - raise NotImplementedError("Thatched roof classification not implemented yet") +def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a sloping ceiling, as built roof, these are the breakdowns: + 2023 onwards → Sloping pitched, insulated + 2003–2022 → Sloping pitched, insulated + 1983–2002 → Sloping pitched, insulated + 1976–1982 → Sloping pitched, limited insulation + 1967–1975 and earlier → Sloping pitched, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions + """ + year = age_band.start_year() + if year >= 1983: + return EpcRoofDescriptions.sloping_pitched_insulated -def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand): - raise NotImplementedError("Sloping ceiling roof classification not implemented yet") + if year >= 1976: + return EpcRoofDescriptions.sloping_pitched_limited_insulation + + return EpcRoofDescriptions.sloping_pitched_no_insulation AS_BUILT_ROOF_CLASSIFIERS = { + # Only need to apply this to flat and sloping ceiling roofs "Flat": classify_flat_roof, - "PitchedThatched": classify_thatched_roof, "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, } From 9c5befbff6f6993e8781bed04854b2b4fedd80d1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:29:52 +0000 Subject: [PATCH 09/33] added unknown descriptions properties --- backend/onboarders/epc_descriptions.py | 27 +++++++++++++++++ 
backend/onboarders/parity.py | 40 +++++++------------------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index 280b180f..7f1d04e9 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -105,6 +105,18 @@ class EpcWallDescriptions(Enum): sandstone_as_built_unknown = "Sandstone, as built, unknown insulation" cob_as_built_unknown = "Cob, as built, unknown insulation" + @property + def unknown_descriptions(self) -> List["EpcWallDescriptions"]: + return [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ] + class EpcRoofDescriptions(Enum): # Loft @@ -174,6 +186,21 @@ class EpcRoofDescriptions(Enum): sloping_pitched_limited_insulation: str = "Pitched, limited insulation" sloping_pitched_insulated: str = "Pitched, insulated" + # Unknown descriptions which may get mapped later or handled via fallback + flat_as_built_unknown: str = "Flat, as built, unknown insulation" + loft_as_built_unknown: str = "Loft, as built, unknown insulation" + thatched_as_built_unknown: str = "Thatched, as built, unknown insulation" + sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcRoofDescriptions"]: + return [ + EpcRoofDescriptions.flat_as_built_unknown, + EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ] + class EpcEfficiency(Enum): VERY_POOR = "Very Poor" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index f2f940ba..f7553617 100644 --- 
a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -208,7 +208,7 @@ AS_BUILT_WALL_CLASSIFIERS = { "Cob": map_cob_wall_insulation, } -UNKNOWN_AGE_FALLBACK = { +WALL_UNKNOWN_AGE_FALLBACK = { "Cavity": EpcWallDescriptions.cavity_as_built_unknown, "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown, @@ -234,7 +234,7 @@ def fill_as_built(row): # Missing construction age → conservative fallback if pd.isnull(row.construction_age_band): - return UNKNOWN_AGE_FALLBACK.get(wall_type) + return WALL_UNKNOWN_AGE_FALLBACK.get(wall_type) classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type) if classifier is None: @@ -284,33 +284,6 @@ assert data["landlord_wall_efficiency"].isnull().sum() == 0 # ------------ Roof Construction ------------ -roof_aggs = data[["Roof Construction", "Roof Insulation"]].drop_duplicates().to_dict("records") - -[ - # Dwelling above - - # Pitched, loft - - # Flat - - # Thatched - - # Sloping - - # Sloping ceiling: - # With measured insulation - # 12mm insulation: Pitched, limited insulation, Very Poor - # 25mm, 50mm insulation: Pitched, limited insulation, Poor - # 75mm - 125mm, Pitched, insulated, average - # 150mm - 250mm, Pitched, insulated, good - # 270mm+, Pitched, insulated, very good - # As built / unknown: - # before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975, -> Pitched, no insulation, Very Poor - # 1976-1982 -> Pitched, limited insulation, Poor - # 1983-1990, to 1996-2002 Pitched, insulated, Average - # 2003 - 2006, 2012-2022 -> Pitched, insulated, Good - # 2023 onwards -> Pitched, insulated, Very Good -] roof_mapping = { # Dwelling above @@ -459,6 +432,15 @@ AS_BUILT_ROOF_CLASSIFIERS = { "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, } +ROOF_UNKNOWN_AGE_FALLBACK = { + "Flat": EpcRoofDescriptions.flat_as_built_unknown, + "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown, + "PitchedThatched": 
EpcRoofDescriptions.thatched_as_built_unknown, + "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, + "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, + +} + def fill_roof_as_built(row): # Already resolved From c8c283cd908d7ba42301dd15bb7d8edcc251fa0c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:53:25 +0000 Subject: [PATCH 10/33] added loft_insulated_efficiency --- backend/onboarders/epc_descriptions.py | 38 ++++++++++++++++++++++++-- backend/onboarders/parity.py | 14 +++++----- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index 7f1d04e9..36e9a4f8 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -122,8 +122,8 @@ class EpcRoofDescriptions(Enum): # Loft # Assumed options pitched_insulated_assumed = "Pitched, insulated (assumed)" - # Known insulation at joists - we have 12, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 270, 300, 350, - # 400+ as options + pitched_no_insulation = "Pitched, no insulation" + # Insulation thickness options loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" @@ -182,7 +182,7 @@ class EpcRoofDescriptions(Enum): # Sloping ceiling # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped - sloping_pitched_no_insulation: str = "Pithced, no insulation" + sloping_pitched_no_insulation: str = "Pitched, no insulation" sloping_pitched_limited_insulation: str = "Pitched, limited insulation" sloping_pitched_insulated: str = "Pitched, insulated" @@ -372,6 +372,33 @@ def flat_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency pass +def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + 2023 
onwards -> Very Good + 2012-2022 -> Very Good + 2007-2011 -> Very Good + 2003-2006 -> Very Good + 1996-2002 -> Good + 1991-1995 -> Good + 1983-1990 -> Average + 1976-1982 -> Average + 1967-1975 -> Average + 1950-1966 -> Average + 1930-1949 -> Average + 1900-1929 -> Average + before 1900 -> Average + :param age_band: Input age band, EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2003: + return EpcEfficiency.VERY_GOOD + if year >= 1991: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + def flat_limited_efficiency( age_band: EpcConstructionAgeBand, insulation_thickness: int | None, @@ -407,6 +434,7 @@ ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] EpcRoofDescriptions.flat_insulated: flat_insulated_efficiency, # Loft: + # value mappings EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, @@ -421,4 +449,8 @@ ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR, + # function mappings + EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, + } diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index f7553617..c3b4184d 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -438,7 +438,6 @@ ROOF_UNKNOWN_AGE_FALLBACK = { "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown, "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, - } @@ -454,9 +453,7 @@ def fill_roof_as_built(row): 
raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") if pd.isnull(row.construction_age_band): - raise NotImplementedError( - f"Missing age band for roof classification ({roof_type})" - ) + return ROOF_UNKNOWN_AGE_FALLBACK.get(roof_type) output = classifier(row.construction_age_band) if output is None: @@ -477,9 +474,12 @@ data["landlord_roof_description"] = data.progress_apply( fill_roof_as_built, axis=1, ) - -for _, row in data.iterrows(): - fill_roof_as_built(row) +# Sanity check +assert data["landlord_roof_description"].isnull().sum() == 0, ( + "Some roof descriptions could not be resolved" +) +# TODO: 1) Map energy efficiency +# TODO: 2) Flag sloped ceilings # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', From f7d614d6ec3770e3f1aaa3a3cfd791ddbacd1464 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:57:31 +0000 Subject: [PATCH 11/33] added flat_insulated_efficiency_age_band --- backend/onboarders/epc_descriptions.py | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index 36e9a4f8..e9b770fd 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -368,7 +368,34 @@ def flat_limited_insulation_efficiency(age_band: EpcConstructionAgeBand) -> EpcE pass -def flat_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: +def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> Pitched, no insulation, Very Poor + 1976-1982 -> Pitched, limited insulation, Poor + 1983-1990, to 1996-2002 Pitched, insulated, Average + 2003 - 2006, 2012-2022 -> Pitched, insulated, Good + 2023 onwards -> Pitched, insulated, Very Good + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + + 
start_year = age_band.start_year() + if start_year >= 2023: + return EpcEfficiency.VERY_GOOD + + if start_year >= 2003: + return EpcEfficiency.GOOD + + if start_year >= 1983: + return EpcEfficiency.AVERAGE + + if start_year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: pass @@ -453,4 +480,10 @@ ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] # function mappings EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, + # Loft af rafters + EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency, + + # Another dwelling above + EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, + } From d38ec808bfa72e42514a11249228d4499a523bd2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 14:58:54 +0000 Subject: [PATCH 12/33] added flat_insulated_efficiency_thickness --- backend/onboarders/epc_descriptions.py | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index e9b770fd..a92565d1 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -396,7 +396,32 @@ def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcE def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: - pass + """ + 12mm -> Very Poor + 25mm - 50mm -> Poor + 75mm - 125mm -> Pitched, insulated, average + 150mm - 250mm -> good + 270mm+ -> very good + :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for flat insulated efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if 150 <= 
insulation_thickness <= 250: + return EpcEfficiency.GOOD + + if 75 <= insulation_thickness <= 125: + return EpcEfficiency.AVERAGE + + if 25 <= insulation_thickness <= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: From a7c34c85034eb35d5826ff118192194bd00a3218 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 15:50:30 +0000 Subject: [PATCH 13/33] working on loft at rafters --- backend/onboarders/epc_descriptions.py | 181 +++++++++++++++++++++---- 1 file changed, 158 insertions(+), 23 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index a92565d1..fc10ad70 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -364,10 +364,6 @@ RoofEfficiencyRule = Union[ ] -def flat_limited_insulation_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - pass - - def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: """ before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> Pitched, no insulation, Very Poor @@ -424,6 +420,19 @@ def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> Epc return EpcEfficiency.VERY_POOR +def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine flat roof efficiency. 
+ :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return flat_insulated_efficiency_thickness(insulation_thickness) + + return flat_insulated_efficiency_age_band(age_band) + + def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: """ 2023 onwards -> Very Good @@ -451,39 +460,156 @@ def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency return EpcEfficiency.AVERAGE -def flat_limited_efficiency( - age_band: EpcConstructionAgeBand, +def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on insulation thickness. + :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for thatched efficiency calculation") + + if insulation_thickness >= 175: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 25: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency( insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, ) -> EpcEfficiency: """ - If we have an insulation thickness, 12mm results in a very poor rating. 25mm or above results in a poor rating. 
- If we don't have an insulation thickness, we fall back to age band, where - - 1976 - 1982 = Flat, limited insulation, poor efficiency - - 1967 - 1975 = Flat, limited insulation, Very Poor - :param age_band: - :param insulation_thickness: - :return: + Combines both age band and insulation thickness to determine thatched roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency """ - if insulation_thickness is not None: - if insulation_thickness >= 25: - return EpcEfficiency.POOR - return EpcEfficiency.VERY_POOR + return thatched_efficiency_thickness(insulation_thickness) - if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return thatched_efficiency_age_band(age_band) + + +def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: return EpcEfficiency.POOR - if age_band == EpcConstructionAgeBand.from_1967_to_1975: - return EpcEfficiency.VERY_POOR + return EpcEfficiency.VERY_POOR - raise ValueError("Cannot determine flat limited insulation efficiency without insulation thickness or age band") + +def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 150: + return EpcEfficiency.GOOD + + if insulation_thickness >= 75: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 25: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine sloping ceiling roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return sloping_ceiling_efficiency_thickness(insulation_thickness) + + return sloping_ceiling_efficiency_age_band(age_band) + + +def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 400mm, 350mm = very good + 200-300mm = good + 125-175 = average + 50-100 = poor + 25 and below= very poor + :return: + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation") + + if insulation_thickness >= 350: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 200: + return EpcEfficiency.GOOD + + if insulation_thickness >= 125: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + # 2023 onwards -> Very Good + # 2012-2022 -> Good + Before 1900 -> Very Poor + :param age_band: + :return: + """ ROOF_DESCRIPTION_EFFICIENCIES: 
Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { # Flat roof EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, - EpcRoofDescriptions.flat_limited_insulation: flat_limited_insulation_efficiency, - EpcRoofDescriptions.flat_insulated: flat_insulated_efficiency, + EpcRoofDescriptions.flat_limited_insulation: flat_efficiency, + EpcRoofDescriptions.flat_insulated: flat_efficiency, # Loft: # value mappings @@ -511,4 +637,13 @@ ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] # Another dwelling above EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, + # Thatched + EpcRoofDescriptions.thatched: thatched_efficiency, + EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency, + + # Sloping ceiling + EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, + } From 90389ec3c2d84064979c43f56d3b3b3c09e5e08c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 15:54:22 +0000 Subject: [PATCH 14/33] added loft_insulated_at_rafters_efficiency --- backend/onboarders/epc_descriptions.py | 37 +++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index fc10ad70..d2237880 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -598,11 +598,40 @@ def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | N def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: """ # 2023 onwards -> Very Good - # 2012-2022 -> Good - Before 1900 -> Very Poor - :param age_band: - :return: + # 2003-2006, 2012-2022 -> Good + # 1983 - 1990, 1996-2002 -> Average + # 1976-1982 -> Poor + # 1967-1975 and earlier 
bands -> Very Poor + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine loft insulated at rafters roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness) + + return loft_insulated_at_rafters_efficiency_age_band(age_band) ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { From 2631b4aa204f2b521b6d8cd2c54f18dbadcab6e1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 18:47:42 +0000 Subject: [PATCH 15/33] Roof and wall tests green --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/onboarders/__init__.py | 0 backend/onboarders/epc_descriptions.py | 51 ++++- .../mappings/as_built_roof_classifiers.py | 55 +++++ .../mappings/as_built_wall_classifiers.py | 112 +++++++++ backend/onboarders/parity.py | 213 ++++-------------- .../onboarders/tests/test_roof_remapping.py | 175 ++++++++++++++ .../onboarders/tests/test_wall_remapping.py | 163 ++++++++++++++ infrastructure/terraform/main.tf | 4 +- pytest.ini | 2 +- 11 files changed, 596 insertions(+), 183 deletions(-) create mode 100644 backend/onboarders/__init__.py create mode 100644 backend/onboarders/mappings/as_built_roof_classifiers.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ 
No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/onboarders/__init__.py b/backend/onboarders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index d2237880..c6fe9de9 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -1,3 +1,4 @@ +import pandas as pd import re from collections.abc import Mapping from enum import Enum @@ -211,12 +212,6 @@ class EpcEfficiency(Enum): NA = "N/A" -EfficiencyRule = Union[ - EpcEfficiency, - Callable[[EpcConstructionAgeBand], EpcEfficiency], -] - - def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: """" Maps cavity filled to efficiency based on construction age band. @@ -343,6 +338,16 @@ WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] # Cob (special case) EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, + + # Unknown mappings which are unhandled + EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA, + } @@ -676,3 +681,37 @@ ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, } + + +def resolve_roof_efficiency( + description: EpcRoofDescriptions, + age_band: EpcConstructionAgeBand | None, + insulation_thickness: int 
| None, +) -> EpcEfficiency: + """ + Resolve roof efficiency from description + age band + insulation thickness. + """ + + # Unknown / holding descriptions → efficiency unknown + if description in description.unknown_descriptions: + return EpcEfficiency.NA + + rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + # Fixed efficiency + if isinstance(rule, EpcEfficiency): + return rule + + # Callable rule + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + try: + # Try (thickness, age_band) + return rule(insulation_thickness, age_band) + except TypeError: + # Fallback to (age_band) + return rule(age_band) diff --git a/backend/onboarders/mappings/as_built_roof_classifiers.py b/backend/onboarders/mappings/as_built_roof_classifiers.py new file mode 100644 index 00000000..7c672ce5 --- /dev/null +++ b/backend/onboarders/mappings/as_built_roof_classifiers.py @@ -0,0 +1,55 @@ +from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcRoofDescriptions + + +def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a flat, as built roof, these are the breakdowns: + + 2023 onwards → Flat, insulated + 2003–2022 → Flat, insulated + 1983–2002 → Flat, insulated + 1976–1982 → Flat, limited insulation + 1967–1975 → Flat, limited insulation + 1950–1966 and earlier → Flat, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions + """ + + year = age_band.start_year() + + if year >= 1983: + return EpcRoofDescriptions.flat_insulated + + if year >= 1967: + return EpcRoofDescriptions.flat_limited_insulation + + return EpcRoofDescriptions.flat_no_insulation + + +def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a sloping ceiling, as built roof, these are the breakdowns: + 2023 onwards → Sloping pitched, insulated + 2003–2022 → Sloping pitched, insulated + 1983–2002 → Sloping pitched, 
insulated + 1976–1982 → Sloping pitched, limited insulation + 1967–1975 and earlier → Sloping pitched, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions + """ + year = age_band.start_year() + + if year >= 1983: + return EpcRoofDescriptions.sloping_pitched_insulated + + if year >= 1976: + return EpcRoofDescriptions.sloping_pitched_limited_insulation + + return EpcRoofDescriptions.sloping_pitched_no_insulation + + +AS_BUILT_ROOF_CLASSIFIERS = { + # Only need to apply this to flat and sloping ceiling roofs + "Flat": classify_flat_roof, + "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, +} diff --git a/backend/onboarders/mappings/as_built_wall_classifiers.py b/backend/onboarders/mappings/as_built_wall_classifiers.py index e69de29b..f907a533 100644 --- a/backend/onboarders/mappings/as_built_wall_classifiers.py +++ b/backend/onboarders/mappings/as_built_wall_classifiers.py @@ -0,0 +1,112 @@ +from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcWallDescriptions + + +def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.cavity_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.cavity_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.cavity_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") + + +def map_solid_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.solid_brick_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.solid_brick_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.solid_brick_insulated_assumed + + raise 
NotImplementedError( + f"Age band {age_band.value} not handled for solid wall insulation mapping" + ) + + +def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1950: + return EpcWallDescriptions.timber_frame_no_insulation_assumed + + if age_band.start_year() < 1976: + return EpcWallDescriptions.timber_frame_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1976): + return EpcWallDescriptions.timber_frame_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for timber frame wall insulation mapping" + ) + + +def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.system_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.system_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.system_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for system build wall insulation mapping" + ) + + +def map_granite_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.granite_whinstone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.granite_whinestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for granite wall insulation mapping" + ) + + +def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return 
EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.sandstone_limestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for sandstone wall insulation mapping" + ) + + +def map_cob_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1983: + return EpcWallDescriptions.cob_as_built_average + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.cob_as_built_good + + raise NotImplementedError( + f"Age band {age_band.value} not handled for cob wall insulation mapping" + ) + + +AS_BUILT_WALL_CLASSIFIERS = { + "Cavity": map_cavity_wall_insulation, + "Solid Brick": map_solid_wall_insulation, + "Timber Frame": map_timber_frame_wall_insulation, + "System": map_system_build_wall_insulation, + "Granite": map_granite_wall_insulation, + "Sandstone": map_sandstone_wall_insulation, + "Cob": map_cob_wall_insulation, +} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index c3b4184d..69a64a89 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,3 +1,4 @@ +import re from numpy import nan from tqdm import tqdm import pandas as pd @@ -5,8 +6,9 @@ from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ - WALL_DESCRIPTION_EFFICIENCIES -from onboarders.epc_descriptions import EpcRoofDescriptions + WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency +from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS +from backend.onboarders.mappings.as_built_roof_classifiers import 
AS_BUILT_ROOF_CLASSIFIERS tqdm.pandas() @@ -97,117 +99,6 @@ wall_mapping = { ('Cob', 'AsBuilt'): None, } - -def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1976: - return EpcWallDescriptions.cavity_no_insulation_assumed - - if age_band == EpcConstructionAgeBand.from_1976_to_1982: - return EpcWallDescriptions.cavity_partial_insulated_assumed - - if age_band in EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.cavity_insulated_assumed - - raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") - - -def map_solid_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1976: - return EpcWallDescriptions.solid_brick_no_insulation_assumed - - if age_band == EpcConstructionAgeBand.from_1976_to_1982: - return EpcWallDescriptions.solid_brick_partial_insulated_assumed - - if age_band in EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.solid_brick_insulated_assumed - - raise NotImplementedError( - f"Age band {age_band.value} not handled for solid wall insulation mapping" - ) - - -def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1950: - return EpcWallDescriptions.timber_frame_no_insulation_assumed - - if age_band.start_year() < 1976: - return EpcWallDescriptions.timber_frame_partial_insulated_assumed - - if age_band in EpcConstructionAgeBand.from_year_onwards(1976): - return EpcWallDescriptions.timber_frame_insulated_assumed - - raise NotImplementedError( - f"Age band {age_band.value} not handled for timber frame wall insulation mapping" - ) - - -def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1976: - return EpcWallDescriptions.system_no_insulation_assumed - - if age_band == EpcConstructionAgeBand.from_1976_to_1982: - return EpcWallDescriptions.system_partial_insulated_assumed - - if age_band in 
EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.system_insulated_assumed - - raise NotImplementedError( - f"Age band {age_band.value} not handled for system build wall insulation mapping" - ) - - -def map_granite_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1976: - return EpcWallDescriptions.granite_whinstone_no_insulation_assumed - - if age_band == EpcConstructionAgeBand.from_1976_to_1982: - return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed - - if age_band in EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.granite_whinestone_insulated_assumed - - raise NotImplementedError( - f"Age band {age_band.value} not handled for granite wall insulation mapping" - ) - - -def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1976: - return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed - - if age_band == EpcConstructionAgeBand.from_1976_to_1982: - return EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed - - if age_band in EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.sandstone_limestone_insulated_assumed - - raise NotImplementedError( - f"Age band {age_band.value} not handled for sandstone wall insulation mapping" - ) - - -def map_cob_wall_insulation(age_band: EpcConstructionAgeBand): - if age_band.start_year() < 1983: - return EpcWallDescriptions.cob_as_built_average - - if age_band in EpcConstructionAgeBand.from_year_onwards(1983): - return EpcWallDescriptions.cob_as_built_good - - raise NotImplementedError( - f"Age band {age_band.value} not handled for cob wall insulation mapping" - ) - - -AS_BUILT_WALL_CLASSIFIERS = { - "Cavity": map_cavity_wall_insulation, - "Solid Brick": map_solid_wall_insulation, - "Timber Frame": map_timber_frame_wall_insulation, - "System": map_system_build_wall_insulation, - "Granite": map_granite_wall_insulation, - "Sandstone": 
map_sandstone_wall_insulation, - "Cob": map_cob_wall_insulation, -} - WALL_UNKNOWN_AGE_FALLBACK = { "Cavity": EpcWallDescriptions.cavity_as_built_unknown, "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, @@ -378,60 +269,6 @@ roof_mapping = { ('PitchedWithSlopingCeiling', 'Unknown'): None, # } - -def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: - """ - For a flat, as built roof, these are the breakdowns: - - 2023 onwards → Flat, insulated - 2003–2022 → Flat, insulated - 1983–2002 → Flat, insulated - 1976–1982 → Flat, limited insulation - 1967–1975 → Flat, limited insulation - 1950–1966 and earlier → Flat, no insulation - :param age_band: Input age band - :return: EpcRoofDescriptions - """ - - year = age_band.start_year() - - if year >= 1983: - return EpcRoofDescriptions.flat_insulated - - if year >= 1967: - return EpcRoofDescriptions.flat_limited_insulation - - return EpcRoofDescriptions.flat_no_insulation - - -def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: - """ - For a sloping ceiling, as built roof, these are the breakdowns: - 2023 onwards → Sloping pitched, insulated - 2003–2022 → Sloping pitched, insulated - 1983–2002 → Sloping pitched, insulated - 1976–1982 → Sloping pitched, limited insulation - 1967–1975 and earlier → Sloping pitched, no insulation - :param age_band: Input age band - :return: EpcRoofDescriptions - """ - year = age_band.start_year() - - if year >= 1983: - return EpcRoofDescriptions.sloping_pitched_insulated - - if year >= 1976: - return EpcRoofDescriptions.sloping_pitched_limited_insulation - - return EpcRoofDescriptions.sloping_pitched_no_insulation - - -AS_BUILT_ROOF_CLASSIFIERS = { - # Only need to apply this to flat and sloping ceiling roofs - "Flat": classify_flat_roof, - "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, -} - ROOF_UNKNOWN_AGE_FALLBACK = { "Flat": EpcRoofDescriptions.flat_as_built_unknown, "PitchedWithSlopingCeiling": 
EpcRoofDescriptions.sloping_pitched_as_built_unknown, @@ -478,13 +315,45 @@ data["landlord_roof_description"] = data.progress_apply( assert data["landlord_roof_description"].isnull().sum() == 0, ( "Some roof descriptions could not be resolved" ) -# TODO: 1) Map energy efficiency -# TODO: 2) Flag sloped ceilings + + +def extract_insulation_thickness(value: str | None) -> int | None: + """ + Extract insulation thickness in mm from a string like 'mm150'. + Returns None if not present or not parseable. + """ + if value is None or pd.isnull(value): + return None + + match = re.search(r"(\d+)", str(value)) + if not match: + return None + + return int(match.group(1)) + + +data["roof_insulation_thickness_mm"] = data["Roof Insulation"].apply( + extract_insulation_thickness +) + +data["landlord_roof_efficiency"] = data.progress_apply( + lambda row: resolve_roof_efficiency( + description=row.landlord_roof_description, + age_band=row.construction_age_band, + insulation_thickness=row.roof_insulation_thickness_mm, + ), + axis=1, +) + +assert data["landlord_roof_efficiency"].isnull().sum() == 0 + +# Flag sloping ceiling +data["has_sloping_ceiling"] = data["Roof Construction"].apply( + lambda x: x == "PitchedWithSlopingCeiling" +) # Variables we want to map -# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', -# 'Attachment', 'Construction Years', -# 'Roof Construction', 'Roof Insulation', +# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', # 'Total Floor Area (m2)' diff --git a/backend/onboarders/tests/test_roof_remapping.py b/backend/onboarders/tests/test_roof_remapping.py index e69de29b..a08471f9 100644 --- a/backend/onboarders/tests/test_roof_remapping.py +++ b/backend/onboarders/tests/test_roof_remapping.py @@ -0,0 +1,175 @@ +import pytest + +from backend.onboarders.epc_descriptions import ( + 
EpcConstructionAgeBand, + EpcRoofDescriptions, + EpcEfficiency, + resolve_roof_efficiency, +) + +from backend.onboarders.mappings.as_built_roof_classifiers import ( + classify_flat_roof, + classify_sloping_ceiling_roof, +) + + +# --------------------------------------------------------------------- +# As-built roof description classification +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation), + (EpcConstructionAgeBand.from_1950_to_1966, EpcRoofDescriptions.flat_no_insulation), + (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation), + (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation), + (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated), + (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated), + ], +) +def test_classify_flat_roof(age_band, expected): + assert classify_flat_roof(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation), + (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation), + (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation), + (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated), + (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated), + ], +) +def test_classify_sloping_ceiling_roof(age_band, expected): + assert classify_sloping_ceiling_roof(age_band) == expected + + +# 
--------------------------------------------------------------------- +# Roof efficiency — fixed & age-band driven +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + # Flat roof, no insulation + (EpcRoofDescriptions.flat_no_insulation, EpcConstructionAgeBand.before_1900, EpcEfficiency.VERY_POOR), + + # Flat roof, limited insulation (age-band driven) + (EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1976_to_1982, EpcEfficiency.POOR), + ( + EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1967_to_1975, + EpcEfficiency.VERY_POOR), + + # Flat roof, insulated (age-band driven) + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_1983_to_1990, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + + # Pitched, insulated assumed (loft) + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_1996_to_2002, EpcEfficiency.GOOD), + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_2007_to_2011, + EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_age_band_only(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +# --------------------------------------------------------------------- +# Roof efficiency — insulation thickness driven +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, thickness, expected", + [ + # Loft insulation + (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR), + (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR), + (EpcRoofDescriptions.loft_75mm_insulation, 
75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD), + (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD), + + # Flat insulated — thickness overrides age band + (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR), + (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD), + + # Sloping ceiling + (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.sloping_pitched_insulated, 150, EpcEfficiency.GOOD), + (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_thickness_based(description, thickness, expected): + assert resolve_roof_efficiency( + description=description, + age_band=EpcConstructionAgeBand.before_1900, # should be ignored + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Thatched roofs +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_age_band(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +@pytest.mark.parametrize( + "thickness, expected", + [ + (12, EpcEfficiency.AVERAGE), + (50, EpcEfficiency.GOOD), + (150, EpcEfficiency.GOOD), + (200, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_thickness(thickness, 
expected): + assert resolve_roof_efficiency( + description=EpcRoofDescriptions.thatched_with_additional_insulation, + age_band=EpcConstructionAgeBand.before_1900, + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Unknown / holding descriptions +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description", + [ + EpcRoofDescriptions.flat_as_built_unknown, + EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ], +) +def test_unknown_roof_descriptions_return_na(description): + assert resolve_roof_efficiency( + description=description, + age_band=None, + insulation_thickness=None, + ) == EpcEfficiency.NA diff --git a/backend/onboarders/tests/test_wall_remapping.py b/backend/onboarders/tests/test_wall_remapping.py index e69de29b..eaac5afb 100644 --- a/backend/onboarders/tests/test_wall_remapping.py +++ b/backend/onboarders/tests/test_wall_remapping.py @@ -0,0 +1,163 @@ +import pytest + +from backend.onboarders.epc_descriptions import ( + EpcConstructionAgeBand, + EpcWallDescriptions, + EpcEfficiency, + resolve_wall_efficiency, +) + +from backend.onboarders.mappings.as_built_wall_classifiers import ( + map_cavity_wall_insulation, + map_solid_wall_insulation, + map_timber_frame_wall_insulation, + map_system_build_wall_insulation, + map_granite_wall_insulation, + map_sandstone_wall_insulation, + map_cob_wall_insulation, +) + + +# --------------------------------------------------------------------- +# As-built wall description classification +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, 
EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed), + ], +) +def test_map_cavity_wall_insulation(age_band, expected): + assert map_cavity_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.solid_brick_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed), + ], +) +def test_map_solid_wall_insulation(age_band, expected): + assert map_solid_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed), + ], +) +def test_map_timber_frame_wall_insulation(age_band, expected): + assert map_timber_frame_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed), + ], +) +def test_map_system_wall_insulation(age_band, expected): + assert map_system_build_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + 
(EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed), + ], +) +def test_map_granite_wall_insulation(age_band, expected): + assert map_granite_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.sandstone_limestone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed), + ], +) +def test_map_sandstone_wall_insulation(age_band, expected): + assert map_sandstone_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good), + ], +) +def test_map_cob_wall_insulation(age_band, expected): + assert map_cob_wall_insulation(age_band) == expected + + +# --------------------------------------------------------------------- +# Wall efficiency resolution +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + # Fixed efficiencies + (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR), + (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE), + (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD), + + # Function-based efficiencies + ( + EpcWallDescriptions.cavity_filled_cavity, + 
EpcConstructionAgeBand.from_2023_onwards, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.cavity_filled_cavity, + EpcConstructionAgeBand.from_1991_to_1995, + EpcEfficiency.GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_2003_to_2006, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_1950_to_1966, + EpcEfficiency.GOOD, + ), + ], +) +def test_resolve_wall_efficiency(description, age_band, expected): + assert resolve_wall_efficiency(description, age_band) == expected + + +@pytest.mark.parametrize( + "description", + [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ], +) +def test_unknown_wall_descriptions_return_na(description): + assert resolve_wall_efficiency(description, None) == EpcEfficiency.NA diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 5a67b793..b97a2f4d 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -86,7 +86,7 @@ resource "aws_db_instance" "default" { # Temporary to enfore immediate change apply_immediately = true # Set up storage type to gp3 for better performance - storage_type = "gp3" + storage_type = "gp3" } # Set up the bucket that recieve the csv uploads of epc to be retrofit @@ -244,7 +244,7 @@ module "lambda_heating_cost_prediction_ecr" { } module "lambda_hot_water_cost_prediction_ecr" { - ecr_name = "hot-water-cost-prediction-${var.stage}" + ecr_name = "hot-water-fcost-prediction-${var.stage}" source = "./modules/ecr" } diff --git a/pytest.ini b/pytest.ini index 1422657b..fe2c7d67 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] 
pythonpath = . addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/onboarders/tests From 918e5fd8cea5433b466839bb49f9ca857b5be388 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 19:45:08 +0000 Subject: [PATCH 16/33] applying floor transformations --- backend/onboarders/epc_descriptions.py | 16 ++++ backend/onboarders/parity.py | 128 ++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index c6fe9de9..57b4ab89 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -715,3 +715,19 @@ def resolve_roof_efficiency( except TypeError: # Fallback to (age_band) return rule(age_band) + + +class EpcFloorDescriptions(Enum): + # Solid floor + solid_insulated = "Solid, insulated" + solid_insulated_assumed = "Solid, insulated (assumed)" + solid_no_insulation_assumed = "Solid, no insulation (assumed)" + solid_limited_insulation_assumed = "Solid, limited insulation (assumed)" + + # Suspended floor + suspended_insulated = "Suspended, insulated" + suspended_insulated_assumed = "Suspended, insulated (assumed)" + suspended_no_insulation_assumed = "Suspended, no insulation (assumed)" + suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)" + + unknown = None # We don't resolve anything diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 69a64a89..67e65115 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -6,7 +6,7 @@ from backend.onboarders.mappings.property_type import parity_map as property_map 
from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ - WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency + WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS @@ -352,6 +352,132 @@ data["has_sloping_ceiling"] = data["Roof Construction"].apply( lambda x: x == "PitchedWithSlopingCeiling" ) +# ------------ Floor Construction ------------ + + +floor_mapping = { + # Solid floor + ('Solid', 'AsBuilt'): None, # Mapped + ('Solid', 'Unknown'): None, # Mapped + ('Solid', nan): None, # Mapped + ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated, + + # Suspended floor + ('SuspendedTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, + ('SuspendedTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, + ('SuspendedNotTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + + # Unknown type - mapped on age + ('Unknown', 'Unknown'): None, # Mapped unknown_floor_as_built + ('Unknown', 'RetroFitted'): None, # Mapped unknown_floor_retrofitted + (nan, nan): None, # No actual information! 
+ ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built +} + + +# Unknown floor, as built +# Before 1900, 1900 - 1929 -> Suspended, no insulation (assumed) +# 1930-1949, 1950 - 1966, 1967 - 1975, 1976-1982, 1983-1990, 1991-1995, -> Solid, no insulation (assumed) +# 1996 - 2002, Solid, limited insulation (assumed) +# 2003 onwards -> Solid, insulated (assumed) + +def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + + if year >= 1930: + return EpcFloorDescriptions.solid_no_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed + + +# before 1900, 1900-1929 -> Suspended, insulated +# Thereafter, 1930 onwards -> Solid, insulated +def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 1930: + return EpcFloorDescriptions.solid_insulated + + return EpcFloorDescriptions.suspended_insulated + + +# 2003 - 2006, 2023 onwards -> Solid, insulated (assumed) +# 1996 - 2022 -> Solid, limited insulation (assumed) +# 1983 - 1990, 1991 - 1995 -> Solid, no insulation (assumed) +def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + if year >= 1996: + return EpcFloorDescriptions.solid_limited_insulation_assumed + return EpcFloorDescriptions.solid_no_insulation_assumed + + +# 2003 -> 2006 -> Suspended, insulated (assumed) +# 1996 - 2022 -> Suspended, limited insulation (assumed) +# 1983 - 1995 -> Suspended, no insulation (assumed) +def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.suspended_insulated_assumed + if year >= 1996: + return 
EpcFloorDescriptions.suspended_limited_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed + + +data["landlord_floor_description"] = ( + data[["Floor Construction", "Floor Insulation"]] + .progress_apply(tuple, axis=1) + .map(floor_mapping) +) + + +def fill_floor_as_built(row): + # 1. Already resolved + if row.landlord_floor_description is not None: + return row.landlord_floor_description + + age_band = row.construction_age_band + floor_type = row["Floor Construction"] + insulation = row["Floor Insulation"] + + # 2. Missing age band → conservative fallback + if pd.isnull(age_band): + return EpcFloorDescriptions.unknown + + # 3. Known floor types + if floor_type == "Solid": + return solid_floor_as_built(age_band) + + if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}: + return suspended_floor_as_built(age_band) + + # 4. Unknown floor type + if floor_type == "Unknown": + if insulation == "RetroFitted": + return unknown_floor_retrofitted(age_band) + return unknown_floor_as_built(age_band) + + # 5. 
Truly missing / garbage input + return EpcFloorDescriptions.unknown + + +data["landlord_floor_description"] = data.progress_apply( + fill_floor_as_built, + axis=1, +) + # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', From 41aed1cffef232296a83190ea38bda376fa5353c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 2 Feb 2026 20:01:47 +0000 Subject: [PATCH 17/33] added unit tests for floors --- .../mappings/as_built_floor_classifiers.py | 46 +++++++++ backend/onboarders/parity.py | 66 ++----------- .../onboarders/tests/test_floor_remapping.py | 98 +++++++++++++++++++ 3 files changed, 153 insertions(+), 57 deletions(-) create mode 100644 backend/onboarders/mappings/as_built_floor_classifiers.py create mode 100644 backend/onboarders/tests/test_floor_remapping.py diff --git a/backend/onboarders/mappings/as_built_floor_classifiers.py b/backend/onboarders/mappings/as_built_floor_classifiers.py new file mode 100644 index 00000000..9f14fa7d --- /dev/null +++ b/backend/onboarders/mappings/as_built_floor_classifiers.py @@ -0,0 +1,46 @@ +from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcFloorDescriptions + + +def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + + if year >= 1996: + return EpcFloorDescriptions.solid_limited_insulation_assumed + + if year >= 1930: + return EpcFloorDescriptions.solid_no_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed + + +def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 1930: + return EpcFloorDescriptions.solid_insulated + + return EpcFloorDescriptions.suspended_insulated + + +def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> 
EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + if year >= 1996: + return EpcFloorDescriptions.solid_limited_insulation_assumed + return EpcFloorDescriptions.solid_no_insulation_assumed + + +def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.suspended_insulated_assumed + if year >= 1996: + return EpcFloorDescriptions.suspended_limited_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 67e65115..dfba0443 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -9,6 +9,8 @@ from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstruc WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS +from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ + solid_floor_as_built, suspended_floor_as_built tqdm.pandas() @@ -379,63 +381,6 @@ floor_mapping = { ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built } - -# Unknown floor, as built -# Before 1900, 1900 - 1929 -> Suspended, no insulation (assumed) -# 1930-1949, 1950 - 1966, 1967 - 1975, 1976-1982, 1983-1990, 1991-1995, -> Solid, no insulation (assumed) -# 1996 - 2002, Solid, limited insulation (assumed) -# 2003 onwards -> Solid, insulated (assumed) - -def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.solid_insulated_assumed - - if year >= 1930: - return 
EpcFloorDescriptions.solid_no_insulation_assumed - - return EpcFloorDescriptions.suspended_no_insulation_assumed - - -# before 1900, 1900-1929 -> Suspended, insulated -# Thereafter, 1930 onwards -> Solid, insulated -def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 1930: - return EpcFloorDescriptions.solid_insulated - - return EpcFloorDescriptions.suspended_insulated - - -# 2003 - 2006, 2023 onwards -> Solid, insulated (assumed) -# 1996 - 2022 -> Solid, limited insulation (assumed) -# 1983 - 1990, 1991 - 1995 -> Solid, no insulation (assumed) -def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.solid_insulated_assumed - if year >= 1996: - return EpcFloorDescriptions.solid_limited_insulation_assumed - return EpcFloorDescriptions.solid_no_insulation_assumed - - -# 2003 -> 2006 -> Suspended, insulated (assumed) -# 1996 - 2022 -> Suspended, limited insulation (assumed) -# 1983 - 1995 -> Suspended, no insulation (assumed) -def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.suspended_insulated_assumed - if year >= 1996: - return EpcFloorDescriptions.suspended_limited_insulation_assumed - - return EpcFloorDescriptions.suspended_no_insulation_assumed - - data["landlord_floor_description"] = ( data[["Floor Construction", "Floor Insulation"]] .progress_apply(tuple, axis=1) @@ -478,6 +423,13 @@ data["landlord_floor_description"] = data.progress_apply( axis=1, ) +# All values should be remapped now +assert data["landlord_floor_description"].isnull().sum() == 0, ( + "Some floor descriptions could not be resolved" +) + +# TODO: Convert everything to values + # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', # 'Floor 
Construction', 'Floor Insulation', 'Glazing', 'Heating', diff --git a/backend/onboarders/tests/test_floor_remapping.py b/backend/onboarders/tests/test_floor_remapping.py new file mode 100644 index 00000000..26a05d22 --- /dev/null +++ b/backend/onboarders/tests/test_floor_remapping.py @@ -0,0 +1,98 @@ +import pytest + +from backend.onboarders.epc_descriptions import ( + EpcConstructionAgeBand, + EpcFloorDescriptions, +) +from backend.onboarders.mappings.as_built_floor_classifiers import ( + unknown_floor_as_built, + unknown_floor_retrofitted, + solid_floor_as_built, + suspended_floor_as_built, +) + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Before 1900 / 1900–1929 → suspended, no insulation + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed), + (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed), + + # 1930–1995 → solid, no insulation + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed), + + # 1996–2002 → solid, limited insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed), + + # 2003+ → solid, insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed), + ], +) +def 
test_unknown_floor_as_built(age_band, expected): + assert unknown_floor_as_built(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Pre-1930 → suspended, insulated + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated), + (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated), + + # 1930+ → solid, insulated + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated), + ], +) +def test_unknown_floor_retrofitted(age_band, expected): + assert unknown_floor_retrofitted(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # 1983–1995 → no insulation + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed), + + # 1996–2002 → limited insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed), + + # 2003+ → insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed), + ], +) +def test_solid_floor_as_built(age_band, expected): + assert solid_floor_as_built(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # 1983–1995 → no insulation + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed), + + # 1996–2002 → limited 
insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed), + + # 2003+ → insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed), + ], +) +def test_suspended_floor_as_built(age_band, expected): + assert suspended_floor_as_built(age_band) == expected From 6cb20b4828545ced6de8dc8ebe3acfd24500e4de Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Feb 2026 11:52:59 +0000 Subject: [PATCH 18/33] added windows glazing remapping --- backend/onboarders/parity.py | 58 ++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index dfba0443..a1faea3c 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -356,7 +356,6 @@ data["has_sloping_ceiling"] = data["Roof Construction"].apply( # ------------ Floor Construction ------------ - floor_mapping = { # Solid floor ('Solid', 'AsBuilt'): None, # Mapped @@ -428,10 +427,65 @@ assert data["landlord_floor_description"].isnull().sum() == 0, ( "Some floor descriptions could not be resolved" ) +# ------------ Glazing ------------ +glazing_map = { + # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area + # For SAP 10 assessments, The glazed type and glazed area are not populated in the EPC API data any more + "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + # For triple glazing, with age unknown, 
the performance is only average, whereas if it's a post 2022 + # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to + # how we make updates to the windows data. + # Triple known data is high performance glazing with Good efficiency (at least) + "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + # This is also classed as high performance glazing + "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) + "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), +} + +data[["landlord_windows_description", + "landlord_windows_efficiency", + "landlord_multi_glaze_proportion", + "landlord_glazed_type", + "landlord_glazed_area"]] = data["Glazing"].map(glazing_map).progress_apply(pd.Series) + +# Peform the remapping. 
The columns we wish to produce are the following: +# 1) landlord_windows_description +# 2) landlord_windows_efficiency +# 3) landlord_multi_glaze_proportion - maybe don't need to set this, same for glazing typd and area + + +# ------------ Heating ------------ + + +agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel']).size().reset_index(name='counts') +epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv") + +epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts() +epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts() + +epcs[ + (epcs["WINDOWS_DESCRIPTION"] == "Full secondary glazing") & (epcs["LODGEMENT_DATE"] > "2025-07-01") + ]["WINDOWS_ENERGY_EFF"].value_counts() + +# ------------ Fuel ------------ + +# ------------ Heating Controls ------------ + +# ------------ Floor Area ------------ # TODO: Convert everything to values # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', -# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', +# 'Glazing', 'Heating', # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', # 'Total Floor Area (m2)' + + +data["Glazing"].value_counts() + +data["Glazing"].value_counts() From 3c832b1aca206ae69f7670709769649376e736b8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Feb 2026 12:11:55 +0000 Subject: [PATCH 19/33] establishing sttruture for heating system mapping --- backend/onboarders/epc/placeholder.py | 13 ++++ backend/onboarders/parity.py | 85 ++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 backend/onboarders/epc/placeholder.py diff --git a/backend/onboarders/epc/placeholder.py b/backend/onboarders/epc/placeholder.py new file mode 100644 index 00000000..ba18a303 --- /dev/null +++ b/backend/onboarders/epc/placeholder.py @@ -0,0 +1,13 @@ +from enum import Enum + + +class EpcFuel(Enum): + 
electricity_not_community = "electricity (not community)" + + +class EpcHeatingControls(Enum): + programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" + + +class EpcHeatingSystems(Enum): + boiler_and_radiators_electric = "Boiler and radiators, electric" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index a1faea3c..b1d3e88a 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -7,6 +7,7 @@ from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions +from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ @@ -456,13 +457,93 @@ data[["landlord_windows_description", # Peform the remapping. 
The columns we wish to produce are the following: # 1) landlord_windows_description # 2) landlord_windows_efficiency -# 3) landlord_multi_glaze_proportion - maybe don't need to set this, same for glazing typd and area +# 3) landlord_multi_glaze_proportion - maybe don't need to store this, same for glazing type and area # ------------ Heating ------------ +agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy']).size().reset_index(name='counts') -agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel']).size().reset_index(name='counts') +# We map to: +# 1) Heating description +# 2) Heating efficiency +# 3) Fuel type +# 4) Heating controls + +heating_map = { + + ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs + ), + ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, + ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): None, + ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): None, + ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): None, + ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): None, + ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): None, 
+ ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): None, + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): None, + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): None, + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): None, + ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): None, + ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): None, + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): None, + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): None, + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): None, + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None, + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): 
None, + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): None, + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None, + ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): None, + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None, + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): None, + ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None +} + +example = list(heating_map.keys())[0] + +example_data = data[ + (data["Heating"] == example[0]) & (data["Boiler Efficiency"] == example[1]) & (data["Main Fuel"] == example[2]) & ( + data["Controls Adequacy"] == example[3]) + ] + +example_data["UPRN"] + +agg_tuples = { + (row['Heating'], row['Boiler Efficiency'], row['Main Fuel']): None for _, row in agg.iterrows() +} epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv") epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts() From 2331228ff6365b010205e256c8d54230c461b086 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Feb 2026 15:04:29 +0000 Subject: [PATCH 20/33] working through mapping heating --- backend/onboarders/epc/placeholder.py | 6 +++ backend/onboarders/parity.py | 72 +++++++++++++++++++++------ 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/backend/onboarders/epc/placeholder.py b/backend/onboarders/epc/placeholder.py index ba18a303..2d52d4ad 100644 --- a/backend/onboarders/epc/placeholder.py +++ b/backend/onboarders/epc/placeholder.py @@ -3,11 +3,17 @@ from enum import Enum class EpcFuel(Enum): electricity_not_community = "electricity (not community)" + lpg_not_community = "LPG (not community)" + mains_gas_not_community = "mains gas (not community)" class EpcHeatingControls(Enum): programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" + programmers_trvs_bypass 
= "Programmer, TRVs and bypass" + time_and_temperature_zone_control = "Time and temperature zone control" class EpcHeatingSystems(Enum): boiler_and_radiators_electric = "Boiler and radiators, electric" + boiler_and_radiators_lpg = "Boiler and radiators, LPG" + boiler_radiators_mains_gas = "Boiler and radiators, mains gas" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index b1d3e88a..3ca54cab 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -469,22 +469,65 @@ agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequ # 2) Heating efficiency # 3) Fuel type # 4) Heating controls +# 5) Heating controls efficiency + +# TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating +# For sub optimal heating controls, we're going to make an assumption as to what the heating controls are +# and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an upper limit +# as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating +# controls. 
+ +# Boiler ratings based on efficiency +# 90%+ = A +# 86-89.9% = B -> Mapped to good efficiency +# 78 - 85% = C +# 70 - 77.9% = D +# 65 - 69.9% = E +# 60 - 64.9% = F +# <60% = G heating_map = { ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD ), - ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, - ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): None, - ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): None, ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): None, ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): None, ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): None, @@ -532,14 +575,14 @@ heating_map = { ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None } -example = list(heating_map.keys())[0] +example = list(heating_map.keys())[5] example_data = data[ (data["Heating"] == example[0]) & (data["Boiler Efficiency"] == example[1]) & (data["Main Fuel"] == example[2]) & ( data["Controls Adequacy"] == example[3]) ] -example_data["UPRN"] +example_data["UPRN"].values.tolist() agg_tuples = { (row['Heating'], row['Boiler Efficiency'], row['Main Fuel']): None for _, row in agg.iterrows() @@ -550,8 +593,9 @@ 
epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts( epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts() epcs[ - (epcs["WINDOWS_DESCRIPTION"] == "Full secondary glazing") & (epcs["LODGEMENT_DATE"] > "2025-07-01") - ]["WINDOWS_ENERGY_EFF"].value_counts() + (epcs["MAINHEAT_DESCRIPTION"] == EpcHeatingSystems.boiler_radiators_mains_gas.value) & ( + epcs["LODGEMENT_DATE"] > "2025-07-01") + ]["MAINHEAT_ENERGY_EFF"].value_counts() # ------------ Fuel ------------ From 4de7dafccbeefefbd3139dbcbe18959fdcde7798 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 3 Feb 2026 16:51:03 +0000 Subject: [PATCH 21/33] added further categories and modifying A rated boiler efficiency categories --- backend/onboarders/epc/placeholder.py | 2 + backend/onboarders/parity.py | 172 ++++++++++++++++++++------ 2 files changed, 139 insertions(+), 35 deletions(-) diff --git a/backend/onboarders/epc/placeholder.py b/backend/onboarders/epc/placeholder.py index 2d52d4ad..70a4f9f9 100644 --- a/backend/onboarders/epc/placeholder.py +++ b/backend/onboarders/epc/placeholder.py @@ -5,6 +5,7 @@ class EpcFuel(Enum): electricity_not_community = "electricity (not community)" lpg_not_community = "LPG (not community)" mains_gas_not_community = "mains gas (not community)" + oil_not_community = "oil (not community)" class EpcHeatingControls(Enum): @@ -17,3 +18,4 @@ class EpcHeatingSystems(Enum): boiler_and_radiators_electric = "Boiler and radiators, electric" boiler_and_radiators_lpg = "Boiler and radiators, LPG" boiler_radiators_mains_gas = "Boiler and radiators, mains gas" + boiler_radiators_oil = "Boiler and radiators, oil" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 3ca54cab..438ce215 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -475,7 +475,7 @@ agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequ # For sub optimal heating controls, we're going to 
make an assumption as to what the heating controls are # and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an upper limit # as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating -# controls. +# controls. E.g. it may be programmer and room thermostat # Boiler ratings based on efficiency # 90%+ = A @@ -487,72 +487,173 @@ agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequ # <60% = G heating_map = { - + # 0 ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD ), + # 1 ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE ), + # 2 ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD ), + # 3 ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD ), + # 4 ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD ), + # 5 ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, 
EpcFuel.mains_gas_not_community, + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE ), + # 6 ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD ), + # 7 ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD ), + # 8 ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE ), + # 9 ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD ), - ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): None, - ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): None, - ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): None, - ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): None, - ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): None, - ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): 
None, - ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): None, - ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): None, - ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): None, - ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None, + # 10 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 11 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + # 12 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + # 13 + ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 14 + ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + 
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + # 15 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 16 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + # 17 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + # 18 - oil boilers have an average efficiency rating + ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 18 + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + # 19 + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + # 20 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 21 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + 
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + # 22 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + # 23 + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 24 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + # 25 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 
'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), + ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + ), + ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ), + ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + ), ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): None, ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): None, ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, @@ -575,14 +676,14 @@ heating_map = { ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None } -example = list(heating_map.keys())[5] +example = list(heating_map.keys())[25] example_data = data[ (data["Heating"] == example[0]) & (data["Boiler Efficiency"] == example[1]) & (data["Main Fuel"] == example[2]) & ( data["Controls Adequacy"] == example[3]) ] -example_data["UPRN"].values.tolist() +print(example_data["UPRN"].values.tolist()) agg_tuples = { (row['Heating'], row['Boiler Efficiency'], row['Main Fuel']): None for _, row in agg.iterrows() @@ -593,9 +694,10 @@ epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts( epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts() epcs[ - (epcs["MAINHEAT_DESCRIPTION"] == EpcHeatingSystems.boiler_radiators_mains_gas.value) & ( - 
epcs["LODGEMENT_DATE"] > "2025-07-01") - ]["MAINHEAT_ENERGY_EFF"].value_counts() + (epcs["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas") +]["MAINHEAT_ENERGY_EFF"].value_counts() + +z = data[data["Address 1"].str.lower().str.contains("133, mayo")] # ------------ Fuel ------------ From 4d2275fd1208a6309fb3fafc47be7cbd12c7aac7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 4 Feb 2026 10:56:37 +0000 Subject: [PATCH 22/33] set up basic mapping --- backend/onboarders/epc/placeholder.py | 36 +++ backend/onboarders/parity.py | 339 +++++++++++++++++--------- 2 files changed, 266 insertions(+), 109 deletions(-) diff --git a/backend/onboarders/epc/placeholder.py b/backend/onboarders/epc/placeholder.py index 70a4f9f9..64807c49 100644 --- a/backend/onboarders/epc/placeholder.py +++ b/backend/onboarders/epc/placeholder.py @@ -6,6 +6,8 @@ class EpcFuel(Enum): lpg_not_community = "LPG (not community)" mains_gas_not_community = "mains gas (not community)" oil_not_community = "oil (not community)" + manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel" + smokeless_coal = "smokeless coal" class EpcHeatingControls(Enum): @@ -13,9 +15,43 @@ class EpcHeatingControls(Enum): programmers_trvs_bypass = "Programmer, TRVs and bypass" time_and_temperature_zone_control = "Time and temperature zone control" + # Room heaters + programmer_and_appliance_thermostats = "Programmer and appliance thermostats" + appliance_thermostats = "Appliance thermostats" + + # Storage heaters + automatic_charge_control = "Automatic charge control" + manual_charge_control = "Manual charge control" + + # Warm air + programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats" + class EpcHeatingSystems(Enum): + # boiler and radiators boiler_and_radiators_electric = "Boiler and radiators, electric" boiler_and_radiators_lpg = "Boiler and radiators, LPG" boiler_radiators_mains_gas = "Boiler and radiators, mains gas" boiler_radiators_oil = 
"Boiler and radiators, oil" + # underfloor + electric_underfloor_heating = "Electric underfloor heating" + # ashp + air_to_air_ashp = "Air source heat pump, warm air, electric" + ashp_radiators_electric = "Air source heat pump, radiators, electric" + # Room heaters + room_heaters_electric = "Room heaters, electric" + room_heaters_mains_gas = "Room heaters, mains gas" + room_heaters_smokeless_fuel = "Room heaters, smokeless fuel" + room_heaters_coal = "Room heaters, coal" + # Storage heaters + electric_storage_heaters = "Electric storage heaters" + # Warm air + warm_air_electricaire = "Warm air, Electricaire" + warm_air_mains_gas = "Warm air, mains gas" + + +class EpcHotWaterSystems(Enum): + # from primary heating system + from_main_system = "From main system" + # Common for heater-based systems, e.g. room heaters or storage heaters + electric_immersion_off_peak = "Electric immersion, off-peak" diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 438ce215..88e548c2 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -7,7 +7,7 @@ from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions -from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems +from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems, EpcHotWaterSystems from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ @@ -460,9 +460,8 @@ 
data[["landlord_windows_description", # 3) landlord_multi_glaze_proportion - maybe don't need to store this, same for glazing type and area -# ------------ Heating ------------ +# ------------ Heating, fuel, controls & hot water ------------ -agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy']).size().reset_index(name='counts') # We map to: # 1) Heating description @@ -470,6 +469,8 @@ agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequ # 3) Fuel type # 4) Heating controls # 5) Heating controls efficiency +# 6) Hot water system +# 7) Hot water efficiency # TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating # For sub optimal heating controls, we're going to make an assumption as to what the heating controls are @@ -490,229 +491,349 @@ heating_map = { # 0 ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 1 ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 2 ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + 
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 3 ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 4 ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 5 ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 6 ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 7 ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 8 ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 9 ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 10 ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 11 ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 12 ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 13 ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - 
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 14 ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 15 ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 16 ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 17 ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), - # 18 - oil boilers have an average efficiency rating ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD - ), - # 18 - ('Boilers', 'C', 'OilNotCommunity', 
'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 19 - ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 20 - ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 21 - ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): ( + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 22 - ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + 
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 23 - ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 24 - ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), # 25 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 26 ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 27 ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 28 ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 29 ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 30 ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 31 ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 32 ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, 
EpcEfficiency.VERY_GOOD + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 33 ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), + # 34 ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE ), - ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): None, - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): None, - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, - ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): None, - ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): None, - ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): None, - ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None, - ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): None, - ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): None, - ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): 
None, - ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): None, - ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None, - ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): None, - ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None + # 35 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 36 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 37 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 38 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 39 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + 
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 40 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 41 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 42 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 43 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 44 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 45 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 46 + ('Room 
heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 47 - water done from here + ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, 
EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ) } -example = list(heating_map.keys())[25] - -example_data = data[ - (data["Heating"] == example[0]) & (data["Boiler Efficiency"] == example[1]) & (data["Main Fuel"] == example[2]) & ( - data["Controls Adequacy"] == example[3]) +# Apply the mapping +data[ + [ + "landlord_heating_description", + "landlord_heating_efficiency", + "landlord_fuel_type", + "landlord_heating_controls_description", + "landlord_heating_controls_efficiency", + "landlord_hot_water_system_description", + "landlord_hot_water_efficiency" ] - -print(example_data["UPRN"].values.tolist()) - -agg_tuples = { - (row['Heating'], row['Boiler Efficiency'], row['Main Fuel']): None for _, row in agg.iterrows() -} -epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv") - -epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts() -epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts() - -epcs[ - (epcs["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas") -]["MAINHEAT_ENERGY_EFF"].value_counts() - -z = data[data["Address 
1"].str.lower().str.contains("133, mayo")] - -# ------------ Fuel ------------ - -# ------------ Heating Controls ------------ +] = data[ + [ + "Heating", + "Boiler Efficiency", + "Main Fuel", + "Controls Adequacy" + ] +].progress_apply(tuple, axis=1).map(heating_map).progress_apply(pd.Series) # ------------ Floor Area ------------ -# TODO: Convert everything to values - -# Variables we want to map -# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', -# 'Glazing', 'Heating', -# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', -# 'Total Floor Area (m2)' - - -data["Glazing"].value_counts() - -data["Glazing"].value_counts() +# This is just a rename +data = data.rename( + columns={"Total Floor Area (m2)": "landlord_total_floor_area_m2"} +) From f4f9fc5b199d1e3c46609a82d3e7c90a5ff726af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 4 Feb 2026 18:34:59 +0000 Subject: [PATCH 23/33] beginning to assembly the parity class --- backend/onboarders/base.py | 29 ++- backend/onboarders/epc/placeholder.py | 57 ----- backend/onboarders/epc_descriptions.py | 216 +------------------ backend/onboarders/mappings/built_form.py | 23 +- backend/onboarders/mappings/property_type.py | 10 +- backend/onboarders/parity.py | 58 +++-- datatypes/epc/__init__.py | 0 datatypes/epc/construction_age_band.py | 45 ++++ datatypes/epc/efficiency.py | 10 + datatypes/epc/fuel.py | 10 + datatypes/epc/heating_controls.py | 18 ++ datatypes/epc/hotwater.py | 8 + datatypes/epc/main_heating.py | 24 +++ datatypes/epc/property_type_built_form.py | 17 ++ datatypes/epc/roof.py | 86 ++++++++ datatypes/epc/walls.py | 74 +++++++ 16 files changed, 374 insertions(+), 311 deletions(-) delete mode 100644 backend/onboarders/epc/placeholder.py create mode 100644 datatypes/epc/__init__.py create mode 100644 datatypes/epc/construction_age_band.py create mode 100644 datatypes/epc/efficiency.py create mode 100644 datatypes/epc/fuel.py create mode 100644 datatypes/epc/heating_controls.py 
create mode 100644 datatypes/epc/hotwater.py create mode 100644 datatypes/epc/main_heating.py create mode 100644 datatypes/epc/property_type_built_form.py create mode 100644 datatypes/epc/roof.py create mode 100644 datatypes/epc/walls.py diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 12ef9c94..258784f1 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -1,7 +1,30 @@ -class OnboarderBase: +import pandas as pd +from utils.s3 import read_from_s3 - def read(self): - pass + +class OnboarderBase: + data: pd.DataFrame | None = None + + def read_s3(self, bucket_name: str, file_name: str): + self.data = read_from_s3(bucket_name=bucket_name, s3_file_name=file_name) def write(self): pass + + @staticmethod + def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool: + # We only allow nulls if the original value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + @staticmethod + def assert_no_nulls(data: pd.DataFrame, column: str): + assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not" + + def map_construction_age_band(self): + pass diff --git a/backend/onboarders/epc/placeholder.py b/backend/onboarders/epc/placeholder.py deleted file mode 100644 index 64807c49..00000000 --- a/backend/onboarders/epc/placeholder.py +++ /dev/null @@ -1,57 +0,0 @@ -from enum import Enum - - -class EpcFuel(Enum): - electricity_not_community = "electricity (not community)" - lpg_not_community = "LPG (not community)" - mains_gas_not_community = "mains gas (not community)" - oil_not_community = "oil (not community)" - manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel" - smokeless_coal = 
"smokeless coal" - - -class EpcHeatingControls(Enum): - programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" - programmers_trvs_bypass = "Programmer, TRVs and bypass" - time_and_temperature_zone_control = "Time and temperature zone control" - - # Room heaters - programmer_and_appliance_thermostats = "Programmer and appliance thermostats" - appliance_thermostats = "Appliance thermostats" - - # Storage heaters - automatic_charge_control = "Automatic charge control" - manual_charge_control = "Manual charge control" - - # Warm air - programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats" - - -class EpcHeatingSystems(Enum): - # boiler and radiators - boiler_and_radiators_electric = "Boiler and radiators, electric" - boiler_and_radiators_lpg = "Boiler and radiators, LPG" - boiler_radiators_mains_gas = "Boiler and radiators, mains gas" - boiler_radiators_oil = "Boiler and radiators, oil" - # underfloor - electric_underfloor_heating = "Electric underfloor heating" - # ashp - air_to_air_ashp = "Air source heat pump, warm air, electric" - ashp_radiators_electric = "Air source heat pump, radiators, electric" - # Room heaters - room_heaters_electric = "Room heaters, electric" - room_heaters_mains_gas = "Room heaters, mains gas" - room_heaters_smokeless_fuel = "Room heaters, smokeless fuel" - room_heaters_coal = "Room heaters, coal" - # Storage heaters - electric_storage_heaters = "Electric storage heaters" - # Warm air - warm_air_electricaire = "Warm air, Electricaire" - warm_air_mains_gas = "Warm air, mains gas" - - -class EpcHotWaterSystems(Enum): - # from primary heating system - from_main_system = "From main system" - # Common for heater-based systems, e.g. 
room heaters or storage heaters - electric_immersion_off_peak = "Electric immersion, off-peak" diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index 57b4ab89..bfe6b07f 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -1,215 +1,11 @@ import pandas as pd -import re -from collections.abc import Mapping from enum import Enum -from typing import Callable, Union, List - - -class EpcConstructionAgeBand(Enum): - before_1900: str = 'England and Wales: before 1900' - from_1900_to_1929: str = 'England and Wales: 1900-1929' - from_1930_to_1949: str = 'England and Wales: 1930-1949' - from_1950_to_1966: str = 'England and Wales: 1950-1966' - from_1967_to_1975: str = 'England and Wales: 1967-1975' - from_1976_to_1982: str = 'England and Wales: 1976-1982' - from_1983_to_1990: str = 'England and Wales: 1983-1990' - from_1991_to_1995: str = 'England and Wales: 1991-1995' - from_1996_to_2002: str = 'England and Wales: 1996-2002' - from_2003_to_2006: str = 'England and Wales: 2003-2006' - from_2007_to_2011: str = 'England and Wales: 2007-2011' - from_2012_onwards: str = 'England and Wales: 2012-onwards' - from_2012_to_2022: str = 'England and Wales: 2012-2022' - from_2023_onwards: str = 'England and Wales: 2023 onwards' - - def start_year(self) -> int: - """ - Extract the starting year of the age band. - """ - value = self.value.lower() - - if 'before' in value: - return 0 - match = re.search(r'(\d{4})', value) - if not match: - raise ValueError(f"Cannot determine start year from '{self.value}'") - - return int(match.group(1)) - - @classmethod - def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]: - """ - Return all age bands whose starting year is >= the given year. 
- """ - return [ - band - for band in cls - if band.start_year() >= year - ] - - -class EpcWallDescriptions(Enum): - # Cavity wall descriptions - cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)" - cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)" - cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)" - cavity_filled_cavity: str = "Cavity wall, filled cavity" - cavity_internal_insulation: str = "Cavity wall, with internal insulation" - cavity_external_insulation: str = "Cavity wall, with external insulation" - cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation" - cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation" - - # Solid wall descriptions - solid_brick_internal_insulation: str = "Solid brick, with internal insulation" - solid_brick_external_insulation: str = "Solid brick, with external insulation" - solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)' - solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)' - solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)' - - # System - system_external_insulation: str = "System built, with external insulation" - system_internal_insulation: str = "System built, with internal insulation" - system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)" - system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)" - system_insulated_assumed: str = "System built, as built, insulated (assumed)" - - # Timber - timber_frame_internal_insulation: str = "Timber frame, with internal insulation" - timber_frame_external_insulation: str = "Timber frame, with external insulation" - timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)" - 
timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)" - timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)" - - # Granite/whinstone - granite_whinstone_external_insulation: str = "Granite or whin, with external insulation" - granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation" - granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)" - granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)" - granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)" - - # Sandstone/limestone - sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation" - sandstone_limestone_external_insulation: str = "Sandstone, with external insulation" - sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)" - sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)" - sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)" - - # Cob - cob_as_built_average = "Cob, as built" - cob_as_built_good = "Cob, as built" - - # unknown descriptions which may get mapped later or handled via fallback - cavity_as_built_unknown = "Cavity wall, as built, unknown insulation" - solid_brick_as_built_unknown = "Solid brick, as built, unknown insulation" - system_as_built_unknown = "System built, as built, unknown insulation" - timber_frame_as_built_unknown = "Timber frame, as built, unknown insulation" - granite_as_built_unknown = "Granite or whin, as built, unknown insulation" - sandstone_as_built_unknown = "Sandstone, as built, unknown insulation" - cob_as_built_unknown = "Cob, as built, unknown insulation" - - @property - def unknown_descriptions(self) -> List["EpcWallDescriptions"]: - return [ - 
EpcWallDescriptions.cavity_as_built_unknown, - EpcWallDescriptions.solid_brick_as_built_unknown, - EpcWallDescriptions.system_as_built_unknown, - EpcWallDescriptions.timber_frame_as_built_unknown, - EpcWallDescriptions.granite_as_built_unknown, - EpcWallDescriptions.sandstone_as_built_unknown, - EpcWallDescriptions.cob_as_built_unknown, - ] - - -class EpcRoofDescriptions(Enum): - # Loft - # Assumed options - pitched_insulated_assumed = "Pitched, insulated (assumed)" - pitched_no_insulation = "Pitched, no insulation" - # Insulation thickness options - loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" - loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" - loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" - loft_75mm_insulation: str = "Pitched, 75 mm loft insulation" - loft_100mm_insulation: str = "Pitched, 100 mm loft insulation" - loft_125mm_insulation: str = "Pitched, 125 mm loft insulation" - loft_150mm_insulation: str = "Pitched, 150 mm loft insulation" - loft_175mm_insulation: str = "Pitched, 175 mm loft insulation" - loft_200mm_insulation: str = "Pitched, 200 mm loft insulation" - loft_250mm_insulation: str = "Pitched, 250 mm loft insulation" - loft_270mm_insulation: str = "Pitched, 270 mm loft insulation" - loft_300mm_insulation: str = "Pitched, 300 mm loft insulation" - loft_350mm_insulation: str = "Pitched, 350 mm loft insulation" - loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation" - # Insulated at rafters "Pitched, insulated at rafters" - # Rafters - # 400mm, 350mm = very good - # 200-300mm = good - # 125-175 = average - # 50-100 = poor - # 25 and below= very poor - loft_insulated_at_rafters: str = "Pitched, insulated at rafters" - # another dwelling above - another_dwelling_above: str = "(another dwelling above)" - # flat roof, which if there is observed insulation is just "flat, insulated", however there is a - # different efficiency rating depending on insulation thickness - # categories: - # 12mm = very 
poor & has limited insulation description - # 25, 50 = poor & has limited insulation description - # 75, 100, 125mm = average (Flat, insulated) - # 150, 175, 200, 225, 250mm = good (Flat, insulated) - # 270mm+ = very good (Flat, insulated) - # As built 2023 = Flat, insulated, Very good - # 2003 - 2006, up to 2012-2022 = Flat insulated, Good - # 1983-1990, 1996-2002 = Flat, insulated, Average - # 1976-1982 = Flat, limited insulation, poor - # 1967 - 1975 = Flat, limited insulation, Very Poor - # 1950-1966 and earlier bands = flat, no insulation, very poor - - flat_insulated = "Flat, insulated" - flat_limited_insulation = "Flat, limited insulation" - flat_no_insulation = "Flat, no insulation" - - # Thatched roof descriptions - # With Loft insulation at joists - # Thatched + 12mm = thatched, with additional insulation, average - # Thatched + 25, 50, 100, 150mm = thatched, with additional insulation, good - # Thatched + 175mm+ = thatched, with additional insulation, very good - # With loft insulation at rafters [out of scope atm] - # Unknown insulation - # Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", Average - # 2003-2006, 2012-2022 = "Thatched", Good - # 2023 onwards = "Thatched", Very Good - thatched = "Thatched" # We see this for no insulation, has average performance - thatched_with_additional_insulation: str = "Thatched, with additional insulation" - - # Sloping ceiling - # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped - sloping_pitched_no_insulation: str = "Pitched, no insulation" - sloping_pitched_limited_insulation: str = "Pitched, limited insulation" - sloping_pitched_insulated: str = "Pitched, insulated" - - # Unknown descriptions which may get mapped later or handled via fallback - flat_as_built_unknown: str = "Flat, as built, unknown insulation" - loft_as_built_unknown: str = "Loft, as built, unknown insulation" - thatched_as_built_unknown: str = "Thatched, as built, unknown 
insulation" - sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation" - - @property - def unknown_descriptions(self) -> List["EpcRoofDescriptions"]: - return [ - EpcRoofDescriptions.flat_as_built_unknown, - EpcRoofDescriptions.loft_as_built_unknown, - EpcRoofDescriptions.thatched_as_built_unknown, - EpcRoofDescriptions.sloping_pitched_as_built_unknown, - ] - - -class EpcEfficiency(Enum): - VERY_POOR = "Very Poor" - POOR = "Poor" - AVERAGE = "Average" - GOOD = "Good" - VERY_GOOD = "Very Good" - NA = "N/A" +from collections.abc import Mapping +from typing import Callable, Union +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.roof import EpcRoofDescriptions def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py index 23901fc6..12ae6360 100644 --- a/backend/onboarders/mappings/built_form.py +++ b/backend/onboarders/mappings/built_form.py @@ -1,15 +1,10 @@ -parity_map = { - "MidTerrace": "Mid-Terrace", - "EndTerrace": "End-Terrace", - "Detached": "Detached", - "SemiDetached": "Semi-Detached", - "EnclosedMidTerrace": "Enclosed Mid-Terrace", - "EnclosedEndTerrace": "Enclosed End-Terrace", -} +from datatypes.epc.property_type_built_form import BuiltForm -# MidTerrace 41462 -# EndTerrace 20910 -# Detached 16875 -# SemiDetached 14725 -# EnclosedMidTerrace 3176 -# EnclosedEndTerrace 2393 +parity_map = { + "MidTerrace": BuiltForm.mid_terrace, + "EndTerrace": BuiltForm.end_terrace, + "Detached": BuiltForm.detached, + "SemiDetached": BuiltForm.semi_detached, + "EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace, + "EnclosedEndTerrace": BuiltForm.enclosed_end_terrace, +} diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/property_type.py index 
75deef04..f91c0c88 100644 --- a/backend/onboarders/mappings/property_type.py +++ b/backend/onboarders/mappings/property_type.py @@ -1,6 +1,8 @@ +from datatypes.epc.property_type_built_form import PropertyType + parity_map = { - "Flat": "Flat", - "Maisonette": "Maisonette", - "Bungalow": "Bungalow", - "House": "House", + "Flat": PropertyType.flat, + "Maisonette": PropertyType.maisonette, + "Bungalow": PropertyType.bungalow, + "House": PropertyType.house, } diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 88e548c2..c1931437 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -2,12 +2,16 @@ import re from numpy import nan from tqdm import tqdm import pandas as pd +from backend.onboarders.base import OnboarderBase from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import parity_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions -from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems, EpcHotWaterSystems +from datatypes.epc.fuel import EpcFuel +from datatypes.epc.heating_controls import EpcHeatingControls +from datatypes.epc.main_heating import EpcHeatingSystems +from datatypes.epc.hotwater import EpcHotWaterSystems from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ @@ -15,18 +19,6 @@ from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor tqdm.pandas() - -def check_nulls(data, 
original_column, mapped_column): - # We only allow nulls if the oroginal value was null - null_vals = data[pd.isnull(data[mapped_column])] - if null_vals.empty: - return True - # We make sure all original values were null - assert pd.isnull(null_vals[original_column]).all(), ( - f"Some values in {mapped_column} were not mapped, but original values were not null" - ) - - # Sample input data data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " @@ -34,21 +26,41 @@ data = pd.read_excel( sheet_name="Sustainability" ) + +class ParityOnboarder(OnboarderBase): + + def __init__( + self, + fileuri: str, + ): + # Extract bucket, and filekey; Will be in the format s3://bucket/key + bucket_name = fileuri.split("/")[2] + file_name = "/".join(fileuri.split("/")[3:]) + + self.read_s3(bucket_name=bucket_name, file_name=file_name) + pass + + def map_construction_age_band(self): + data["construction_age_band"] = data["Construction Years"].map(age_band_map) + self.assert_nulls_only_from_source_nulls(data, "Construction Years", "construction_age_band") + + def map_property_type(self): + data["property_type"] = data["Type"].map(property_map) + self.assert_no_nulls(data, "property_type") + + def process(self): + # ------------ construction_age_band ------------ + self.map_construction_age_band() + + # ------------ property_type ------------ + self.map_property_type() + + # We want to map the parity fields to standard EPC references. 
This will allow us to # 1) Estimate EPCs, more accurately # 2) Patch incorrect EPCs with ease # 3) Indicate already installed measures -# ------------ construction_age_band ------------ - -data["construction_age_band"] = data["Construction Years"].map(age_band_map) - -check_nulls(data, "Construction Years", "construction_age_band") - -# ------------ property_type ------------ -data["property_type"] = data["Type"].map(property_map) - -assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" # ------------ built_form ------------ data["built_form"] = data["Attachment"].map(built_form_map) diff --git a/datatypes/epc/__init__.py b/datatypes/epc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/datatypes/epc/construction_age_band.py b/datatypes/epc/construction_age_band.py new file mode 100644 index 00000000..c5e7a03b --- /dev/null +++ b/datatypes/epc/construction_age_band.py @@ -0,0 +1,45 @@ +import re +from enum import Enum +from typing import List + + +class EpcConstructionAgeBand(Enum): + before_1900: str = 'England and Wales: before 1900' + from_1900_to_1929: str = 'England and Wales: 1900-1929' + from_1930_to_1949: str = 'England and Wales: 1930-1949' + from_1950_to_1966: str = 'England and Wales: 1950-1966' + from_1967_to_1975: str = 'England and Wales: 1967-1975' + from_1976_to_1982: str = 'England and Wales: 1976-1982' + from_1983_to_1990: str = 'England and Wales: 1983-1990' + from_1991_to_1995: str = 'England and Wales: 1991-1995' + from_1996_to_2002: str = 'England and Wales: 1996-2002' + from_2003_to_2006: str = 'England and Wales: 2003-2006' + from_2007_to_2011: str = 'England and Wales: 2007-2011' + from_2012_onwards: str = 'England and Wales: 2012-onwards' + from_2012_to_2022: str = 'England and Wales: 2012-2022' + from_2023_onwards: str = 'England and Wales: 2023 onwards' + + def start_year(self) -> int: + """ + Extract the starting year of the age band. 
+ """ + value = self.value.lower() + + if 'before' in value: + return 0 + match = re.search(r'(\d{4})', value) + if not match: + raise ValueError(f"Cannot determine start year from '{self.value}'") + + return int(match.group(1)) + + @classmethod + def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]: + """ + Return all age bands whose starting year is >= the given year. + """ + return [ + band + for band in cls + if band.start_year() >= year + ] diff --git a/datatypes/epc/efficiency.py b/datatypes/epc/efficiency.py new file mode 100644 index 00000000..0417f49e --- /dev/null +++ b/datatypes/epc/efficiency.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcEfficiency(Enum): + VERY_POOR: str = "Very Poor" + POOR: str = "Poor" + AVERAGE: str = "Average" + GOOD: str = "Good" + VERY_GOOD: str = "Very Good" + NA: str = "N/A" diff --git a/datatypes/epc/fuel.py b/datatypes/epc/fuel.py new file mode 100644 index 00000000..0d1e455c --- /dev/null +++ b/datatypes/epc/fuel.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcFuel(Enum): + electricity_not_community = "electricity (not community)" + lpg_not_community = "LPG (not community)" + mains_gas_not_community = "mains gas (not community)" + oil_not_community = "oil (not community)" + manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel" + smokeless_coal = "smokeless coal" diff --git a/datatypes/epc/heating_controls.py b/datatypes/epc/heating_controls.py new file mode 100644 index 00000000..48538bff --- /dev/null +++ b/datatypes/epc/heating_controls.py @@ -0,0 +1,18 @@ +from enum import Enum + + +class EpcHeatingControls(Enum): + programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" + programmers_trvs_bypass = "Programmer, TRVs and bypass" + time_and_temperature_zone_control = "Time and temperature zone control" + + # Room heaters + programmer_and_appliance_thermostats = "Programmer and appliance thermostats" + appliance_thermostats = "Appliance thermostats" + 
+ # Storage heaters + automatic_charge_control = "Automatic charge control" + manual_charge_control = "Manual charge control" + + # Warm air + programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats" diff --git a/datatypes/epc/hotwater.py b/datatypes/epc/hotwater.py new file mode 100644 index 00000000..96af2be3 --- /dev/null +++ b/datatypes/epc/hotwater.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class EpcHotWaterSystems(Enum): + # from primary heating system + from_main_system = "From main system" + # Common for heater-based systems, e.g. room heaters or storage heaters + electric_immersion_off_peak = "Electric immersion, off-peak" diff --git a/datatypes/epc/main_heating.py b/datatypes/epc/main_heating.py new file mode 100644 index 00000000..663ada99 --- /dev/null +++ b/datatypes/epc/main_heating.py @@ -0,0 +1,24 @@ +from enum import Enum + + +class EpcHeatingSystems(Enum): + # boiler and radiators + boiler_and_radiators_electric = "Boiler and radiators, electric" + boiler_and_radiators_lpg = "Boiler and radiators, LPG" + boiler_radiators_mains_gas = "Boiler and radiators, mains gas" + boiler_radiators_oil = "Boiler and radiators, oil" + # underfloor + electric_underfloor_heating = "Electric underfloor heating" + # ashp + air_to_air_ashp = "Air source heat pump, warm air, electric" + ashp_radiators_electric = "Air source heat pump, radiators, electric" + # Room heaters + room_heaters_electric = "Room heaters, electric" + room_heaters_mains_gas = "Room heaters, mains gas" + room_heaters_smokeless_fuel = "Room heaters, smokeless fuel" + room_heaters_coal = "Room heaters, coal" + # Storage heaters + electric_storage_heaters = "Electric storage heaters" + # Warm air + warm_air_electricaire = "Warm air, Electricaire" + warm_air_mains_gas = "Warm air, mains gas" diff --git a/datatypes/epc/property_type_built_form.py b/datatypes/epc/property_type_built_form.py new file mode 100644 index 00000000..2fd59ddf --- /dev/null +++ 
b/datatypes/epc/property_type_built_form.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class PropertyType(Enum): + flat = "Flat" + maisonette = "Maisonette" + bungalow = "Bungalow" + house = "House" + + +class BuiltForm(Enum): + mid_terrace = "Mid-Terrace" + end_terrace = "End-Terrace" + detached = "Detached" + semi_detached = "Semi-Detached" + enclosed_mid_terrace = "Enclosed Mid-Terrace" + enclosed_end_terrace = "Enclosed End-Terrace" diff --git a/datatypes/epc/roof.py b/datatypes/epc/roof.py new file mode 100644 index 00000000..9cdaac96 --- /dev/null +++ b/datatypes/epc/roof.py @@ -0,0 +1,86 @@ +from enum import Enum +from typing import List + + +class EpcRoofDescriptions(Enum): + # Loft + # Assumed options + pitched_insulated_assumed: str = "Pitched, insulated (assumed)" + pitched_no_insulation: str = "Pitched, no insulation" + # Insulation thickness options + loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" + loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" + loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" + loft_75mm_insulation: str = "Pitched, 75 mm loft insulation" + loft_100mm_insulation: str = "Pitched, 100 mm loft insulation" + loft_125mm_insulation: str = "Pitched, 125 mm loft insulation" + loft_150mm_insulation: str = "Pitched, 150 mm loft insulation" + loft_175mm_insulation: str = "Pitched, 175 mm loft insulation" + loft_200mm_insulation: str = "Pitched, 200 mm loft insulation" + loft_250mm_insulation: str = "Pitched, 250 mm loft insulation" + loft_270mm_insulation: str = "Pitched, 270 mm loft insulation" + loft_300mm_insulation: str = "Pitched, 300 mm loft insulation" + loft_350mm_insulation: str = "Pitched, 350 mm loft insulation" + loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation" + # Insulated at rafters "Pitched, insulated at rafters" + # Rafters + # 400mm, 350mm = very good + # 200-300mm = good + # 125-175 = average + # 50-100 = poor + # 25 and below= very poor + 
loft_insulated_at_rafters: str = "Pitched, insulated at rafters" + # another dwelling above + another_dwelling_above: str = "(another dwelling above)" + # flat roof, which if there is observed insulation is just "flat, insulated", however there is a + # different efficiency rating depending on insulation thickness + # categories: + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + # As built 2023 = Flat, insulated, Very good + # 2003 - 2006, up to 2012-2022 = Flat insulated, Good + # 1983-1990, 1996-2002 = Flat, insulated, Average + # 1976-1982 = Flat, limited insulation, poor + # 1967 - 1975 = Flat, limited insulation, Very Poor + # 1950-1966 and earlier bands = flat, no insulation, very poor + + flat_insulated: str = "Flat, insulated" + flat_limited_insulation: str = "Flat, limited insulation" + flat_no_insulation: str = "Flat, no insulation" + + # Thatched roof descriptions + # With Loft insulation at joists + # Thatched + 12mm = thatched, with additional insulation, average + # Thatched + 25, 50, 100, 150mm = thatched, with additional insulation, good + # Thatched + 175mm+ = thatched, with additional insulation, very good + # With loft insulation at rafters [out of scope atm] + # Unknown insulation + # Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", Average + # 2003-2006, 2012-2022 = "Thatched", Good + # 2023 onwards = "Thatched", Very Good + thatched: str = "Thatched" # We see this for no insulation, has average performance + thatched_with_additional_insulation: str = "Thatched, with additional insulation" + + # Sloping ceiling + # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped + sloping_pitched_no_insulation: str = "Pitched, no insulation" + sloping_pitched_limited_insulation: 
str = "Pitched, limited insulation" + sloping_pitched_insulated: str = "Pitched, insulated" + + # Unknown descriptions which may get mapped later or handled via fallback + flat_as_built_unknown: str = "Flat, as built, unknown insulation" + loft_as_built_unknown: str = "Loft, as built, unknown insulation" + thatched_as_built_unknown: str = "Thatched, as built, unknown insulation" + sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcRoofDescriptions"]: + return [ + EpcRoofDescriptions.flat_as_built_unknown, + EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ] diff --git a/datatypes/epc/walls.py b/datatypes/epc/walls.py new file mode 100644 index 00000000..44ca7e49 --- /dev/null +++ b/datatypes/epc/walls.py @@ -0,0 +1,74 @@ +from enum import Enum +from typing import List + + +class EpcWallDescriptions(Enum): + # Cavity wall descriptions + cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)" + cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)" + cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)" + cavity_filled_cavity: str = "Cavity wall, filled cavity" + cavity_internal_insulation: str = "Cavity wall, with internal insulation" + cavity_external_insulation: str = "Cavity wall, with external insulation" + cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation" + cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation" + + # Solid wall descriptions + solid_brick_internal_insulation: str = "Solid brick, with internal insulation" + solid_brick_external_insulation: str = "Solid brick, with external insulation" + solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)' + 
solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)' + solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)' + + # System + system_external_insulation: str = "System built, with external insulation" + system_internal_insulation: str = "System built, with internal insulation" + system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)" + system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)" + system_insulated_assumed: str = "System built, as built, insulated (assumed)" + + # Timber + timber_frame_internal_insulation: str = "Timber frame, with internal insulation" + timber_frame_external_insulation: str = "Timber frame, with external insulation" + timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)" + timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)" + timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)" + + # Granite/whinstone + granite_whinstone_external_insulation: str = "Granite or whin, with external insulation" + granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation" + granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)" + granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)" + granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)" + + # Sandstone/limestone + sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation" + sandstone_limestone_external_insulation: str = "Sandstone, with external insulation" + sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)" + sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)" + 
sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)" + + # Cob + cob_as_built_average: str = "Cob, as built" + cob_as_built_good: str = "Cob, as built" + + # unknown descriptions which may get mapped later or handled via fallback + cavity_as_built_unknown: str = "Cavity wall, as built, unknown insulation" + solid_brick_as_built_unknown: str = "Solid brick, as built, unknown insulation" + system_as_built_unknown: str = "System built, as built, unknown insulation" + timber_frame_as_built_unknown: str = "Timber frame, as built, unknown insulation" + granite_as_built_unknown: str = "Granite or whin, as built, unknown insulation" + sandstone_as_built_unknown: str = "Sandstone, as built, unknown insulation" + cob_as_built_unknown: str = "Cob, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcWallDescriptions"]: + return [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ] From ae02561cd0e972b687c15677839cdc8f67963ef1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 08:54:27 +0000 Subject: [PATCH 24/33] preparing partiy class --- backend/onboarders/base.py | 36 +- backend/onboarders/epc_descriptions.py | 16 - backend/onboarders/handler.py | 0 .../mappings/{ => parity}/age_band.py | 0 .../as_built_floor_classifiers.py | 0 .../{ => parity}/as_built_roof_classifiers.py | 0 .../{ => parity}/as_built_wall_classifiers.py | 0 .../mappings/{ => parity}/built_form.py | 0 backend/onboarders/mappings/parity/floor.py | 26 + backend/onboarders/mappings/parity/glazing.py | 20 + backend/onboarders/mappings/parity/heating.py | 330 +++++ .../mappings/{ => parity}/property_type.py | 0 
backend/onboarders/mappings/parity/roof.py | 103 ++ backend/onboarders/mappings/parity/walls.py | 56 + backend/onboarders/mappings/walls.py | 3 - backend/onboarders/parity.py | 1077 +++++------------ datatypes/epc/floor.py | 17 + 17 files changed, 859 insertions(+), 825 deletions(-) create mode 100644 backend/onboarders/handler.py rename backend/onboarders/mappings/{ => parity}/age_band.py (100%) rename backend/onboarders/mappings/{ => parity}/as_built_floor_classifiers.py (100%) rename backend/onboarders/mappings/{ => parity}/as_built_roof_classifiers.py (100%) rename backend/onboarders/mappings/{ => parity}/as_built_wall_classifiers.py (100%) rename backend/onboarders/mappings/{ => parity}/built_form.py (100%) create mode 100644 backend/onboarders/mappings/parity/floor.py create mode 100644 backend/onboarders/mappings/parity/glazing.py create mode 100644 backend/onboarders/mappings/parity/heating.py rename backend/onboarders/mappings/{ => parity}/property_type.py (100%) create mode 100644 backend/onboarders/mappings/parity/roof.py create mode 100644 backend/onboarders/mappings/parity/walls.py delete mode 100644 backend/onboarders/mappings/walls.py create mode 100644 datatypes/epc/floor.py diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 258784f1..b90f5fc4 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -3,7 +3,39 @@ from utils.s3 import read_from_s3 class OnboarderBase: + # Input dataset to be transformed data: pd.DataFrame | None = None + # Description columns + landlord_wall_construction: str = "landlord_wall_construction" + landlord_roof_construction: str = "landlord_roof_construction" + landlord_floor_construction: str = "landlord_floor_construction" + landlord_windows_construction: str = "landlord_windows_construction" + landlord_heating_construction: str = "landlord_heating_construction" + landlord_fuel_construction: str = "landlord_fuel_construction" + landlord_heating_controls_construction: str = 
"landlord_heating_controls_construction" + landlord_hot_water_system_construction: str = "landlord_hot_water_system_construction" + + # Efficiency columns + landlord_roof_efficiency: str = "landlord_roof_efficiency" + landlord_windows_efficiency: str = "landlord_windows_efficiency" + landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency" + landlord_heating_efficiency: str = "landlord_heating_efficiency" + landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency" + landlord_wall_efficiency: str = "landlord_wall_efficiency" + + # Additional windows features + landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion" + landlord_glazed_type: str = "landlord_glazed_type" + landlord_glazed_area: str = "landlord_glazed_area" + + # Additional roof features + landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling" + + # Shape, dimensions, age + landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2" + landlord_construction_age_band: str = "landlord_construction_age_band" + landlord_property_type: str = "landlord_property_type" + landlord_built_form: str = "landlord_built_form" def read_s3(self, bucket_name: str, file_name: str): self.data = read_from_s3(bucket_name=bucket_name, s3_file_name=file_name) @@ -27,4 +59,6 @@ class OnboarderBase: assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not" def map_construction_age_band(self): - pass + raise NotImplementedError( + "This method should be implemented by subclasses to map construction age bands to descriptions" + ) diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py index bfe6b07f..78cc57c1 100644 --- a/backend/onboarders/epc_descriptions.py +++ b/backend/onboarders/epc_descriptions.py @@ -511,19 +511,3 @@ def resolve_roof_efficiency( except TypeError: # Fallback to (age_band) return rule(age_band) - - -class EpcFloorDescriptions(Enum): - # Solid floor - 
solid_insulated = "Solid, insulated" - solid_insulated_assumed = "Solid, insulated (assumed)" - solid_no_insulation_assumed = "Solid, no insulation (assumed)" - solid_limited_insulation_assumed = "Solid, limited insulation (assumed)" - - # Suspended floor - suspended_insulated = "Suspended, insulated" - suspended_insulated_assumed = "Suspended, insulated (assumed)" - suspended_no_insulation_assumed = "Suspended, no insulation (assumed)" - suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)" - - unknown = None # We don't resolve anything diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/parity/age_band.py similarity index 100% rename from backend/onboarders/mappings/age_band.py rename to backend/onboarders/mappings/parity/age_band.py diff --git a/backend/onboarders/mappings/as_built_floor_classifiers.py b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py similarity index 100% rename from backend/onboarders/mappings/as_built_floor_classifiers.py rename to backend/onboarders/mappings/parity/as_built_floor_classifiers.py diff --git a/backend/onboarders/mappings/as_built_roof_classifiers.py b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py similarity index 100% rename from backend/onboarders/mappings/as_built_roof_classifiers.py rename to backend/onboarders/mappings/parity/as_built_roof_classifiers.py diff --git a/backend/onboarders/mappings/as_built_wall_classifiers.py b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py similarity index 100% rename from backend/onboarders/mappings/as_built_wall_classifiers.py rename to backend/onboarders/mappings/parity/as_built_wall_classifiers.py diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/parity/built_form.py similarity index 100% rename from 
backend/onboarders/mappings/built_form.py rename to backend/onboarders/mappings/parity/built_form.py diff --git a/backend/onboarders/mappings/parity/floor.py b/backend/onboarders/mappings/parity/floor.py new file mode 100644 index 00000000..653d4c68 --- /dev/null +++ b/backend/onboarders/mappings/parity/floor.py @@ -0,0 +1,26 @@ +from numpy import nan +from datatypes.epc.floor import EpcFloorDescriptions + +floor_map = { + # Solid floor + ('Solid', 'AsBuilt'): None, # Mapped + ('Solid', 'Unknown'): None, # Mapped + ('Solid', nan): None, # Mapped + ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated, + + # Suspended floor + ('SuspendedTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, + ('SuspendedTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, + ('SuspendedNotTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + + # Unknown type - mapped on age + ('Unknown', 'Unknown'): None, # Mapped unknown_floor_as_built + ('Unknown', 'RetroFitted'): None, # Mapped unknown_floor_retrofitted + (nan, nan): None, # No actual information! 
+ ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built +} diff --git a/backend/onboarders/mappings/parity/glazing.py b/backend/onboarders/mappings/parity/glazing.py new file mode 100644 index 00000000..46c006bd --- /dev/null +++ b/backend/onboarders/mappings/parity/glazing.py @@ -0,0 +1,20 @@ +from datatypes.epc.efficiency import EpcEfficiency + +glazing_map = { + # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area) + # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more + "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022 + # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to + # how we make updates to the windows data. 
+ # Triple known data is high performance glazing with Good efficiency (at least) + "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + # This is also classed as high performance glazing + "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) + "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), +} diff --git a/backend/onboarders/mappings/parity/heating.py b/backend/onboarders/mappings/parity/heating.py new file mode 100644 index 00000000..aa74834b --- /dev/null +++ b/backend/onboarders/mappings/parity/heating.py @@ -0,0 +1,330 @@ +from datatypes.epc.main_heating import EpcHeatingSystems +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.fuel import EpcFuel +from datatypes.epc.heating_controls import EpcHeatingControls +from datatypes.epc.hotwater import EpcHotWaterSystems + +heating_map = { + # 0 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 1 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 2 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + 
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 3 + ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 4 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 5 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 6 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 7 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 8 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 9 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 10 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 11 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 12 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 13 + ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 14 + ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 15 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + 
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 16 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 17 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 19 + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 20 + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 21 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 22 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): 
( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 23 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 24 + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 25 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 26 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 27 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 28 + ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, 
EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 29 + ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 30 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 31 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 32 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 33 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 34 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 35 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 36 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 37 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 38 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 39 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 40 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 41 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + 
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 42 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 43 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 44 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 45 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 46 + ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 47 - water done from here + ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, 
EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + 
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ) +} diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/parity/property_type.py similarity index 100% rename from backend/onboarders/mappings/property_type.py rename to backend/onboarders/mappings/parity/property_type.py diff --git a/backend/onboarders/mappings/parity/roof.py b/backend/onboarders/mappings/parity/roof.py new file mode 100644 index 00000000..14f0c34e --- /dev/null +++ b/backend/onboarders/mappings/parity/roof.py @@ -0,0 +1,103 @@ +from numpy import nan +from datatypes.epc.roof import EpcRoofDescriptions + +roof_map = { + # Dwelling above + ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + # Pitched, normal loft access, with a loft thickness + ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + 
+ ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # Pitched, no loft access, with a loft thickness + ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # All pitched options with asbuilt or unknown go to EpcRoofDescriptions.pitched_insulated_assumed + # With access + ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + # No access + ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + + # Flat + ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, + # Flat - limited insulation + ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation, 
('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation, + # Flat insulated + ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, + # Flat - as built or unknown + ('Flat', 'AsBuilt'): None, # To be classified + ('Flat', nan): None, # To be classified + ('Flat', 'Unknown'): None, # To be classified + + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + + # Thatched + ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age + + # Sloping: + # Limited (12 very poor, 25-50 poor) + ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good) + ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm100'): 
EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated, + # As built/unknown + ('PitchedWithSlopingCeiling', 'AsBuilt'): None, # To be classified + ('PitchedWithSlopingCeiling', nan): None, # To be classified + ('PitchedWithSlopingCeiling', 'Unknown'): None, # To be classified +} + +roof_unknown_age_fallback = { + "Flat": EpcRoofDescriptions.flat_as_built_unknown, + "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown, + "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown, + "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, + "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, +} diff --git a/backend/onboarders/mappings/parity/walls.py b/backend/onboarders/mappings/parity/walls.py new file mode 100644 index 00000000..b46559b9 --- /dev/null +++ b/backend/onboarders/mappings/parity/walls.py @@ -0,0 +1,56 @@ +from datatypes.epc.walls import EpcWallDescriptions + +# Unique combinations +wall_map = { + # Cavity walls + ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity, + ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation, + ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation, + ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal, + ('Cavity', 'FilledCavityPlusExternal'): 
EpcWallDescriptions.cavity_filled_plus_external, + ('Cavity', 'AsBuilt'): None, # To be classified + ('Cavity', 'Unknown'): None, # To be classified + + # System built walls + ('System', 'External'): EpcWallDescriptions.system_external_insulation, + ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation, + ('System', 'AsBuilt'): None, # To be classified + ('System', 'Unknown'): None, + + # Timber Frame walls + ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation, + ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation, + ('Timber Frame', 'AsBuilt'): None, # To be classified + ('Timber Frame', 'Unknown'): None, + + # Solid Brick walls + ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation, + ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation, + ('Solid Brick', 'AsBuilt'): None, # To be classified + ('Solid Brick', 'Unknown'): None, + + # Granite walls + ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation, + ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation, + ('Granite', 'AsBuilt'): None, + ('Granite', 'Unknown'): None, + + # Sandstone walls + ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation, + ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation, + ('Sandstone', 'Unknown'): None, + ('Sandstone', 'AsBuilt'): None, + + # Cob walls + ('Cob', 'AsBuilt'): None, +} + +wall_unknown_age_fallback = { + "Cavity": EpcWallDescriptions.cavity_as_built_unknown, + "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, + "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown, + "System": EpcWallDescriptions.system_as_built_unknown, + "Granite": EpcWallDescriptions.granite_as_built_unknown, + "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown, + "Cob": 
EpcWallDescriptions.cob_as_built_unknown, +} diff --git a/backend/onboarders/mappings/walls.py b/backend/onboarders/mappings/walls.py deleted file mode 100644 index 9b70b49c..00000000 --- a/backend/onboarders/mappings/walls.py +++ /dev/null @@ -1,3 +0,0 @@ -parity_map = { - -} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index c1931437..e820f938 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,21 +1,24 @@ import re -from numpy import nan from tqdm import tqdm import pandas as pd from backend.onboarders.base import OnboarderBase -from backend.onboarders.mappings.property_type import parity_map as property_map -from backend.onboarders.mappings.age_band import parity_map as age_band_map -from backend.onboarders.mappings.built_form import parity_map as built_form_map +from backend.onboarders.mappings.parity.property_type import parity_map as property_map +from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map +from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map +from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ - WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions -from datatypes.epc.fuel import EpcFuel -from datatypes.epc.heating_controls import EpcHeatingControls -from datatypes.epc.main_heating import EpcHeatingSystems -from datatypes.epc.hotwater import EpcHotWaterSystems -from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS -from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS -from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ + WALL_DESCRIPTION_EFFICIENCIES, resolve_roof_efficiency +from 
backend.onboarders.mappings.parity.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS +from backend.onboarders.mappings.parity.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS +from backend.onboarders.mappings.parity.as_built_floor_classifiers import unknown_floor_as_built, \ + unknown_floor_retrofitted, \ solid_floor_as_built, suspended_floor_as_built +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.floor import EpcFloorDescriptions +from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback +from onboarders.mappings.parity.floor import floor_map +from onboarders.mappings.parity.heating import heating_map +from onboarders.mappings.parity.glazing import glazing_map tqdm.pandas() @@ -41,811 +44,275 @@ class ParityOnboarder(OnboarderBase): pass def map_construction_age_band(self): - data["construction_age_band"] = data["Construction Years"].map(age_band_map) - self.assert_nulls_only_from_source_nulls(data, "Construction Years", "construction_age_band") + self.data[self.landlord_construction_age_band] = self.data["Construction Years"].map(age_band_map) + self.assert_nulls_only_from_source_nulls( + self.data, "Construction Years", self.landlord_construction_age_band + ) def map_property_type(self): - data["property_type"] = data["Type"].map(property_map) - self.assert_no_nulls(data, "property_type") + self.data[self.landlord_property_type] = self.data["Type"].map(property_map) + self.assert_no_nulls(self.data, self.landlord_property_type) - def process(self): + def map_built_form(self): + self.data[self.landlord_built_form] = self.data["Attachment"].map(built_form_map) + self.assert_no_nulls(self.data, self.landlord_built_form) + + @staticmethod + def _fill_as_built(row: pd.Series) -> EpcWallDescriptions | None: + """ + Utility function, used by map_wall_construction in parity transformation module + :param row: row of input sustainability data, being transformed + :return: EpcWallDescriptions, the as built 
wall description for the input row, based on the wall construction + type and age band + """ + # Already resolved via direct mapping + if row.landlord_wall_description is not None: + return row.landlord_wall_description + + wall_type = row["Wall Construction"] + + # Missing construction age → conservative fallback + if pd.isnull(row.construction_age_band): + return wall_unknown_age_fallback.get(wall_type) + + classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type) + if classifier is None: + return None + + return classifier(row.construction_age_band) + + @staticmethod + def _resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand | None, + ) -> EpcEfficiency: + # Unknown / holding descriptions → efficiency unknown + if "unknown insulation" in description.value.lower(): + return EpcEfficiency.NA + + rule = WALL_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + if isinstance(rule, EpcEfficiency): + return rule + + # Rule needs age band but we don't have one + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + return rule(age_band) + + def map_wall_construction(self): + self.data[self.landlord_wall_construction] = ( + self.data[["Wall Construction", "Wall Insulation"]] + .apply(tuple, axis=1) + .map(wall_map) + ) + + self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_as_built, axis=1) + + # Sanity check + self.assert_no_nulls(self.data, self.landlord_wall_construction) + + self.data[self.landlord_wall_efficiency] = self.data.progress_apply( + lambda row: self._resolve_wall_efficiency( + row.landlord_wall_description, + row.construction_age_band, + ), + axis=1, + ) + # Additional sanity check + self.assert_no_nulls(self.data, self.landlord_wall_efficiency) + + @staticmethod + def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None: + # Already resolved + if not pd.isnull(row.landlord_roof_description): + return
row.landlord_roof_description + + roof_type = row["Roof Construction"] + + classifier = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type) + if classifier is None: + raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") + + if pd.isnull(row.construction_age_band): + return roof_unknown_age_fallback.get(roof_type) + + output = classifier(row.construction_age_band) + if output is None: + raise NotImplementedError( + f"Roof classification returned None for roof type '{roof_type}'" + ) + + return output + + @staticmethod + def _extract_insulation_thickness(value: str | None) -> int | None: + """ + Extract insulation thickness in mm from a string like 'mm150'. + Returns None if not present or not parseable. + """ + if value is None or pd.isnull(value): + return None + + match = re.search(r"(\d+)", str(value)) + if not match: + return None + + return int(match.group(1)) + + def map_roof_construction(self): + self.data[self.landlord_roof_construction] = ( + self.data[["Roof Construction", "Roof Insulation"]] + .progress_apply(tuple, axis=1) + .map(roof_map) + ) + + self.data[self.landlord_roof_construction] = self.data.progress_apply( + self._fill_roof_as_built, + axis=1, + ) + + # sanity check + self.assert_no_nulls(self.data, self.landlord_roof_construction) + + self.data["roof_insulation_thickness_mm"] = self.data["Roof Insulation"].apply( + self._extract_insulation_thickness + ) + + self.data[self.landlord_roof_efficiency] = self.data.progress_apply( + lambda row: resolve_roof_efficiency( + description=row.landlord_roof_description, + age_band=row.construction_age_band, + insulation_thickness=row.roof_insulation_thickness_mm, + ), + axis=1, + ) + # sanity check + self.assert_no_nulls(self.data, self.landlord_roof_efficiency) + + # Flag rows whose roof type is PitchedWithSlopingCeiling + self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply( + lambda x: x == "PitchedWithSlopingCeiling" + ) + + @staticmethod + def _fill_floor_as_built(row: pd.Series): + # 1.
Already resolved + if row.landlord_floor_description is not None: + return row.landlord_floor_description + + age_band = row.construction_age_band + floor_type = row["Floor Construction"] + insulation = row["Floor Insulation"] + + # 2. Missing age band → conservative fallback + if pd.isnull(age_band): + return EpcFloorDescriptions.unknown + + # 3. Known floor types + if floor_type == "Solid": + return solid_floor_as_built(age_band) + + if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}: + return suspended_floor_as_built(age_band) + + # 4. Unknown floor type + if floor_type == "Unknown": + if insulation == "RetroFitted": + return unknown_floor_retrofitted(age_band) + return unknown_floor_as_built(age_band) + + # 5. Truly missing / garbage input + return EpcFloorDescriptions.unknown + + def map_floor_construction(self): + self.data[self.landlord_floor_construction] = ( + self.data[["Floor Construction", "Floor Insulation"]] + .progress_apply(tuple, axis=1) + .map(floor_map) + ) + + self.data[self.landlord_floor_construction] = self.data.progress_apply( + self._fill_floor_as_built, + axis=1, + ) + + self.assert_no_nulls(self.data, self.landlord_floor_construction) + + def map_glazing(self): + # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or glazed area. 
+ # There is maybe an argument for landlord_multi_glaze_proportion as this could be variable, + # however + self.data[ + [ + self.landlord_windows_construction, + self.landlord_windows_efficiency, + self.landlord_multi_glaze_proportion, + self.landlord_glazed_type, + self.landlord_glazed_area + ] + ] = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series) + + def map_heating(self): + # TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating + # For sub optimal heating controls, we're going to make an assumption as to what the heating controls are + # and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an + # upper limit + # as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating + # controls. E.g. it may be programmer and room thermostat + self.data[ + [ + self.landlord_heating_construction, + self.landlord_heating_efficiency, + self.landlord_fuel_construction, + self.landlord_heating_controls_construction, + self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system_construction, + self.landlord_hot_water_efficiency + ] + ] = self.data[ + [ + "Heating", + "Boiler Efficiency", + "Main Fuel", + "Controls Adequacy" + ] + ].progress_apply(tuple, axis=1).map(heating_map).progress_apply(pd.Series) + + def map_floor_area(self): + # This is just a rename + self.data = self.data.rename( + columns={"Total Floor Area (m2)": self.landlord_total_floor_area_m2} + ) + + def transform(self): # ------------ construction_age_band ------------ self.map_construction_age_band() # ------------ property_type ------------ self.map_property_type() + # ------------ built_form ------------ + self.map_built_form() -# We want to map the parity fields to standard EPC references. 
This will allow us to -# 1) Estimate EPCs, more accurately -# 2) Patch incorrect EPCs with ease -# 3) Indicate already installed measures + # ------------ Wall Construction ------------ + self.map_wall_construction() + # ------------ Roof Construction ------------ + self.map_roof_construction() -# ------------ built_form ------------ -data["built_form"] = data["Attachment"].map(built_form_map) + # ------------ Floor Construction ------------ + self.map_floor_construction() -assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" + # ------------ Glazing ------------ + self.map_glazing() -# ------------ Wall Construction ------------ + # ------------ Heating, fuel, controls & hot water ------------ + self.map_heating() -# Unique combindations -wall_mapping = { - # Cavity walls - ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity, - ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation, - ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation, - ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal, - ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external, - ('Cavity', 'AsBuilt'): None, # To be classified - ('Cavity', 'Unknown'): None, # To be classified - - # System built walls - ('System', 'External'): EpcWallDescriptions.system_external_insulation, - ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation, - ('System', 'AsBuilt'): None, # To be classified - ('System', 'Unknown'): None, - - # Timber Frame walls - ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation, - ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation, - ('Timber Frame', 'AsBuilt'): None, # To be classified - ('Timber Frame', 'Unknown'): None, - - # Solid Brick walls - ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation, - ('Solid Brick', 'Internal'): 
EpcWallDescriptions.solid_brick_internal_insulation, - ('Solid Brick', 'AsBuilt'): None, # To be classified - ('Solid Brick', 'Unknown'): None, - - # Granite walls - ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation, - ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation, - ('Granite', 'AsBuilt'): None, - ('Granite', 'Unknown'): None, - - # Sandstone walls - ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation, - ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation, - ('Sandstone', 'Unknown'): None, - ('Sandstone', 'AsBuilt'): None, - - # Cob walls - ('Cob', 'AsBuilt'): None, -} - -WALL_UNKNOWN_AGE_FALLBACK = { - "Cavity": EpcWallDescriptions.cavity_as_built_unknown, - "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, - "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown, - "System": EpcWallDescriptions.system_as_built_unknown, - "Granite": EpcWallDescriptions.granite_as_built_unknown, - "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown, - "Cob": EpcWallDescriptions.cob_as_built_unknown, -} - -data["landlord_wall_description"] = ( - data[["Wall Construction", "Wall Insulation"]] - .apply(tuple, axis=1) - .map(wall_mapping) -) - - -def fill_as_built(row): - # Already resolved via direct mapping - if row.landlord_wall_description is not None: - return row.landlord_wall_description - - wall_type = row["Wall Construction"] - - # Missing construction age → conservative fallback - if pd.isnull(row.construction_age_band): - return WALL_UNKNOWN_AGE_FALLBACK.get(wall_type) - - classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type) - if classifier is None: - return None - - return classifier(row.construction_age_band) - - -def resolve_wall_efficiency( - description: EpcWallDescriptions, - age_band: EpcConstructionAgeBand | None, -) -> EpcEfficiency: - # Unknown / holding descriptions → efficiency unknown - if 
"unknown insulation" in description.value.lower(): - return EpcEfficiency.NA - - rule = WALL_DESCRIPTION_EFFICIENCIES.get(description) - - if rule is None: - return EpcEfficiency.NA - - if isinstance(rule, EpcEfficiency): - return rule - - # Rule needs age band but we don't have one - if age_band is None or pd.isnull(age_band): - return EpcEfficiency.NA - - return rule(age_band) - - -data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1) - -assert data["landlord_wall_description"].isnull().sum() == 0, ( - "Some wall descriptions could not be resolved" -) - -data["landlord_wall_efficiency"] = data.progress_apply( - lambda row: resolve_wall_efficiency( - row.landlord_wall_description, - row.construction_age_band, - ), - axis=1, -) -# Sanity check -assert data["landlord_wall_efficiency"].isnull().sum() == 0 - -# ------------ Roof Construction ------------ - - -roof_mapping = { - # Dwelling above - ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, - ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, - # Pitched, normal loft access, with a loft thickness - ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, - ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, - ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, - ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, - ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, - ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, - ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, - ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, - ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, - ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, 
- ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, - - # Pitched, no loft access, with a loft thickness - ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, - ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, - - # All pitched options with asbuilt or unknown got to EpcRoofDescriptions.pitched_insulated_assumed - # With access - ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, - ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, - ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, - # No access - ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, - ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, - ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, - - # Flat - ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, - # Flat - limited insulation - ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation, - ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation, - ('Flat', 'mm50'): 
EpcRoofDescriptions.flat_limited_insulation, - # Flat insulated - ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, - ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, - # Flat - as built or unknown - ('Flat', 'AsBuilt'): None, # To be classified - ('Flat', nan): None, # To be classified - ('Flat', 'Unknown'): None, # To be classified - - # 12mm = very poor & has limited insulation description - # 25, 50 = poor & has limited insulation description - # 75, 100, 125mm = average (Flat, insulated) - # 150, 175, 200, 225, 250mm = good (Flat, insulated) - # 270mm+ = very good (Flat, insulated) - - # Thatched - ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation, - ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation, - ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, - ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age - - # Sloping: - # Limited (12 very poor, 25-50 poor) - ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation, - ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation, - ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation, - # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good) - ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm150'): 
EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated, - ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated, - # As built/unknown - ('PitchedWithSlopingCeiling', 'AsBuilt'): None, # To be classified - ('PitchedWithSlopingCeiling', nan): None, # To be classified - ('PitchedWithSlopingCeiling', 'Unknown'): None, # -} - -ROOF_UNKNOWN_AGE_FALLBACK = { - "Flat": EpcRoofDescriptions.flat_as_built_unknown, - "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown, - "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown, - "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, - "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, -} - - -def fill_roof_as_built(row): - # Already resolved - if not pd.isnull(row.landlord_roof_description): - return row.landlord_roof_description - - roof_type = row["Roof Construction"] - - classifier = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type) - if classifier is None: - raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") - - if pd.isnull(row.construction_age_band): - return ROOF_UNKNOWN_AGE_FALLBACK.get(roof_type) - - output = classifier(row.construction_age_band) - if output is None: - raise NotImplementedError( - f"Roof classification returned None for roof type '{roof_type}'" - ) - - return output - - -data["landlord_roof_description"] = ( - data[["Roof Construction", "Roof Insulation"]] - .progress_apply(tuple, axis=1) - .map(roof_mapping) -) - -data["landlord_roof_description"] = 
data.progress_apply( - fill_roof_as_built, - axis=1, -) -# Sanity check -assert data["landlord_roof_description"].isnull().sum() == 0, ( - "Some roof descriptions could not be resolved" -) - - -def extract_insulation_thickness(value: str | None) -> int | None: - """ - Extract insulation thickness in mm from a string like 'mm150'. - Returns None if not present or not parseable. - """ - if value is None or pd.isnull(value): - return None - - match = re.search(r"(\d+)", str(value)) - if not match: - return None - - return int(match.group(1)) - - -data["roof_insulation_thickness_mm"] = data["Roof Insulation"].apply( - extract_insulation_thickness -) - -data["landlord_roof_efficiency"] = data.progress_apply( - lambda row: resolve_roof_efficiency( - description=row.landlord_roof_description, - age_band=row.construction_age_band, - insulation_thickness=row.roof_insulation_thickness_mm, - ), - axis=1, -) - -assert data["landlord_roof_efficiency"].isnull().sum() == 0 - -# Flag sloping ceiling -data["has_sloping_ceiling"] = data["Roof Construction"].apply( - lambda x: x == "PitchedWithSlopingCeiling" -) - -# ------------ Floor Construction ------------ - -floor_mapping = { - # Solid floor - ('Solid', 'AsBuilt'): None, # Mapped - ('Solid', 'Unknown'): None, # Mapped - ('Solid', nan): None, # Mapped - ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated, - - # Suspended floor - ('SuspendedTimber', nan): None, # Mapped suspended_floor_as_built - ('SuspendedTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built - ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, - ('SuspendedTimber', 'Unknown'): None, # Mapped suspended_floor_as_built - ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, - ('SuspendedNotTimber', nan): None, # Mapped suspended_floor_as_built - ('SuspendedNotTimber', 'Unknown'): None, # Mapped suspended_floor_as_built - ('SuspendedNotTimber', 'AsBuilt'): None, # Mapped 
suspended_floor_as_built - - # Unknown type - mapped on age - ('Unknown', 'Unknown'): None, # Mapped unknown_floor_as_built - ('Unknown', 'RetroFitted'): None, # Mapped unknown_floor_retrofitted - (nan, nan): None, # No actual information! - ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built -} - -data["landlord_floor_description"] = ( - data[["Floor Construction", "Floor Insulation"]] - .progress_apply(tuple, axis=1) - .map(floor_mapping) -) - - -def fill_floor_as_built(row): - # 1. Already resolved - if row.landlord_floor_description is not None: - return row.landlord_floor_description - - age_band = row.construction_age_band - floor_type = row["Floor Construction"] - insulation = row["Floor Insulation"] - - # 2. Missing age band → conservative fallback - if pd.isnull(age_band): - return EpcFloorDescriptions.unknown - - # 3. Known floor types - if floor_type == "Solid": - return solid_floor_as_built(age_band) - - if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}: - return suspended_floor_as_built(age_band) - - # 4. Unknown floor type - if floor_type == "Unknown": - if insulation == "RetroFitted": - return unknown_floor_retrofitted(age_band) - return unknown_floor_as_built(age_band) - - # 5. 
Truly missing / garbage input - return EpcFloorDescriptions.unknown - - -data["landlord_floor_description"] = data.progress_apply( - fill_floor_as_built, - axis=1, -) - -# All values should be remapped now -assert data["landlord_floor_description"].isnull().sum() == 0, ( - "Some floor descriptions could not be resolved" -) - -# ------------ Glazing ------------ -glazing_map = { - # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area - # For SAP 10 assessments, The glazed type and glazed area are not populated in the EPC API data any more - "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), - "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), - "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), - "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), - # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022 - # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to - # how we make updates to the windows data. - # Triple known data is high performance glazing with Good efficiency (at least) - "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), - # This is also classed as high performance glazing - "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), - # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) - "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), - "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), -} - -data[["landlord_windows_description", - "landlord_windows_efficiency", - "landlord_multi_glaze_proportion", - "landlord_glazed_type", - "landlord_glazed_area"]] = data["Glazing"].map(glazing_map).progress_apply(pd.Series) - -# Peform the remapping. 
The columns we wish to produce are the following: -# 1) landlord_windows_description -# 2) landlord_windows_efficiency -# 3) landlord_multi_glaze_proportion - maybe don't need to store this, same for glazing type and area - - -# ------------ Heating, fuel, controls & hot water ------------ - - -# We map to: -# 1) Heating description -# 2) Heating efficiency -# 3) Fuel type -# 4) Heating controls -# 5) Heating controls efficiency -# 6) Hot water system -# 7) Hot water efficiency - -# TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating -# For sub optimal heating controls, we're going to make an assumption as to what the heating controls are -# and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an upper limit -# as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating -# controls. E.g. it may be programmer and room thermostat - -# Boiler ratings based on efficiency -# 90%+ = A -# 86-89.9% = B -> Mapped to good efficiency -# 78 - 85% = C -# 70 - 77.9% = D -# 65 - 69.9% = E -# 60 - 64.9% = F -# <60% = G - -heating_map = { - # 0 - ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 1 - ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 2 - ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - 
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 3 - ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 4 - ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 5 - ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 6 - ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 7 - ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 8 - ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 9 - ('Boilers', 
'B', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 10 - ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 11 - ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 12 - ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 13 - ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 14 - ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 15 - ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, 
EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 16 - ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 17 - ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 19 - ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 20 - ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 21 - ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 22 - ('Boilers', 'D', 
'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 23 - ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 24 - ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 25 - ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 26 - ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 27 - ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 28 - ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - 
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 29 - ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 30 - ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 31 - ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 32 - ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 33 - ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 34 - ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 35 - ('Boilers', 'G', 
'MainsGasNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 36 - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 37 - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 38 - ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 39 - ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 40 - ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 41 - ('Heat pumps (wet)', 'A', 
'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 42 - ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, - EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 43 - ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( - EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, - EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, - EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE - ), - # 44 - ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 45 - ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, - EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 46 - ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, - EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - # 47 - water done from here - ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( - 
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, - EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): ( - EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, - EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): ( - EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal, - EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( - EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, - EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, - 
EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ), - ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( - EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, - EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, - EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE - ) -} - -# Apply the mapping -data[ - [ - "landlord_heating_description", - "landlord_heating_efficiency", - "landlord_fuel_type", - "landlord_heating_controls_description", - "landlord_heating_controls_efficiency", - "landlord_hot_water_system_description", - "landlord_hot_water_efficiency" - ] -] = data[ - [ - "Heating", - "Boiler Efficiency", - "Main Fuel", - "Controls Adequacy" - ] -].progress_apply(tuple, axis=1).map(heating_map).progress_apply(pd.Series) - -# ------------ Floor Area ------------ -# This is just a rename -data = data.rename( - columns={"Total Floor Area (m2)": "landlord_total_floor_area_m2"} -) + # ------------ Floor Area ------------ + self.map_floor_area() diff --git a/datatypes/epc/floor.py b/datatypes/epc/floor.py new file mode 100644 index 00000000..41786101 --- /dev/null +++ b/datatypes/epc/floor.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class EpcFloorDescriptions(Enum): + # Solid floor + solid_insulated = "Solid, insulated" + solid_insulated_assumed = "Solid, insulated (assumed)" + solid_no_insulation_assumed = "Solid, no insulation (assumed)" + solid_limited_insulation_assumed = "Solid, limited insulation (assumed)" + + # Suspended floor + suspended_insulated = "Suspended, insulated" + suspended_insulated_assumed = "Suspended, insulated (assumed)" + suspended_no_insulation_assumed = "Suspended, no insulation (assumed)" + suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)" + + unknown = None # We don't resolve anything 
From 87ebc672b8f3fbe7a01301de8eabd504a0ab5f66 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 09:37:05 +0000 Subject: [PATCH 25/33] structure[C for parity largely complete --- .../parity/as_built_floor_classifiers.py | 19 ++++++++++-- .../parity/as_built_roof_classifiers.py | 10 +++--- .../parity/as_built_wall_classifiers.py | 2 +- backend/onboarders/parity.py | 31 +++++++++---------- 4 files changed, 36 insertions(+), 26 deletions(-) diff --git a/backend/onboarders/mappings/parity/as_built_floor_classifiers.py b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py index 9f14fa7d..05894e61 100644 --- a/backend/onboarders/mappings/parity/as_built_floor_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py @@ -1,4 +1,5 @@ -from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcFloorDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.floor import EpcFloorDescriptions def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: @@ -25,7 +26,7 @@ def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescr return EpcFloorDescriptions.suspended_insulated -def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: +def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: year = age_band.start_year() if year >= 2003: @@ -35,7 +36,7 @@ def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptio return EpcFloorDescriptions.solid_no_insulation_assumed -def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: +def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: year = age_band.start_year() if year >= 2003: @@ -44,3 +45,15 @@ def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescri return 
EpcFloorDescriptions.suspended_limited_insulation_assumed return EpcFloorDescriptions.suspended_no_insulation_assumed + + +as_built_floor_classifiers = { + "Solid": map_solid_floor_as_built, + "SuspendedTimber": map_suspended_floor_as_built, + "SuspendedNotTimber": map_suspended_floor_as_built, +} + +unknown_as_built_floor_classifiers = { + "RetroFitted": unknown_floor_retrofitted, + "AsBuilt": unknown_floor_as_built, +} diff --git a/backend/onboarders/mappings/parity/as_built_roof_classifiers.py b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py index 7c672ce5..d5c883ba 100644 --- a/backend/onboarders/mappings/parity/as_built_roof_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py @@ -1,7 +1,7 @@ from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcRoofDescriptions -def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: +def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: """ For a flat, as built roof, these are the breakdowns: @@ -26,7 +26,7 @@ def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: return EpcRoofDescriptions.flat_no_insulation -def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: +def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: """ For a sloping ceiling, as built roof, these are the breakdowns: 2023 onwards → Sloping pitched, insulated @@ -48,8 +48,8 @@ def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDe return EpcRoofDescriptions.sloping_pitched_no_insulation -AS_BUILT_ROOF_CLASSIFIERS = { +as_built_roof_classifiers = { # Only need to apply this to flat and sloping ceiling roofs - "Flat": classify_flat_roof, - "PitchedWithSlopingCeiling": classify_sloping_ceiling_roof, + "Flat": map_flat_roof, + "PitchedWithSlopingCeiling": map_sloping_ceiling_roof, } diff --git 
a/backend/onboarders/mappings/parity/as_built_wall_classifiers.py b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py index f907a533..124270c7 100644 --- a/backend/onboarders/mappings/parity/as_built_wall_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py @@ -101,7 +101,7 @@ def map_cob_wall_insulation(age_band: EpcConstructionAgeBand): ) -AS_BUILT_WALL_CLASSIFIERS = { +as_built_wall_classifiers = { "Cavity": map_cavity_wall_insulation, "Solid Brick": map_solid_wall_insulation, "Timber Frame": map_timber_frame_wall_insulation, diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index e820f938..c7f982df 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -8,11 +8,11 @@ from backend.onboarders.mappings.parity.built_form import parity_map as built_fo from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ WALL_DESCRIPTION_EFFICIENCIES, resolve_roof_efficiency -from backend.onboarders.mappings.parity.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS -from backend.onboarders.mappings.parity.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS -from backend.onboarders.mappings.parity.as_built_floor_classifiers import unknown_floor_as_built, \ - unknown_floor_retrofitted, \ - solid_floor_as_built, suspended_floor_as_built +from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers +from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers +from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( + as_built_floor_classifiers, unknown_as_built_floor_classifiers +) from datatypes.epc.roof import EpcRoofDescriptions from datatypes.epc.floor import EpcFloorDescriptions from onboarders.mappings.parity.roof import roof_map, 
roof_unknown_age_fallback @@ -58,7 +58,7 @@ class ParityOnboarder(OnboarderBase): self.assert_no_nulls(self.data, self.landlord_built_form) @staticmethod - def _fill_as_built(row: pd.Series) -> EpcWallDescriptions | None: + def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None: """ Utility function, used by map_wall_construction in parity transformation module :param row: row of input sustainability data, being transformed @@ -75,7 +75,7 @@ class ParityOnboarder(OnboarderBase): if pd.isnull(row.construction_age_band): return wall_unknown_age_fallback.get(wall_type) - classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type) + classifier = as_built_wall_classifiers.get(wall_type) if classifier is None: return None @@ -111,7 +111,7 @@ class ParityOnboarder(OnboarderBase): .map(wall_map) ) - self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_as_built, axis=1) + self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_wall_as_built, axis=1) # Sanity check self.assert_no_nulls(self.data, self.landlord_wall_construction) @@ -134,7 +134,7 @@ class ParityOnboarder(OnboarderBase): roof_type = row["Roof Construction"] - classifier = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type) + classifier = as_built_roof_classifiers.get(roof_type) if classifier is None: raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") @@ -214,17 +214,14 @@ class ParityOnboarder(OnboarderBase): return EpcFloorDescriptions.unknown # 3. Known floor types - if floor_type == "Solid": - return solid_floor_as_built(age_band) - - if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}: - return suspended_floor_as_built(age_band) + if floor_type in ["Solid", "SuspendedTimber", "SuspendedNotTimber"]: + classifier = as_built_floor_classifiers[floor_type] + return classifier(age_band) # 4. 
Unknown floor type if floor_type == "Unknown": - if insulation == "RetroFitted": - return unknown_floor_retrofitted(age_band) - return unknown_floor_as_built(age_band) + classifier = unknown_as_built_floor_classifiers[insulation] + return classifier(age_band) # 5. Truly missing / garbage input return EpcFloorDescriptions.unknown From 5fa6289b4414bb394a6119892a11345587369488 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 11:19:03 +0000 Subject: [PATCH 26/33] setting up handler with example event --- backend/onboarders/base.py | 14 +- backend/onboarders/epc_descriptions.py | 513 ------------------ backend/onboarders/factory.py | 10 + backend/onboarders/handler.py | 33 ++ .../onboarders/mappings/parity/age_band.py | 2 +- backend/onboarders/mappings/parity/roof.py | 358 ++++++++++++ backend/onboarders/mappings/parity/walls.py | 155 ++++++ backend/onboarders/parity.py | 23 +- 8 files changed, 577 insertions(+), 531 deletions(-) delete mode 100644 backend/onboarders/epc_descriptions.py create mode 100644 backend/onboarders/factory.py diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index b90f5fc4..0e2351bd 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -1,5 +1,5 @@ import pandas as pd -from utils.s3 import read_from_s3 +from utils.s3 import read_from_s3, read_excel_from_s3 class OnboarderBase: @@ -37,8 +37,16 @@ class OnboarderBase: landlord_property_type: str = "landlord_property_type" landlord_built_form: str = "landlord_built_form" - def read_s3(self, bucket_name: str, file_name: str): - self.data = read_from_s3(bucket_name=bucket_name, s3_file_name=file_name) + def read_s3(self, bucket_name: str, file_name: str, **kwargs): + if kwargs.get("format") == "xlsx": + self.data = read_excel_from_s3( + bucket_name=bucket_name, + file_key=file_name, + sheet_name=kwargs.get("sheet_name"), + header_row=kwargs.get("header_row", 0) + ) + else: + self.data = read_from_s3(bucket_name=bucket_name, 
s3_file_name=file_name) def write(self): pass diff --git a/backend/onboarders/epc_descriptions.py b/backend/onboarders/epc_descriptions.py deleted file mode 100644 index 78cc57c1..00000000 --- a/backend/onboarders/epc_descriptions.py +++ /dev/null @@ -1,513 +0,0 @@ -import pandas as pd -from enum import Enum -from collections.abc import Mapping -from typing import Callable, Union -from datatypes.epc.construction_age_band import EpcConstructionAgeBand -from datatypes.epc.efficiency import EpcEfficiency -from datatypes.epc.walls import EpcWallDescriptions -from datatypes.epc.roof import EpcRoofDescriptions - - -def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """" - Maps cavity filled to efficiency based on construction age band. - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if age_band in { - EpcConstructionAgeBand.from_2023_onwards - }: - return EpcEfficiency.VERY_GOOD - - return EpcEfficiency.GOOD - - -def internal_external_insulation_efficiency( - age_band: EpcConstructionAgeBand, -) -> EpcEfficiency: - """ - Maps: - - cavity unfilled with internal/external insulation to efficiency based on construction age band. We assumed - based on 100mm insulation - - solid brick with internal/external insulation to efficiency based on construction age band. We assumed - based on 100mm insulation - - system built with internal/external insulation to efficiency based on construction age band. 
We assumed - based on 100mm insulation - - All of these wall types have the same behaviour in elmhurst - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if age_band in { - EpcConstructionAgeBand.from_1983_to_1990, - EpcConstructionAgeBand.from_1991_to_1995, - EpcConstructionAgeBand.from_1996_to_2002, - EpcConstructionAgeBand.from_2003_to_2006, - EpcConstructionAgeBand.from_2007_to_2011, - EpcConstructionAgeBand.from_2012_to_2022, - EpcConstructionAgeBand.from_2023_onwards, - }: - return EpcEfficiency.VERY_GOOD - - return EpcEfficiency.GOOD - - -def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """" - Maps: - - timber frame with internal/external wall insulation to efficiency based on construction age band. - - sandstone/limestone with internal/external wall insulation to efficiency based on construction age band. - - granite/whinstone with internal/external wall insulation to efficiency based on construction age band. 
- :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if age_band in { - EpcConstructionAgeBand.from_2023_onwards - }: - return EpcEfficiency.VERY_GOOD - - return EpcEfficiency.GOOD - - -WallEfficiencyRule = Union[ - EpcEfficiency, - Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], -] - -WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = { - # Note: all function mappings have been defined based on Elmhurst - # Cavity - # value mappings - EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR, - EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD, - EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD, - EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD, - # function mappings - EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency, - EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency, - EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency, - - # Solid brick - # value mappings - EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR, - EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD, - # function mappings - EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency, - EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency, - - # System - # value mappings - EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR, - EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD, - # function mappings - EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency, 
- EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency, - - # Timber frame - # value mappings - EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR, - EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD, - # function mappings - EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency, - EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency, - - # Granite / whinstone - EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR, - EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD, - # function mappings - EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, - EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency, - - # Sandstone / limestone - EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR, - EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE, - EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD, - # function mappings - EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, - EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency, - - # Cob (special case) - EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, - EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, - - # Unknown mappings which are unhandled - EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA, - 
EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA, - EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA, - EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA, - EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA, - EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA, - EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA, - -} - - -def resolve_wall_efficiency( - description: EpcWallDescriptions, - age_band: EpcConstructionAgeBand, -) -> EpcEfficiency: - rule = WALL_DESCRIPTION_EFFICIENCIES[description] - - if isinstance(rule, EpcEfficiency): - return rule - - return rule(age_band) - - -RoofEfficiencyRule = Union[ - EpcEfficiency, - Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], -] - - -def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> Pitched, no insulation, Very Poor - 1976-1982 -> Pitched, limited insulation, Poor - 1983-1990, to 1996-2002 Pitched, insulated, Average - 2003 - 2006, 2012-2022 -> Pitched, insulated, Good - 2023 onwards -> Pitched, insulated, Very Good - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - - start_year = age_band.start_year() - if start_year >= 2023: - return EpcEfficiency.VERY_GOOD - - if start_year >= 2003: - return EpcEfficiency.GOOD - - if start_year >= 1983: - return EpcEfficiency.AVERAGE - - if start_year >= 1976: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: - """ - 12mm -> Very Poor - 25mm - 50mm -> Poor - 75mm - 125mm -> Pitched, insulated, average - 150mm - 250mm -> good - 270mm+ -> very good - :param insulation_thickness: Insulation thickness in mm - :return: EpcEfficiency - """ - - if insulation_thickness is None: - raise ValueError("Insulation thickness is required for flat insulated 
efficiency calculation") - - if insulation_thickness >= 270: - return EpcEfficiency.VERY_GOOD - - if 150 <= insulation_thickness <= 250: - return EpcEfficiency.GOOD - - if 75 <= insulation_thickness <= 125: - return EpcEfficiency.AVERAGE - - if 25 <= insulation_thickness <= 50: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - Combines both age band and insulation thickness to determine flat roof efficiency. - :param insulation_thickness: Insulation thickness in mm - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if insulation_thickness is not None: - return flat_insulated_efficiency_thickness(insulation_thickness) - - return flat_insulated_efficiency_age_band(age_band) - - -def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - 2023 onwards -> Very Good - 2012-2022 -> Very Good - 2007-2011 -> Very Good - 2003-2006 -> Very Good - 1996-2002 -> Good - 1991-1995 -> Good - 1983-1990 -> Average - 1976-1982 -> Average - 1967-1975 -> Average - 1950-1966 -> Average - 1930-1949 -> Average - 1900-1929 -> Average - before 1900 -> Average - :param age_band: Input age band, EpcConstructionAgeBand - :return: EpcEfficiency - """ - year = age_band.start_year() - if year >= 2003: - return EpcEfficiency.VERY_GOOD - if year >= 1991: - return EpcEfficiency.GOOD - - return EpcEfficiency.AVERAGE - - -def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - Maps thatched roof efficiency based on construction age band. 
- :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - year = age_band.start_year() - if year >= 2023: - return EpcEfficiency.VERY_GOOD - if year >= 2003: - return EpcEfficiency.GOOD - - return EpcEfficiency.AVERAGE - - -def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: - """ - Maps thatched roof efficiency based on insulation thickness. - :param insulation_thickness: Insulation thickness in mm - :return: EpcEfficiency - """ - if insulation_thickness is None: - raise ValueError("Insulation thickness is required for thatched efficiency calculation") - - if insulation_thickness >= 175: - return EpcEfficiency.VERY_GOOD - - if insulation_thickness >= 25: - return EpcEfficiency.GOOD - - return EpcEfficiency.AVERAGE - - -def thatched_efficiency( - insulation_thickness: int | None, - age_band: EpcConstructionAgeBand, -) -> EpcEfficiency: - """ - Combines both age band and insulation thickness to determine thatched roof efficiency. - :param insulation_thickness: Insulation thickness in mm - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if insulation_thickness is not None: - return thatched_efficiency_thickness(insulation_thickness) - - return thatched_efficiency_age_band(age_band) - - -def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - Maps sloping ceiling roof efficiency based on construction age band. - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - year = age_band.start_year() - if year >= 2023: - return EpcEfficiency.VERY_GOOD - if year >= 2003: - return EpcEfficiency.GOOD - if year >= 1983: - return EpcEfficiency.AVERAGE - if year >= 1976: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: - """ - Maps sloping ceiling roof efficiency based on insulation thickness. 
- :param insulation_thickness: Insulation thickness in mm - :return: EpcEfficiency - """ - if insulation_thickness is None: - raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation") - - if insulation_thickness >= 270: - return EpcEfficiency.VERY_GOOD - - if insulation_thickness >= 150: - return EpcEfficiency.GOOD - - if insulation_thickness >= 75: - return EpcEfficiency.AVERAGE - - if insulation_thickness >= 25: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def sloping_ceiling_efficiency( - insulation_thickness: int | None, - age_band: EpcConstructionAgeBand, -) -> EpcEfficiency: - """ - Combines both age band and insulation thickness to determine sloping ceiling roof efficiency. - :param insulation_thickness: Insulation thickness in mm - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if insulation_thickness is not None: - return sloping_ceiling_efficiency_thickness(insulation_thickness) - - return sloping_ceiling_efficiency_age_band(age_band) - - -def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: - """ - 400mm, 350mm = very good - 200-300mm = good - 125-175 = average - 50-100 = poor - 25 and below= very poor - :return: - """ - if insulation_thickness is None: - raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation") - - if insulation_thickness >= 350: - return EpcEfficiency.VERY_GOOD - - if insulation_thickness >= 200: - return EpcEfficiency.GOOD - - if insulation_thickness >= 125: - return EpcEfficiency.AVERAGE - - if insulation_thickness >= 50: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: - """ - # 2023 onwards -> Very Good - # 2003-2006, 2012-2022 -> Good - # 1983 - 1990, 1996-2002 -> Average - # 1976-1982 -> Poor - # 1967-1975 and earlier bands -> 
Very Poor - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - year = age_band.start_year() - if year >= 2023: - return EpcEfficiency.VERY_GOOD - if year >= 2003: - return EpcEfficiency.GOOD - if year >= 1983: - return EpcEfficiency.AVERAGE - if year >= 1976: - return EpcEfficiency.POOR - - return EpcEfficiency.VERY_POOR - - -def loft_insulated_at_rafters_efficiency( - insulation_thickness: int | None, - age_band: EpcConstructionAgeBand, -) -> EpcEfficiency: - """ - Combines both age band and insulation thickness to determine loft insulated at rafters roof efficiency. - :param insulation_thickness: Insulation thickness in mm - :param age_band: EpcConstructionAgeBand - :return: EpcEfficiency - """ - if insulation_thickness is not None: - return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness) - - return loft_insulated_at_rafters_efficiency_age_band(age_band) - - -ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { - # Flat roof - EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, - EpcRoofDescriptions.flat_limited_insulation: flat_efficiency, - EpcRoofDescriptions.flat_insulated: flat_efficiency, - - # Loft: - # value mappings - EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, - EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, - EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, - EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE, - EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE, - EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE, - EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD, - EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD, - EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD, - EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD, - EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD, - 
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, - EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, - EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, - EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR, - # function mappings - EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, - - # Loft af rafters - EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency, - - # Another dwelling above - EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, - - # Thatched - EpcRoofDescriptions.thatched: thatched_efficiency, - EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency, - - # Sloping ceiling - EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency, - EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency, - EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, - -} - - -def resolve_roof_efficiency( - description: EpcRoofDescriptions, - age_band: EpcConstructionAgeBand | None, - insulation_thickness: int | None, -) -> EpcEfficiency: - """ - Resolve roof efficiency from description + age band + insulation thickness. 
- """ - - # Unknown / holding descriptions → efficiency unknown - if description in description.unknown_descriptions: - return EpcEfficiency.NA - - rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description) - - if rule is None: - return EpcEfficiency.NA - - # Fixed efficiency - if isinstance(rule, EpcEfficiency): - return rule - - # Callable rule - if age_band is None or pd.isnull(age_band): - return EpcEfficiency.NA - - try: - # Try (thickness, age_band) - return rule(insulation_thickness, age_band) - except TypeError: - # Fallback to (age_band) - return rule(age_band) diff --git a/backend/onboarders/factory.py b/backend/onboarders/factory.py new file mode 100644 index 00000000..13dd5505 --- /dev/null +++ b/backend/onboarders/factory.py @@ -0,0 +1,10 @@ +from onboarders.parity import ParityOnboarder + + +class OnboarderFactory: + @staticmethod + def create_onboarder(onboarder_type): + if onboarder_type == "parity": + return ParityOnboarder + + raise ValueError(f"Unknown onboarder type: {onboarder_type}") diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py index e69de29b..0c38e4d9 100644 --- a/backend/onboarders/handler.py +++ b/backend/onboarders/handler.py @@ -0,0 +1,33 @@ +import json +from onboarders.factory import OnboarderFactory +from utils.logger import setup_logger + +logger = setup_logger() + + +def handler(event, context): + """ + Lambda handler that triggers the model engine for each SQS message. 
+ """ + for record in event.get("Records", []): + try: + event_body = json.loads(record["body"]) + # TODO: Implement logic to check which file type we have + # Sample input data + event_body = { + "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for " + "Domna.xlsx", + "system": "parity", + "format": "xlsx", + "sheet_name": "Sustainability" + } + logger.info("Processing record with body: %s", event_body) + Onboarder = OnboarderFactory.create_onboarder(event_body["system"]) + onboarder = Onboarder(fileuri=event_body["s3_uri"]) + + logger.info("Transforming data for record with body: %s", event_body) + onboarder.transform() + logger.info("Writing data for record with body: %s", event_body) + onboarder.write() + except Exception as e: + logger.error(f"Failed to process record: {e}") diff --git a/backend/onboarders/mappings/parity/age_band.py b/backend/onboarders/mappings/parity/age_band.py index e49fede8..406d39c1 100644 --- a/backend/onboarders/mappings/parity/age_band.py +++ b/backend/onboarders/mappings/parity/age_band.py @@ -1,4 +1,4 @@ -from backend.onboarders.epc_descriptions import EpcConstructionAgeBand +from datatypes.epc.construction_age_band import EpcConstructionAgeBand parity_map = { "Before 1900": EpcConstructionAgeBand.before_1900, diff --git a/backend/onboarders/mappings/parity/roof.py b/backend/onboarders/mappings/parity/roof.py index 14f0c34e..02518c3e 100644 --- a/backend/onboarders/mappings/parity/roof.py +++ b/backend/onboarders/mappings/parity/roof.py @@ -1,5 +1,10 @@ +import pandas as pd from numpy import nan +from typing import Union, Callable +from collections.abc import Mapping from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.construction_age_band import EpcConstructionAgeBand roof_map = { # Dwelling above @@ -101,3 +106,356 @@ roof_unknown_age_fallback = { "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, 
"PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, } + +RoofEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + + +def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> Pitched, no insulation, Very Poor + 1976-1982 -> Pitched, limited insulation, Poor + 1983-1990, to 1996-2002 Pitched, insulated, Average + 2003 - 2006, 2012-2022 -> Pitched, insulated, Good + 2023 onwards -> Pitched, insulated, Very Good + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + + start_year = age_band.start_year() + if start_year >= 2023: + return EpcEfficiency.VERY_GOOD + + if start_year >= 2003: + return EpcEfficiency.GOOD + + if start_year >= 1983: + return EpcEfficiency.AVERAGE + + if start_year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 12mm -> Very Poor + 25mm - 50mm -> Poor + 75mm - 125mm -> Pitched, insulated, average + 150mm - 250mm -> good + 270mm+ -> very good + :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for flat insulated efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if 150 <= insulation_thickness <= 250: + return EpcEfficiency.GOOD + + if 75 <= insulation_thickness <= 125: + return EpcEfficiency.AVERAGE + + if 25 <= insulation_thickness <= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine flat roof efficiency. 
+ :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return flat_insulated_efficiency_thickness(insulation_thickness) + + return flat_insulated_efficiency_age_band(age_band) + + +def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + 2023 onwards -> Very Good + 2012-2022 -> Very Good + 2007-2011 -> Very Good + 2003-2006 -> Very Good + 1996-2002 -> Good + 1991-1995 -> Good + 1983-1990 -> Average + 1976-1982 -> Average + 1967-1975 -> Average + 1950-1966 -> Average + 1930-1949 -> Average + 1900-1929 -> Average + before 1900 -> Average + :param age_band: Input age band, EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2003: + return EpcEfficiency.VERY_GOOD + if year >= 1991: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for thatched efficiency calculation") + + if insulation_thickness >= 175: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 25: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine thatched roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return thatched_efficiency_thickness(insulation_thickness) + + return thatched_efficiency_age_band(age_band) + + +def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 150: + return EpcEfficiency.GOOD + + if insulation_thickness >= 75: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 25: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine sloping ceiling roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return sloping_ceiling_efficiency_thickness(insulation_thickness) + + return sloping_ceiling_efficiency_age_band(age_band) + + +def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 400mm, 350mm = very good + 200-300mm = good + 125-175 = average + 50-100 = poor + 25 and below= very poor + :return: + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation") + + if insulation_thickness >= 350: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 200: + return EpcEfficiency.GOOD + + if insulation_thickness >= 125: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + # 2023 onwards -> Very Good + # 2003-2006, 2012-2022 -> Good + # 1983 - 1990, 1996-2002 -> Average + # 1976-1982 -> Poor + # 1967-1975 and earlier bands -> 
Very Poor + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine loft insulated at rafters roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness) + + return loft_insulated_at_rafters_efficiency_age_band(age_band) + + +ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { + # Flat roof + EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.flat_limited_insulation: flat_efficiency, + EpcRoofDescriptions.flat_insulated: flat_efficiency, + + # Loft: + # value mappings + EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD, + 
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR, + # function mappings + EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, + + # Loft af rafters + EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency, + + # Another dwelling above + EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, + + # Thatched + EpcRoofDescriptions.thatched: thatched_efficiency, + EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency, + + # Sloping ceiling + EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, + +} + + +def resolve_roof_efficiency( + description: EpcRoofDescriptions, + age_band: EpcConstructionAgeBand | None, + insulation_thickness: int | None, +) -> EpcEfficiency: + """ + Resolve roof efficiency from description + age band + insulation thickness. 
+ """ + + # Unknown / holding descriptions → efficiency unknown + if description in description.unknown_descriptions: + return EpcEfficiency.NA + + rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + # Fixed efficiency + if isinstance(rule, EpcEfficiency): + return rule + + # Callable rule + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + try: + # Try (thickness, age_band) + return rule(insulation_thickness, age_band) + except TypeError: + # Fallback to (age_band) + return rule(age_band) diff --git a/backend/onboarders/mappings/parity/walls.py b/backend/onboarders/mappings/parity/walls.py index b46559b9..0ad6d6e1 100644 --- a/backend/onboarders/mappings/parity/walls.py +++ b/backend/onboarders/mappings/parity/walls.py @@ -1,4 +1,8 @@ +from typing import Callable, Union +from collections.abc import Mapping from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.efficiency import EpcEfficiency # Unique combinations wall_map = { @@ -54,3 +58,154 @@ wall_unknown_age_fallback = { "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown, "Cob": EpcWallDescriptions.cob_as_built_unknown, } + + +def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps cavity filled to efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def internal_external_insulation_efficiency( + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Maps: + - cavity unfilled with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + - solid brick with internal/external insulation to efficiency based on construction age band. 
We assumed + based on 100mm insulation + - system built with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + + All of these wall types have the same behaviour in elmhurst + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_1983_to_1990, + EpcConstructionAgeBand.from_1991_to_1995, + EpcConstructionAgeBand.from_1996_to_2002, + EpcConstructionAgeBand.from_2003_to_2006, + EpcConstructionAgeBand.from_2007_to_2011, + EpcConstructionAgeBand.from_2012_to_2022, + EpcConstructionAgeBand.from_2023_onwards, + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps: + - timber frame with internal/external wall insulation to efficiency based on construction age band. + - sandstone/limestone with internal/external wall insulation to efficiency based on construction age band. + - granite/whinstone with internal/external wall insulation to efficiency based on construction age band. 
+ :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +WallEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + +WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = { + # Note: all function mappings have been defined based on Elmhurst + # Cavity + # value mappings + EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD, + EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD, + EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD, + # function mappings + EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency, + EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency, + + # Solid brick + # value mappings + EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency, + + # System + # value mappings + EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency, 
+ EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency, + + # Timber frame + # value mappings + EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Granite / whinstone + EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Sandstone / limestone + EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Cob (special case) + EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, + + # Unknown mappings which are unhandled + EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA, + 
EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA, + +} + + +def resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + rule = WALL_DESCRIPTION_EFFICIENCIES[description] + + if isinstance(rule, EpcEfficiency): + return rule + + return rule(age_band) diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index c7f982df..8fc5496e 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -2,12 +2,15 @@ import re from tqdm import tqdm import pandas as pd from backend.onboarders.base import OnboarderBase +# Parity mappings from backend.onboarders.mappings.parity.property_type import parity_map as property_map from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map -from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback -from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \ - WALL_DESCRIPTION_EFFICIENCIES, resolve_roof_efficiency +from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES +from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency +from onboarders.mappings.parity.floor import floor_map +from onboarders.mappings.parity.heating import heating_map +from onboarders.mappings.parity.glazing import glazing_map from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers 
from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( @@ -15,20 +18,12 @@ from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( ) from datatypes.epc.roof import EpcRoofDescriptions from datatypes.epc.floor import EpcFloorDescriptions -from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback -from onboarders.mappings.parity.floor import floor_map -from onboarders.mappings.parity.heating import heating_map -from onboarders.mappings.parity.glazing import glazing_map +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency tqdm.pandas() -# Sample input data -data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " - "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" -) - class ParityOnboarder(OnboarderBase): From a10a3bb1aaa2f1d142c5b3c08f3510b7233b7431 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 13:11:53 +0000 Subject: [PATCH 27/33] preparing filtered columns --- backend/onboarders/base.py | 9 ++- backend/onboarders/handler.py | 38 ++++++--- .../parity/as_built_floor_classifiers.py | 1 + .../parity/as_built_roof_classifiers.py | 3 +- .../parity/as_built_wall_classifiers.py | 3 +- backend/onboarders/parity.py | 80 +++++++++++++++---- backend/onboarders/requirements.txt | 0 7 files changed, 100 insertions(+), 34 deletions(-) create mode 100644 backend/onboarders/requirements.txt diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 0e2351bd..4d09cfeb 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -1,5 +1,5 @@ import pandas as pd -from utils.s3 import read_from_s3, read_excel_from_s3 +from utils.s3 import 
read_from_s3, read_excel_from_s3, save_csv_to_s3 class OnboarderBase: @@ -48,8 +48,11 @@ class OnboarderBase: else: self.data = read_from_s3(bucket_name=bucket_name, s3_file_name=file_name) - def write(self): - pass + def write(self, bucket_name: str, file_name: str): + if self.data is None: + raise ValueError("No data to write. Please run transform() before writing.") + # Store file as csv - will store in the same route location as the input file + save_csv_to_s3(dataframe=self.data, bucket_name=bucket_name, file_name=file_name) @staticmethod def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool: diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py index 0c38e4d9..dfff7788 100644 --- a/backend/onboarders/handler.py +++ b/backend/onboarders/handler.py @@ -1,10 +1,19 @@ import json +from pydantic import BaseModel, Field +from typing import Optional, Literal from onboarders.factory import OnboarderFactory from utils.logger import setup_logger logger = setup_logger() +class OnboardingEvent(BaseModel): + s3_uri: str = Field(..., description="S3 URI of the raw ARA input file") + system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier") + format: Literal["csv", "xlsx"] + sheet_name: Optional[str] = None + + def handler(event, context): """ Lambda handler that triggers the model engine for each SQS message. 
@@ -12,22 +21,27 @@ def handler(event, context): for record in event.get("Records", []): try: event_body = json.loads(record["body"]) - # TODO: Implement logic to check which file type we have # Sample input data - event_body = { - "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for " - "Domna.xlsx", - "system": "parity", - "format": "xlsx", - "sheet_name": "Sustainability" - } + # event_body = { + # "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for " + # "Domna.xlsx", + # "system": "parity", + # "format": "xlsx", + # "sheet_name": "Sustainability" + # } logger.info("Processing record with body: %s", event_body) - Onboarder = OnboarderFactory.create_onboarder(event_body["system"]) - onboarder = Onboarder(fileuri=event_body["s3_uri"]) - logger.info("Transforming data for record with body: %s", event_body) + validated_event = OnboardingEvent(**event_body) + Onboarder = OnboarderFactory.create_onboarder(validated_event.system) + onboarder = Onboarder( + fileuri=validated_event.s3_uri, + format=validated_event.format, + sheet_name=validated_event.sheet_name + ) + + logger.info("Transforming data") onboarder.transform() - logger.info("Writing data for record with body: %s", event_body) + logger.info("Writing data") onboarder.write() except Exception as e: logger.error(f"Failed to process record: {e}") diff --git a/backend/onboarders/mappings/parity/as_built_floor_classifiers.py b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py index 05894e61..3af3c079 100644 --- a/backend/onboarders/mappings/parity/as_built_floor_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py @@ -56,4 +56,5 @@ as_built_floor_classifiers = { unknown_as_built_floor_classifiers = { "RetroFitted": unknown_floor_retrofitted, "AsBuilt": unknown_floor_as_built, + "Unknown": unknown_floor_as_built, } diff --git 
a/backend/onboarders/mappings/parity/as_built_roof_classifiers.py b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py index d5c883ba..fcb554bd 100644 --- a/backend/onboarders/mappings/parity/as_built_roof_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py @@ -1,4 +1,5 @@ -from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcRoofDescriptions +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: diff --git a/backend/onboarders/mappings/parity/as_built_wall_classifiers.py b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py index 124270c7..480a7e24 100644 --- a/backend/onboarders/mappings/parity/as_built_wall_classifiers.py +++ b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py @@ -1,4 +1,5 @@ -from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcWallDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand): diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 8fc5496e..2afc7a73 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -30,12 +30,13 @@ class ParityOnboarder(OnboarderBase): def __init__( self, fileuri: str, + **kwargs ): # Extract bucket, and filekey; Will be in the format s3://bucket/key bucket_name = fileuri.split("/")[2] file_name = "/".join(fileuri.split("/")[3:]) - self.read_s3(bucket_name=bucket_name, file_name=file_name) + self.read_s3(bucket_name=bucket_name, file_name=file_name, **kwargs) pass def map_construction_age_band(self): @@ -61,20 +62,20 @@ class ParityOnboarder(OnboarderBase): type and age band """ # Already resolved via direct mapping - if row.landlord_wall_description is not None: 
- return row.landlord_wall_description + if row.landlord_wall_construction is not None: + return row.landlord_wall_construction wall_type = row["Wall Construction"] # Missing construction age → conservative fallback - if pd.isnull(row.construction_age_band): + if pd.isnull(row.landlord_construction_age_band): return wall_unknown_age_fallback.get(wall_type) classifier = as_built_wall_classifiers.get(wall_type) if classifier is None: return None - return classifier(row.construction_age_band) + return classifier(row.landlord_construction_age_band) @staticmethod def _resolve_wall_efficiency( @@ -113,8 +114,8 @@ class ParityOnboarder(OnboarderBase): self.data[self.landlord_wall_efficiency] = self.data.progress_apply( lambda row: self._resolve_wall_efficiency( - row.landlord_wall_description, - row.construction_age_band, + row.landlord_wall_construction, + row.landlord_construction_age_band, ), axis=1, ) @@ -124,8 +125,8 @@ class ParityOnboarder(OnboarderBase): @staticmethod def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None: # Already resolved - if not pd.isnull(row.landlord_roof_description): - return row.landlord_roof_description + if not pd.isnull(row.landlord_roof_construction): + return row.landlord_roof_construction roof_type = row["Roof Construction"] @@ -133,10 +134,10 @@ class ParityOnboarder(OnboarderBase): if classifier is None: raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") - if pd.isnull(row.construction_age_band): + if pd.isnull(row.landlord_construction_age_band): return roof_unknown_age_fallback.get(roof_type) - output = classifier(row.construction_age_band) + output = classifier(row.landlord_construction_age_band) if output is None: raise NotImplementedError( f"Roof classification returned None for roof type '{roof_type}'" @@ -180,8 +181,8 @@ class ParityOnboarder(OnboarderBase): self.data[self.landlord_roof_efficiency] = self.data.progress_apply( lambda row: resolve_roof_efficiency( - 
description=row.landlord_roof_description, - age_band=row.construction_age_band, + description=row.landlord_roof_construction, + age_band=row.landlord_construction_age_band, insulation_thickness=row.roof_insulation_thickness_mm, ), axis=1, @@ -190,17 +191,17 @@ class ParityOnboarder(OnboarderBase): self.assert_no_nulls(self.data, self.landlord_roof_efficiency) # Flag sloping ceiling - data[self.landlord_has_sloping_ceiling] = data["Roof Construction"].apply( + self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply( lambda x: x == "PitchedWithSlopingCeiling" ) @staticmethod def _fill_floor_as_built(row: pd.Series): # 1. Already resolved - if row.landlord_floor_description is not None: - return row.landlord_floor_description + if row.landlord_floor_construction is not None: + return row.landlord_floor_construction - age_band = row.construction_age_band + age_band = row.landlord_construction_age_band floor_type = row["Floor Construction"] insulation = row["Floor Insulation"] @@ -281,6 +282,48 @@ class ParityOnboarder(OnboarderBase): columns={"Total Floor Area (m2)": self.landlord_total_floor_area_m2} ) + def select_columns(self): + self.data = self.data[ + [ + "Org Ref", + "UPRN", + "Address 1", + "Address 2", + "Address 3", + "Postcode", + self.landlord_total_floor_area_m2, + self.landlord_construction_age_band, + self.landlord_property_type, + self.landlord_built_form, + self.landlord_wall_construction, + self.landlord_wall_efficiency, + self.landlord_roof_construction, + self.landlord_roof_efficiency, + self.landlord_has_sloping_ceiling, + self.landlord_floor_construction, + self.landlord_windows_construction, + self.landlord_windows_efficiency, + self.landlord_multi_glaze_proportion, + self.landlord_glazed_type, + self.landlord_glazed_area, + self.landlord_heating_construction, + self.landlord_heating_efficiency, + self.landlord_fuel_construction, + self.landlord_heating_controls_construction, + 
self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system_construction, + self.landlord_hot_water_efficiency + ] + ].rename( + columns={ + "Org Ref": "landlord_property_id", + "Address1": "address1", + "Address2": "address2", + "Address3": "address3", + "Postcode": "postcode", + } + ) + def transform(self): # ------------ construction_age_band ------------ self.map_construction_age_band() @@ -308,3 +351,6 @@ class ParityOnboarder(OnboarderBase): # ------------ Floor Area ------------ self.map_floor_area() + + # ------------ Formating ------------ + self.select_columns() diff --git a/backend/onboarders/requirements.txt b/backend/onboarders/requirements.txt new file mode 100644 index 00000000..e69de29b From 71310526ef2f9c310e336652e8528578ea17aef4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 14:07:43 +0000 Subject: [PATCH 28/33] ready for review (not deployed --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/onboarders/base.py | 31 +++++++++++++++--------- backend/onboarders/handler.py | 5 ++-- backend/onboarders/parity.py | 37 ++++++++++++++++++++--------- backend/onboarders/requirements.txt | 6 +++++ 6 files changed, 57 insertions(+), 26 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..a079cfaf 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..b6084632 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py index 4d09cfeb..93a0b7b0 100644 --- a/backend/onboarders/base.py +++ b/backend/onboarders/base.py @@ -5,15 +5,18 @@ from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3 class OnboarderBase: # Input dataset to be transformed data: pd.DataFrame | None = None + bucket_name = None + input_file_name = None + output_file_name = None # Description columns landlord_wall_construction: 
str = "landlord_wall_construction" landlord_roof_construction: str = "landlord_roof_construction" landlord_floor_construction: str = "landlord_floor_construction" - landlord_windows_construction: str = "landlord_windows_construction" + landlord_windows_type: str = "landlord_windows_type" landlord_heating_construction: str = "landlord_heating_construction" - landlord_fuel_construction: str = "landlord_fuel_construction" - landlord_heating_controls_construction: str = "landlord_heating_controls_construction" - landlord_hot_water_system_construction: str = "landlord_hot_water_system_construction" + landlord_fuel_type: str = "landlord_fuel_type" + landlord_heating_controls: str = "landlord_heating_controls" + landlord_hot_water_system: str = "landlord_hot_water_system" # Efficiency columns landlord_roof_efficiency: str = "landlord_roof_efficiency" @@ -37,22 +40,28 @@ class OnboarderBase: landlord_property_type: str = "landlord_property_type" landlord_built_form: str = "landlord_built_form" - def read_s3(self, bucket_name: str, file_name: str, **kwargs): - if kwargs.get("format") == "xlsx": + def read_s3(self, file_format, **kwargs): + + if self.input_file_name is None or self.bucket_name is None: + raise ValueError("Bucket name and input file name must be set before reading from S3.") + if file_format == "xlsx": self.data = read_excel_from_s3( - bucket_name=bucket_name, - file_key=file_name, + bucket_name=self.bucket_name, + file_key=self.input_file_name, sheet_name=kwargs.get("sheet_name"), header_row=kwargs.get("header_row", 0) ) else: - self.data = read_from_s3(bucket_name=bucket_name, s3_file_name=file_name) + self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name) - def write(self, bucket_name: str, file_name: str): + def write(self): if self.data is None: raise ValueError("No data to write. 
Please run transform() before writing.") + + if self.bucket_name is None or self.output_file_name is None: + raise ValueError("Bucket name and output file name must be set before writing to S3.") # Store file as csv - will store in the same route location as the input file - save_csv_to_s3(dataframe=self.data, bucket_name=bucket_name, file_name=file_name) + save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name) @staticmethod def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool: diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py index dfff7788..be7c2f38 100644 --- a/backend/onboarders/handler.py +++ b/backend/onboarders/handler.py @@ -36,12 +36,13 @@ def handler(event, context): onboarder = Onboarder( fileuri=validated_event.s3_uri, format=validated_event.format, - sheet_name=validated_event.sheet_name + sheet_name=validated_event.sheet_name, + file_format=validated_event.format ) logger.info("Transforming data") onboarder.transform() - logger.info("Writing data") + logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}") onboarder.write() except Exception as e: logger.error(f"Failed to process record: {e}") diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 2afc7a73..6c79d027 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -30,13 +30,16 @@ class ParityOnboarder(OnboarderBase): def __init__( self, fileuri: str, + file_format: str, **kwargs ): # Extract bucket, and filekey; Will be in the format s3://bucket/key - bucket_name = fileuri.split("/")[2] - file_name = "/".join(fileuri.split("/")[3:]) + self.bucket_name = fileuri.split("/")[2] + self.input_file_name = "/".join(fileuri.split("/")[3:]) + # Also prepare output file name + self.output_file_name = self.input_file_name.replace("." 
+ file_format, "") + "_transformed.csv" - self.read_s3(bucket_name=bucket_name, file_name=file_name, **kwargs) + self.read_s3(file_format=file_format, **kwargs) pass def map_construction_age_band(self): @@ -242,7 +245,7 @@ class ParityOnboarder(OnboarderBase): # however self.data[ [ - self.landlord_windows_construction, + self.landlord_windows_type, self.landlord_windows_efficiency, self.landlord_multi_glaze_proportion, self.landlord_glazed_type, @@ -261,10 +264,10 @@ class ParityOnboarder(OnboarderBase): [ self.landlord_heating_construction, self.landlord_heating_efficiency, - self.landlord_fuel_construction, - self.landlord_heating_controls_construction, + self.landlord_fuel_type, + self.landlord_heating_controls, self.landlord_heating_controls_efficiency, - self.landlord_hot_water_system_construction, + self.landlord_hot_water_system, self.landlord_hot_water_efficiency ] ] = self.data[ @@ -301,17 +304,17 @@ class ParityOnboarder(OnboarderBase): self.landlord_roof_efficiency, self.landlord_has_sloping_ceiling, self.landlord_floor_construction, - self.landlord_windows_construction, + self.landlord_windows_type, self.landlord_windows_efficiency, self.landlord_multi_glaze_proportion, self.landlord_glazed_type, self.landlord_glazed_area, self.landlord_heating_construction, self.landlord_heating_efficiency, - self.landlord_fuel_construction, - self.landlord_heating_controls_construction, + self.landlord_fuel_type, + self.landlord_heating_controls, self.landlord_heating_controls_efficiency, - self.landlord_hot_water_system_construction, + self.landlord_hot_water_system, self.landlord_hot_water_efficiency ] ].rename( @@ -324,6 +327,17 @@ class ParityOnboarder(OnboarderBase): } ) + def extract_values(self): + for columns in [ + self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form, + self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction, + self.landlord_roof_efficiency, 
self.landlord_floor_construction, self.landlord_windows_type, + self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency, + self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system, self.landlord_hot_water_efficiency + ]: + self.data[columns] = self.data[columns].progress_apply(lambda x: x.value if hasattr(x, "value") else x) + def transform(self): # ------------ construction_age_band ------------ self.map_construction_age_band() @@ -354,3 +368,4 @@ class ParityOnboarder(OnboarderBase): # ------------ Formating ------------ self.select_columns() + self.extract_values() diff --git a/backend/onboarders/requirements.txt b/backend/onboarders/requirements.txt index e69de29b..907cb877 100644 --- a/backend/onboarders/requirements.txt +++ b/backend/onboarders/requirements.txt @@ -0,0 +1,6 @@ +boto3 +numpy==2.1.2 +pandas==2.2.3 +tqdm==4.66.5 +pydantic==2.9.2 +openpyxl==3.1.2 \ No newline at end of file From df2c8e9735f0e6b384f4331eef55926fbabaa09f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 5 Feb 2026 17:02:12 +0000 Subject: [PATCH 29/33] formatting --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/onboarders/handler.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index a079cfaf..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index b6084632..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py index be7c2f38..91ec11d2 100644 --- a/backend/onboarders/handler.py +++ b/backend/onboarders/handler.py @@ -29,6 +29,7 @@ def handler(event, context): # "format": "xlsx", # "sheet_name": "Sustainability" # } + logger.info("Processing record with body: %s", event_body) 
validated_event = OnboardingEvent(**event_body) @@ -44,5 +45,6 @@ def handler(event, context): onboarder.transform() logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}") onboarder.write() + except Exception as e: logger.error(f"Failed to process record: {e}") From 7cf33e4f33a23a06d6e31ae8cec7a809ca6b196e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 7 Feb 2026 21:15:13 +0000 Subject: [PATCH 30/33] corrected error in ecr name --- infrastructure/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index b97a2f4d..5dfe765f 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -244,7 +244,7 @@ module "lambda_heating_cost_prediction_ecr" { } module "lambda_hot_water_cost_prediction_ecr" { - ecr_name = "hot-water-fcost-prediction-${var.stage}" + ecr_name = "hot-water-cost-prediction-${var.stage}" source = "./modules/ecr" } From c34ed0bbe07eb7cb39a9ddf85949291c19ef9bd7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 7 Feb 2026 21:29:07 +0000 Subject: [PATCH 31/33] fixed imports on unit tests for parity onboarding --- .../onboarders/tests/test_floor_remapping.py | 17 ++++++++-------- .../onboarders/tests/test_roof_remapping.py | 20 +++++++++---------- .../onboarders/tests/test_wall_remapping.py | 12 +++++------ 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/backend/onboarders/tests/test_floor_remapping.py b/backend/onboarders/tests/test_floor_remapping.py index 26a05d22..c20372b7 100644 --- a/backend/onboarders/tests/test_floor_remapping.py +++ b/backend/onboarders/tests/test_floor_remapping.py @@ -1,14 +1,13 @@ import pytest -from backend.onboarders.epc_descriptions import ( - EpcConstructionAgeBand, - EpcFloorDescriptions, -) -from backend.onboarders.mappings.as_built_floor_classifiers import ( +from datatypes.epc.construction_age_band import 
EpcConstructionAgeBand +from datatypes.epc.floor import EpcFloorDescriptions + +from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( unknown_floor_as_built, unknown_floor_retrofitted, - solid_floor_as_built, - suspended_floor_as_built, + map_solid_floor_as_built, + map_suspended_floor_as_built, ) @@ -75,7 +74,7 @@ def test_unknown_floor_retrofitted(age_band, expected): ], ) def test_solid_floor_as_built(age_band, expected): - assert solid_floor_as_built(age_band) == expected + assert map_solid_floor_as_built(age_band) == expected @pytest.mark.parametrize( @@ -95,4 +94,4 @@ def test_solid_floor_as_built(age_band, expected): ], ) def test_suspended_floor_as_built(age_band, expected): - assert suspended_floor_as_built(age_band) == expected + assert map_suspended_floor_as_built(age_band) == expected diff --git a/backend/onboarders/tests/test_roof_remapping.py b/backend/onboarders/tests/test_roof_remapping.py index a08471f9..cc19e057 100644 --- a/backend/onboarders/tests/test_roof_remapping.py +++ b/backend/onboarders/tests/test_roof_remapping.py @@ -1,16 +1,14 @@ import pytest -from backend.onboarders.epc_descriptions import ( - EpcConstructionAgeBand, - EpcRoofDescriptions, - EpcEfficiency, - resolve_roof_efficiency, -) +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.efficiency import EpcEfficiency -from backend.onboarders.mappings.as_built_roof_classifiers import ( - classify_flat_roof, - classify_sloping_ceiling_roof, +from backend.onboarders.mappings.parity.as_built_roof_classifiers import ( + map_flat_roof, + map_sloping_ceiling_roof, ) +from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency # --------------------------------------------------------------------- @@ -30,7 +28,7 @@ from backend.onboarders.mappings.as_built_roof_classifiers import ( ], ) def test_classify_flat_roof(age_band, expected): - assert 
classify_flat_roof(age_band) == expected + assert map_flat_roof(age_band) == expected @pytest.mark.parametrize( @@ -45,7 +43,7 @@ def test_classify_flat_roof(age_band, expected): ], ) def test_classify_sloping_ceiling_roof(age_band, expected): - assert classify_sloping_ceiling_roof(age_band) == expected + assert map_sloping_ceiling_roof(age_band) == expected # --------------------------------------------------------------------- diff --git a/backend/onboarders/tests/test_wall_remapping.py b/backend/onboarders/tests/test_wall_remapping.py index eaac5afb..c9476211 100644 --- a/backend/onboarders/tests/test_wall_remapping.py +++ b/backend/onboarders/tests/test_wall_remapping.py @@ -1,13 +1,11 @@ import pytest -from backend.onboarders.epc_descriptions import ( - EpcConstructionAgeBand, - EpcWallDescriptions, - EpcEfficiency, - resolve_wall_efficiency, -) +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency -from backend.onboarders.mappings.as_built_wall_classifiers import ( +from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency +from backend.onboarders.mappings.parity.as_built_wall_classifiers import ( map_cavity_wall_insulation, map_solid_wall_insulation, map_timber_frame_wall_insulation, From bf34393ceb53b583c683b808dc0df122326b1975 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 9 Feb 2026 08:03:44 +0000 Subject: [PATCH 32/33] Updated factory to return instantiated class --- backend/onboarders/factory.py | 4 ++-- backend/onboarders/handler.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/onboarders/factory.py b/backend/onboarders/factory.py index 13dd5505..2ff7dcbc 100644 --- a/backend/onboarders/factory.py +++ b/backend/onboarders/factory.py @@ -3,8 +3,8 @@ from onboarders.parity import ParityOnboarder class OnboarderFactory: @staticmethod - def 
create_onboarder(onboarder_type): + def create_onboarder(onboarder_type, **kwargs): if onboarder_type == "parity": - return ParityOnboarder + return ParityOnboarder(**kwargs) raise ValueError(f"Unknown onboarder type: {onboarder_type}") diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py index 91ec11d2..d66b5796 100644 --- a/backend/onboarders/handler.py +++ b/backend/onboarders/handler.py @@ -33,8 +33,8 @@ def handler(event, context): logger.info("Processing record with body: %s", event_body) validated_event = OnboardingEvent(**event_body) - Onboarder = OnboarderFactory.create_onboarder(validated_event.system) - onboarder = Onboarder( + onboarder = OnboarderFactory.create_onboarder( + validated_event.system, fileuri=validated_event.s3_uri, format=validated_event.format, sheet_name=validated_event.sheet_name, From bff54a906305c0f39a4ea87aa24be19607148f37 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 9 Feb 2026 11:49:32 +0000 Subject: [PATCH 33/33] added read me with repo overview and todos --- backend/onboarders/README.md | 102 +++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 backend/onboarders/README.md diff --git a/backend/onboarders/README.md b/backend/onboarders/README.md new file mode 100644 index 00000000..063fee20 --- /dev/null +++ b/backend/onboarders/README.md @@ -0,0 +1,102 @@ +# Retrofit Property Data Onboarding + +This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems ( +currently Parity) into a standardised internal format, compatible for both address2uprn and engine. 
+ +The pipeline is designed to: + +- Run as an AWS Lambda triggered by SQS +- Read raw CSV/XLSX files from S3 +- Perform rule-based mappings +- Infer as-built property attributes, assumed from the construction age band +- Output a processed CSV back to S3, to be consumed by address2uprn + +### Structure + +SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3 + +Each source system implements its own **Onboarder**, while sharing a common base and mapping process. + +--- + +### Repository Structure + +onboarders/ +├── `handler.py` # Lambda entrypoint \ +├── `factory.py` # Onboarder factory \ +├── `base.py` # Shared onboarding base class \ +├── `parity.py` # Parity-specific transformation logic \ +├── `mappings/` \ +│ └── `parity/` # Parity domain mappings & classifiers \ +│ ├── `age_band.py` \ +│ ├── `property_type.py` \ +│ ├── `built_form.py` \ +│ ├── `walls.py` \ +│ ├── `roof.py` \ +│ ├── `floor.py` \ +│ ├── `glazing.py` \ +│ ├── `heating.py` \ +│ ├── `as_built_wall_classifiers.py` \ +│ ├── `as_built_roof_classifiers.py` \ +│ └── `as_built_floor_classifiers.py` \ +├── `tests/` \ +├── `requirements.txt` \ +└── `README.md` + + +--- + +### Lambda Entry Point (`handler.py`) + +The Lambda handler: + +1. Consumes messages from the SQS queue +2. Validates the payload +3. Instantiates the correct onboarder via `OnboarderFactory` +4. Runs the transformation +5. Writes the transformed CSV back to S3 + +### Expected Event Payload + +```json +{ + "s3_uri": "s3://bucket/path/to/input.xlsx", + "system": "parity", + "format": "xlsx", + "sheet_name": "Sustainability" +} + +``` + +### Onboarder Base `(base.py)` + +OnboarderBase provides shared functionality across all systems.
+ +*Responsibilities* + +- Reading CSV/XLSX files from S3 +- Writing transformed CSVs to S3 +- Defining canonical output column names +- Providing validation helpers +- Common output - for the moment, onboarders are expected to return a CSV + +### Parity Onboarder `(parity.py)` + +`ParityOnboarder` contains all Parity-specific transformation logic. + +*Responsibilities* + +- Map raw Parity fields to internal EPC-aligned enums +- Infer “as-built” constructions using age bands when insulation data is missing +- Resolve energy efficiency ratings deterministically +- Normalise output into a fixed schema + +The `transform()` method orchestrates the transformation process. + +### TODOs + +- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions + to tuples of descriptions and efficiency ratings. This is okay for the moment but we may consider + using a data class, just given how error-prone this is. +- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py` +- Implement an AI-enabled version, to replace the standardised asset list \ No newline at end of file