diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index a9defdef..d6466539 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = { '2.EXT.WALL FLAT': 'mid-terrace', '2 EXT. WALL FLAT': 'mid-terrace', + 'Maisonette: Detached: Ground Floor': 'detached', + 'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace', + 'Flat: End Terrace: Basement': 'end-terrace', + 'Flat: Mid Terrace: Basement': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace', + 'House: Semi Detached: Top Floor': 'semi-detached', + 'House: End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index e67fafb4..defce35f 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = { 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', 'Solar PV': 'already has PV', - 'SOLAR PV': 'already has PV' + 'SOLAR PV': 'already has PV', + + 'PV: 40% roof area, PV: 2kWp array': 'already has PV', + 'PV: 33% roof area, PV: 2kWp array': 'already has PV', + 'PV: 30% roof area': 'already has PV' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index ffd1b198..272d6279 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -494,6 +494,10 @@ HEATING_MAPPINGS = { 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' 'and sealed to, fireplace opening': 'room heaters', 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', - 'Boiler: G rated Combi': 'gas condensing combi' + 'Boiler: G 
rated Combi': 'gas condensing combi', + + 'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', + 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', + 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1f251598..6f808c9a 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -427,6 +427,18 @@ PROPERTY_MAPPING = { 'End Terrace': 'unknown', 'Detached': 'unknown', 'Mid-terrace': 'unknown', - 'MID - TERRACE': 'unknown' + 'MID - TERRACE': 'unknown', + 'COMOFF': 'unknown', + 'LOTS': 'unknown', + + 'Maisonette: Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette', + 'Flat: End Terrace: Basement': 'flat', + 'Bungalow: EnclosedEndTerrace': 'bungalow', + 'Flat: Mid Terrace: Basement': 'flat', + 'House: Semi Detached: Top Floor': 'house', + 'House: End Terrace: Ground Floor': 'house', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette', + 'Flat: Enclosed Mid Terrace: Basement': 'flat' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 0857b046..cf829a5f 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + 'Flat: 150mm, Flat: Unknown': 'flat insulated', + 'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above', + 'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above', + 'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft', + 'Flat: No Insulation': 'flat uninsulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above', + 
'PitchedNormalLoftAccess: 175mm': 'pitched insulated', + 'AnotherDwellingAbove: 300mm': 'another dwelling above' + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 418ae9f8..1bb02a9a 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'System built Internal': 'insulated system built', 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation', - 'Cavity: FilledCavityPlusExternal': 'filled cavity' + 'Cavity: FilledCavityPlusExternal': 'filled cavity', + + 'Cavity, Filled Cavity': 'filled cavity', + 'Solid Brick, As Built': 'solid brick unknown insulation', + 'Cavity, As Built': 'cavity unknown insulation', + 'Sandstone, As Built': 'sandstone or limestone unknown insulation', + 'Timber Frame, As Built': 'timber frame unknown insulation', + 'Solid Brick, Internal Insulation': 'insulated solid brick', + 'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation', + 'Solid Brick, External': 'insulated solid brick' } diff --git a/backend/onboarders/README.md b/backend/onboarders/README.md new file mode 100644 index 00000000..063fee20 --- /dev/null +++ b/backend/onboarders/README.md @@ -0,0 +1,102 @@ +# Retrofit Property Data Onboarding + +This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems ( +currently Parity) into a standardised internal format, compatible for both address2uprn and engine. + +The pipeline is designed to: + +- Run as an AWS Lambda triggered by SQS +- Read raw CSV/XLSX files from S3 +- Perform rule-based mappings +- Infer as built property attributes, assumed based on age +- Output a processed csv, back to s3 to be consumed by address2uprn + +### Structure + +SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3 + +Each source system implements its own **Onboarder**, while sharing a common base and mapping process. 
+ +--- + +### Repository Structure + +onboarders/ +├── `handler.py` # Lambda entrypoint \ +├── `factory.py` # Onboarder factory \ +├── `base.py` # Shared onboarding base class \ +├── `parity.py` # Parity-specific transformation logic \ +├── `mappings/` \ +│ └── `parity/` # Parity domain mappings & classifiers \ +│ ├── `age_band.py` \ +│ ├── `property_type.py` \ +│ ├── `built_form.py` \ +│ ├── `walls.py` \ +│ ├── `roof.py` \ +│ ├── `floor.py` \ +│ ├── `glazing.py` \ +│ ├── `heating.py` \ +│ ├── `as_built_wall_classifiers.py` \ +│ ├── `as_built_roof_classifiers.py` \ +│ └── `as_built_floor_classifiers.py` \ +├── `tests/` \ +├── `requirements.txt` \ +└── `README.md` + + +--- + +### Lambda Entry Point (`handler.py`) + +The Lambda handler: + +1. Consumes messages from the SQS queue +2. Validates the payload +3. Instantiates the correct onboarder via `OnboarderFactory` +4. Runs the transformation +5. Writes the transformed CSV back to S3 + +### Expected Event Payload + +```json +{ + "s3_uri": "s3://bucket/path/to/input.xlsx", + "system": "parity", + "format": "xlsx", + "sheet_name": "Sustainability" +} + +``` + +### Onboarder Base `(base.py)` + +OnboarderBase provides shared functionality across all systems. + +*Responsibilities* + +- Reading CSV/XLSX files from S3 +- Writing transformed CSVs to S3 +- Defining canonical output column names +- Providing validation helpers +- Common output - for the moment, onboarders are expected to return a CSV + +### Parity Onboarder `(parity.py)` + +`ParityOnboarder` contains all Parity-specific transformation logic. + +*Responsibilities* + +- Map raw Parity fields to internal EPC-aligned enums +- Infer “as-built” constructions using age bands when insulation data is missing +- Resolve energy efficiency ratings deterministically +- Normalise output into a fixed schema + +The `transform()` method orchestrates the transformation process. 
+ +### TODOs + +- In `backend/onboarders/mappings/parity/glazing.py` we currently map the partiy descriptions + to duples of descriptions and efficiency ratings. This is okay for the moment but we may consider + using a data class, just given how error-prone this is. +- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py` +- Implement a AI-enabled version, to replace the standardised asset list \ No newline at end of file diff --git a/backend/onboarders/__init__.py b/backend/onboarders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py new file mode 100644 index 00000000..93a0b7b0 --- /dev/null +++ b/backend/onboarders/base.py @@ -0,0 +1,84 @@ +import pandas as pd +from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3 + + +class OnboarderBase: + # Input dataset to be transformed + data: pd.DataFrame | None = None + bucket_name = None + input_file_name = None + output_file_name = None + # Description columns + landlord_wall_construction: str = "landlord_wall_construction" + landlord_roof_construction: str = "landlord_roof_construction" + landlord_floor_construction: str = "landlord_floor_construction" + landlord_windows_type: str = "landlord_windows_type" + landlord_heating_construction: str = "landlord_heating_construction" + landlord_fuel_type: str = "landlord_fuel_type" + landlord_heating_controls: str = "landlord_heating_controls" + landlord_hot_water_system: str = "landlord_hot_water_system" + + # Efficiency columns + landlord_roof_efficiency: str = "landlord_roof_efficiency" + landlord_windows_efficiency: str = "landlord_windows_efficiency" + landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency" + landlord_heating_efficiency: str = "landlord_heating_efficiency" + landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency" + landlord_wall_efficiency: str = "landlord_wall_efficiency" + + # 
Additional windows features + landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion" + landlord_glazed_type: str = "landlord_glazed_type" + landlord_glazed_area: str = "landlord_glazed_area" + + # Additional roof features + landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling" + + # Shape, dimensions, age + landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2" + landlord_construction_age_band: str = "landlord_construction_age_band" + landlord_property_type: str = "landlord_property_type" + landlord_built_form: str = "landlord_built_form" + + def read_s3(self, file_format, **kwargs): + + if self.input_file_name is None or self.bucket_name is None: + raise ValueError("Bucket name and input file name must be set before reading from S3.") + if file_format == "xlsx": + self.data = read_excel_from_s3( + bucket_name=self.bucket_name, + file_key=self.input_file_name, + sheet_name=kwargs.get("sheet_name"), + header_row=kwargs.get("header_row", 0) + ) + else: + self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name) + + def write(self): + if self.data is None: + raise ValueError("No data to write. 
Please run transform() before writing.") + + if self.bucket_name is None or self.output_file_name is None: + raise ValueError("Bucket name and output file name must be set before writing to S3.") + # Store file as csv - will store in the same route location as the input file + save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name) + + @staticmethod + def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool: + # We only allow nulls if the original value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + @staticmethod + def assert_no_nulls(data: pd.DataFrame, column: str): + assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not" + + def map_construction_age_band(self): + raise NotImplementedError( + "This method should be implemented by subclasses to map construction age bands to descriptions" + ) diff --git a/backend/onboarders/factory.py b/backend/onboarders/factory.py new file mode 100644 index 00000000..2ff7dcbc --- /dev/null +++ b/backend/onboarders/factory.py @@ -0,0 +1,10 @@ +from onboarders.parity import ParityOnboarder + + +class OnboarderFactory: + @staticmethod + def create_onboarder(onboarder_type, **kwargs): + if onboarder_type == "parity": + return ParityOnboarder(**kwargs) + + raise ValueError(f"Unknown onboarder type: {onboarder_type}") diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py new file mode 100644 index 00000000..d66b5796 --- /dev/null +++ b/backend/onboarders/handler.py @@ -0,0 +1,50 @@ +import json +from pydantic import BaseModel, Field +from typing import Optional, Literal +from onboarders.factory import OnboarderFactory +from 
def handler(event, context):
    """Lambda entry point: onboard the file described by each SQS record.

    Every record body is a JSON document validated against ``OnboardingEvent``,
    for example::

        {
            "s3_uri": "s3://bucket/path/to/input.xlsx",
            "system": "parity",
            "format": "xlsx",
            "sheet_name": "Sustainability"
        }

    For each record the matching onboarder is created through
    ``OnboarderFactory``, ``transform()`` is run and the result is written
    back with ``write()``. A record that fails is logged and skipped so the
    remaining records in the batch are still processed.

    :param event: Lambda event; records are read from ``event["Records"]``.
    :param context: Lambda context object (unused).
    :return: ``None``.
    """
    for record in event.get("Records", []):
        try:
            event_body = json.loads(record["body"])
            logger.info("Processing record with body: %s", event_body)

            validated_event = OnboardingEvent(**event_body)
            # NOTE(review): `format` and `file_format` carry the same value;
            # confirm whether the onboarder constructor really needs both.
            onboarder = OnboarderFactory.create_onboarder(
                validated_event.system,
                fileuri=validated_event.s3_uri,
                format=validated_event.format,
                sheet_name=validated_event.sheet_name,
                file_format=validated_event.format,
            )

            logger.info("Transforming data")
            onboarder.transform()
            logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}")
            onboarder.write()

        except Exception as e:
            # Best-effort per record: keep going, but log the traceback so the
            # failure can be diagnosed (assumes a stdlib-style logger).
            logger.exception("Failed to process record: %s", e)
1991-1995', - "1996-2002": 'England and Wales: 1996-2002', - "2003-2006": 'England and Wales: 2003-2006', - "2007-2011": 'England and Wales: 2007-2011', - "2012 onwards": 'England and Wales: 2012-2021', -} diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py deleted file mode 100644 index 23901fc6..00000000 --- a/backend/onboarders/mappings/built_form.py +++ /dev/null @@ -1,15 +0,0 @@ -parity_map = { - "MidTerrace": "Mid-Terrace", - "EndTerrace": "End-Terrace", - "Detached": "Detached", - "SemiDetached": "Semi-Detached", - "EnclosedMidTerrace": "Enclosed Mid-Terrace", - "EnclosedEndTerrace": "Enclosed End-Terrace", -} - -# MidTerrace 41462 -# EndTerrace 20910 -# Detached 16875 -# SemiDetached 14725 -# EnclosedMidTerrace 3176 -# EnclosedEndTerrace 2393 diff --git a/backend/onboarders/mappings/parity/age_band.py b/backend/onboarders/mappings/parity/age_band.py new file mode 100644 index 00000000..406d39c1 --- /dev/null +++ b/backend/onboarders/mappings/parity/age_band.py @@ -0,0 +1,19 @@ +from datatypes.epc.construction_age_band import EpcConstructionAgeBand + +parity_map = { + "Before 1900": EpcConstructionAgeBand.before_1900, + "1900-1929": EpcConstructionAgeBand.from_1900_to_1929, + "1930-1949": EpcConstructionAgeBand.from_1930_to_1949, + "1950-1966": EpcConstructionAgeBand.from_1950_to_1966, + "1967-1975": EpcConstructionAgeBand.from_1967_to_1975, + "1976-1982": EpcConstructionAgeBand.from_1976_to_1982, + "1983-1990": EpcConstructionAgeBand.from_1983_to_1990, + "1991-1995": EpcConstructionAgeBand.from_1991_to_1995, + "1996-2002": EpcConstructionAgeBand.from_1996_to_2002, + "2003-2006": EpcConstructionAgeBand.from_2003_to_2006, + "2007-2011": EpcConstructionAgeBand.from_2007_to_2011, + "2012 onwards": EpcConstructionAgeBand.from_2012_onwards, + # Newer age bands, under SAP10 + "2012-2022": EpcConstructionAgeBand.from_2012_to_2022, + "2023 onwards": EpcConstructionAgeBand.from_2023_onwards, +} diff --git 
def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """Assume an as-built floor description when the floor type is unknown.

    Bands starting in 2003 or later are assumed solid and insulated, 1996 or
    later solid with limited insulation, 1930 or later solid with no
    insulation, and anything earlier suspended with no insulation.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    elif start >= 1930:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed
    return description


def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """Pick the insulated description for a retrofitted floor of unknown type.

    Bands starting in 1930 or later are treated as solid, earlier ones as
    suspended.
    """
    if age_band.start_year() >= 1930:
        return EpcFloorDescriptions.solid_insulated
    return EpcFloorDescriptions.suspended_insulated


def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """Assume the insulation level of an as-built solid floor from its age band."""
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    return description


def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """Assume the insulation level of an as-built suspended floor from its age band."""
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.suspended_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.suspended_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed
    return description


# Classifier per known floor type; both suspended variants share one rule.
as_built_floor_classifiers = {
    "Solid": map_solid_floor_as_built,
    "SuspendedTimber": map_suspended_floor_as_built,
    "SuspendedNotTimber": map_suspended_floor_as_built,
}

# Classifier per insulation label, used when the floor type itself is unknown.
unknown_as_built_floor_classifiers = {
    "RetroFitted": unknown_floor_retrofitted,
    "AsBuilt": unknown_floor_as_built,
    "Unknown": unknown_floor_as_built,
}


def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Classify a flat, as-built roof by the start year of its age band:

    1983 onwards → Flat, insulated
    1967–1982 → Flat, limited insulation
    before 1967 → Flat, no insulation
    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    if start >= 1983:
        description = EpcRoofDescriptions.flat_insulated
    elif start >= 1967:
        description = EpcRoofDescriptions.flat_limited_insulation
    else:
        description = EpcRoofDescriptions.flat_no_insulation
    return description


def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Classify a sloping-ceiling, as-built roof by the start year of its age band:

    1983 onwards → Sloping pitched, insulated
    1976–1982 → Sloping pitched, limited insulation
    before 1976 → Sloping pitched, no insulation
    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    if start >= 1983:
        description = EpcRoofDescriptions.sloping_pitched_insulated
    elif start >= 1976:
        description = EpcRoofDescriptions.sloping_pitched_limited_insulation
    else:
        description = EpcRoofDescriptions.sloping_pitched_no_insulation
    return description
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a cavity wall from its age band.

    Bands starting before 1976 map to no insulation, the 1976-1982 band to
    partial insulation, and bands in ``from_year_onwards(1983)`` to insulated.
    Raises ``NotImplementedError`` for any band that matches none of these.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.cavity_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.cavity_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.cavity_insulated_assumed
    else:
        raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping")
    return assumed


def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a solid brick wall from its age band.

    Uses the same cut-offs as the cavity rule (before 1976 / 1976-1982 /
    1983 onwards) and raises ``NotImplementedError`` for an unmatched band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return assumed


def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a timber frame wall from its age band.

    Bands starting before 1950 map to no insulation, before 1976 to partial
    insulation, and bands in ``from_year_onwards(1976)`` to insulated. Raises
    ``NotImplementedError`` for an unmatched band.
    """
    start = age_band.start_year()

    if start < 1950:
        assumed = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif start < 1976:
        assumed = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        assumed = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return assumed


def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a system-built wall from its age band.

    Cut-offs: before 1976 / 1976-1982 / 1983 onwards. Raises
    ``NotImplementedError`` for an unmatched band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return assumed


def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a granite/whinstone wall from its age band.

    Cut-offs: before 1976 / 1976-1982 / 1983 onwards. Raises
    ``NotImplementedError`` for an unmatched band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): member is spelled "whinestone" here but "whinstone"
        # above - confirm against the EpcWallDescriptions definition.
        assumed = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return assumed


def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built insulation of a sandstone/limestone wall from its age band.

    Cut-offs: before 1976 / 1976-1982 / 1983 onwards. Raises
    ``NotImplementedError`` for an unmatched band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return assumed


def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """Assume the as-built rating of a cob wall from its age band.

    Bands starting before 1983 map to "average", bands in
    ``from_year_onwards(1983)`` to "good". Raises ``NotImplementedError`` for
    an unmatched band.
    """
    if age_band.start_year() < 1983:
        assumed = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return assumed


# Classifier to apply for each wall type label.
as_built_wall_classifiers = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}
EpcFloorDescriptions.suspended_insulated, + ('SuspendedNotTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + + # Unknown type - mapped on age + ('Unknown', 'Unknown'): None, # Mapped unknown_floor_as_built + ('Unknown', 'RetroFitted'): None, # Mapped unknown_floor_retrofitted + (nan, nan): None, # No actual information! + ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built +} diff --git a/backend/onboarders/mappings/parity/glazing.py b/backend/onboarders/mappings/parity/glazing.py new file mode 100644 index 00000000..46c006bd --- /dev/null +++ b/backend/onboarders/mappings/parity/glazing.py @@ -0,0 +1,20 @@ +from datatypes.epc.efficiency import EpcEfficiency + +glazing_map = { + # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area + # For SAP 10 assessments, The glazed type and glazed area are not populated in the EPC API data any more + "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022 + # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to + # how we make updates to the windows data. 
+ # Triple known data is high performance glazing with Good efficiency (at least) + "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + # This is also classed as high performance glazing + "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) + "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), +} diff --git a/backend/onboarders/mappings/parity/heating.py b/backend/onboarders/mappings/parity/heating.py new file mode 100644 index 00000000..aa74834b --- /dev/null +++ b/backend/onboarders/mappings/parity/heating.py @@ -0,0 +1,330 @@ +from datatypes.epc.main_heating import EpcHeatingSystems +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.fuel import EpcFuel +from datatypes.epc.heating_controls import EpcHeatingControls +from datatypes.epc.hotwater import EpcHotWaterSystems + +heating_map = { + # 0 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 1 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 2 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + 
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 3 + ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 4 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 5 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 6 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 7 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 8 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 9 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 10 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 11 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 12 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 13 + ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 14 + ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 15 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + 
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 16 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 17 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 19 + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 20 + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 21 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 22 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): 
( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 23 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 24 + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 25 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 26 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 27 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 28 + ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, 
EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 29 + ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 30 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 31 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 32 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 33 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 34 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 35 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 36 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 37 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 38 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 39 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 40 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 41 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + 
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 42 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 43 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 44 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 45 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 46 + ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 47 - water done from here + ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, 
EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + 
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ) +} diff --git a/backend/onboarders/mappings/parity/property_type.py b/backend/onboarders/mappings/parity/property_type.py new file mode 100644 index 00000000..f91c0c88 --- /dev/null +++ b/backend/onboarders/mappings/parity/property_type.py @@ -0,0 +1,8 @@ +from datatypes.epc.property_type_built_form import PropertyType + +parity_map = { + "Flat": PropertyType.flat, + "Maisonette": PropertyType.maisonette, + "Bungalow": PropertyType.bungalow, + "House": PropertyType.house, +} diff --git a/backend/onboarders/mappings/parity/roof.py b/backend/onboarders/mappings/parity/roof.py new file mode 100644 index 00000000..02518c3e --- /dev/null +++ b/backend/onboarders/mappings/parity/roof.py @@ -0,0 +1,461 @@ +import pandas as pd +from numpy import nan +from typing import Union, Callable +from collections.abc import Mapping +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.construction_age_band import EpcConstructionAgeBand + +roof_map = { + # Dwelling above + ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + # Pitched, normal loft access, with a loft thickness + ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalLoftAccess', 'mm100'): 
EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # Pitched, no loft access, with a loft thickness + ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # All pitched options with asbuilt or unknown got to EpcRoofDescriptions.pitched_insulated_assumed + # With access + ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + # 
No access + ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + + # Flat + ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, + # Flat - limited insulation + ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation, + # Flat insulated + ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, + # Flat - as built or unknown + ('Flat', 'AsBuilt'): None, # To be classified + ('Flat', nan): None, # To be classified + ('Flat', 'Unknown'): None, # To be classified + + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + + # Thatched + ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age + + # Sloping: + # Limited (12 very poor, 25-50 poor) + ('PitchedWithSlopingCeiling', 'mm12'): 
EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good) + ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated, + # As built/unknown + ('PitchedWithSlopingCeiling', 'AsBuilt'): None, # To be classified + ('PitchedWithSlopingCeiling', nan): None, # To be classified + ('PitchedWithSlopingCeiling', 'Unknown'): None, # +} + +roof_unknown_age_fallback = { + "Flat": EpcRoofDescriptions.flat_as_built_unknown, + "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown, + "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown, + "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, + "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, +} + +RoofEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + + +def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> 
Pitched, no insulation, Very Poor + 1976-1982 -> Pitched, limited insulation, Poor + 1983-1990, to 1996-2002 Pitched, insulated, Average + 2003 - 2006, 2012-2022 -> Pitched, insulated, Good + 2023 onwards -> Pitched, insulated, Very Good + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + + start_year = age_band.start_year() + if start_year >= 2023: + return EpcEfficiency.VERY_GOOD + + if start_year >= 2003: + return EpcEfficiency.GOOD + + if start_year >= 1983: + return EpcEfficiency.AVERAGE + + if start_year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 12mm -> Very Poor + 25mm - 50mm -> Poor + 75mm - 125mm -> Pitched, insulated, average + 150mm - 250mm -> good + 270mm+ -> very good + :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for flat insulated efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if 150 <= insulation_thickness <= 250: + return EpcEfficiency.GOOD + + if 75 <= insulation_thickness <= 125: + return EpcEfficiency.AVERAGE + + if 25 <= insulation_thickness <= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine flat roof efficiency. 
+ :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return flat_insulated_efficiency_thickness(insulation_thickness) + + return flat_insulated_efficiency_age_band(age_band) + + +def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + 2023 onwards -> Very Good + 2012-2022 -> Very Good + 2007-2011 -> Very Good + 2003-2006 -> Very Good + 1996-2002 -> Good + 1991-1995 -> Good + 1983-1990 -> Average + 1976-1982 -> Average + 1967-1975 -> Average + 1950-1966 -> Average + 1930-1949 -> Average + 1900-1929 -> Average + before 1900 -> Average + :param age_band: Input age band, EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2003: + return EpcEfficiency.VERY_GOOD + if year >= 1991: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for thatched efficiency calculation") + + if insulation_thickness >= 175: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 25: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine thatched roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return thatched_efficiency_thickness(insulation_thickness) + + return thatched_efficiency_age_band(age_band) + + +def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 150: + return EpcEfficiency.GOOD + + if insulation_thickness >= 75: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 25: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine sloping ceiling roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return sloping_ceiling_efficiency_thickness(insulation_thickness) + + return sloping_ceiling_efficiency_age_band(age_band) + + +def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 400mm, 350mm = very good + 200-300mm = good + 125-175 = average + 50-100 = poor + 25 and below= very poor + :return: + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation") + + if insulation_thickness >= 350: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 200: + return EpcEfficiency.GOOD + + if insulation_thickness >= 125: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + # 2023 onwards -> Very Good + # 2003-2006, 2012-2022 -> Good + # 1983 - 1990, 1996-2002 -> Average + # 1976-1982 -> Poor + # 1967-1975 and earlier bands -> 
Very Poor + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine loft insulated at rafters roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness) + + return loft_insulated_at_rafters_efficiency_age_band(age_band) + + +ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { + # Flat roof + EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.flat_limited_insulation: flat_efficiency, + EpcRoofDescriptions.flat_insulated: flat_efficiency, + + # Loft: + # value mappings + EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD, + 
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR, + # function mappings + EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, + + # Loft af rafters + EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency, + + # Another dwelling above + EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, + + # Thatched + EpcRoofDescriptions.thatched: thatched_efficiency, + EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency, + + # Sloping ceiling + EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, + +} + + +def resolve_roof_efficiency( + description: EpcRoofDescriptions, + age_band: EpcConstructionAgeBand | None, + insulation_thickness: int | None, +) -> EpcEfficiency: + """ + Resolve roof efficiency from description + age band + insulation thickness. 
+ """ + + # Unknown / holding descriptions → efficiency unknown + if description in description.unknown_descriptions: + return EpcEfficiency.NA + + rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + # Fixed efficiency + if isinstance(rule, EpcEfficiency): + return rule + + # Callable rule + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + try: + # Try (thickness, age_band) + return rule(insulation_thickness, age_band) + except TypeError: + # Fallback to (age_band) + return rule(age_band) diff --git a/backend/onboarders/mappings/parity/walls.py b/backend/onboarders/mappings/parity/walls.py new file mode 100644 index 00000000..0ad6d6e1 --- /dev/null +++ b/backend/onboarders/mappings/parity/walls.py @@ -0,0 +1,211 @@ +from typing import Callable, Union +from collections.abc import Mapping +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.efficiency import EpcEfficiency + +# Unique combinations +wall_map = { + # Cavity walls + ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity, + ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation, + ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation, + ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal, + ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external, + ('Cavity', 'AsBuilt'): None, # To be classified + ('Cavity', 'Unknown'): None, # To be classified + + # System built walls + ('System', 'External'): EpcWallDescriptions.system_external_insulation, + ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation, + ('System', 'AsBuilt'): None, # To be classified + ('System', 'Unknown'): None, + + # Timber Frame walls + ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation, + ('Timber Frame', 
def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Map a filled-cavity wall to an efficiency rating based on its construction age band.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency.VERY_GOOD when the age band is ``from_2023_onwards``,
        EpcEfficiency.GOOD for every other value
    """
    # Age bands for which a filled cavity is rated VERY_GOOD
    very_good_bands = {
        EpcConstructionAgeBand.from_2023_onwards
    }

    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
# Efficiency rule attached to a wall description: either a fixed rating, or a callable that
# derives the rating from the construction age band.
# The callables registered in WALL_DESCRIPTION_EFFICIENCIES take the age band as their ONLY
# argument and are invoked as ``rule(age_band)``; walls have no insulation-thickness argument,
# so the callable type declares a single parameter.
WallEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
]
+ EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency, + + # Timber frame + # value mappings + EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Granite / whinstone + EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Sandstone / limestone + EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Cob (special case) + EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, + + # Unknown mappings which are unhandled + EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA, + 
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Resolve wall efficiency from description + construction age band.

    Resolution order:

    1. A description with no entry in ``WALL_DESCRIPTION_EFFICIENCIES`` resolves to
       ``EpcEfficiency.NA`` (previously this raised ``KeyError``).
    2. An entry that is already an ``EpcEfficiency`` is returned unchanged.
    3. Any other entry is a callable rule taking the age band. A missing age band
       (``None`` or float NaN) resolves to ``EpcEfficiency.NA`` instead of being passed to the
       rule, where it would fall through to the rule's default rating.

    :param description: wall description to resolve
    :param age_band: construction age band, or None / NaN when unknown
    :return: the resolved EpcEfficiency
    """
    rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)

    # No rule registered for this description → efficiency unknown
    if rule is None:
        return EpcEfficiency.NA

    # Fixed efficiency
    if isinstance(rule, EpcEfficiency):
        return rule

    # Rule needs an age band but we don't have one.
    # NaN is the only float that is not equal to itself, so this needs no extra import.
    if age_band is None or (isinstance(age_band, float) and age_band != age_band):
        return EpcEfficiency.NA

    return rule(age_band)
# We make sure all original values were null - assert pd.isnull(null_vals[original_column]).all(), ( - f"Some values in {mapped_column} were not mapped, but original values were not null" - ) - - -# Sample input data - -data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " - "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" +from backend.onboarders.base import OnboarderBase +# Parity mappings +from backend.onboarders.mappings.parity.property_type import parity_map as property_map +from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map +from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map +from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES +from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency +from onboarders.mappings.parity.floor import floor_map +from onboarders.mappings.parity.heating import heating_map +from onboarders.mappings.parity.glazing import glazing_map +from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers +from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers +from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( + as_built_floor_classifiers, unknown_as_built_floor_classifiers ) +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.floor import EpcFloorDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency -# We want to map the parity fields to standard EPC references. 
This will allow us to -# 1) Estimate EPCs, more accurately -# 2) Patch incorrect EPCs with ease -# 3) Indicate already installed measures - -# ------------ construction_age_band ------------ -# Map to EPC age bands -# def construction_date_to_band(year): -# if pd.isnull(year): -# return None -# # Get the year from the date which is numpy datetime format -# for label, ranges in construction_age_bounds_map.items(): -# if ranges["l"] <= year <= ranges["u"]: -# return label -# raise NotImplementedError("year out of bounds") -# -# -# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) - -data["construction_age_band"] = data["Construction Years"].map(age_band_map) - -check_nulls(data, "Construction Years", "construction_age_band") - -# ------------ property_type ------------ -data["property_type"] = data["Type"].map(property_map) - -assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" - -# ------------ built_form ------------ -data["built_form"] = data["Attachment"].map(built_form_map) - -assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" - -# ------------ Wall Construction ------------ - -data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") - -data["Wall Insulation"].value_counts() -data["Wall Construction"].value_counts() - -as_built_map = { - "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, -} 
# Registers DataFrame/Series.progress_apply, which ParityOnboarder relies on throughout.
tqdm.pandas()


class ParityOnboarder(OnboarderBase):
    """
    Transform a Parity sustainability extract into landlord_* columns expressed as EPC references.

    The ``self.landlord_*`` column-name attributes, ``self.data``, ``read_s3`` and the
    ``assert_*`` helpers are not defined in this class; presumably they come from OnboarderBase.
    Several row-level helpers read columns by attribute (e.g. ``row.landlord_wall_construction``),
    which assumes each ``self.landlord_x`` attribute holds the string ``"landlord_x"`` —
    TODO confirm against OnboarderBase.
    """

    def __init__(
        self,
        fileuri: str,
        file_format: str,
        **kwargs
    ):
        """
        Split the input URI into bucket / key, derive the output key and load the input.

        :param fileuri: input location, in the format s3://bucket/key
        :param file_format: file extension of the input (without the leading dot)
        :param kwargs: forwarded unchanged to ``read_s3``
        """
        # NOTE(review): OnboarderBase.__init__ is not called here — confirm that is intended.
        # Extract bucket, and filekey; Will be in the format s3://bucket/key
        uri_parts = fileuri.split("/")
        self.bucket_name = uri_parts[2]
        self.input_file_name = "/".join(uri_parts[3:])
        # Also prepare output file name
        self.output_file_name = self.input_file_name.replace("." + file_format, "") + "_transformed.csv"

        self.read_s3(file_format=file_format, **kwargs)

    def map_construction_age_band(self):
        """Map "Construction Years" to the age-band column; nulls are only allowed where the source was null."""
        self.data[self.landlord_construction_age_band] = self.data["Construction Years"].map(age_band_map)
        self.assert_nulls_only_from_source_nulls(
            self.data, "Construction Years", self.landlord_construction_age_band
        )

    def map_property_type(self):
        """Map "Type" to the property-type column and require every row to be mapped."""
        self.data[self.landlord_property_type] = self.data["Type"].map(property_map)
        self.assert_no_nulls(self.data, self.landlord_property_type)

    def map_built_form(self):
        """Map "Attachment" to the built-form column and require every row to be mapped."""
        self.data[self.landlord_built_form] = self.data["Attachment"].map(built_form_map)
        self.assert_no_nulls(self.data, self.landlord_built_form)

    @staticmethod
    def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None:
        """
        Utility function, used by map_wall_construction in parity transformation module
        :param row: row of input sustainability data, being transformed
        :return: EpcWallDescriptions, the as built wall description for the input row, based on the wall construction
        type and age band
        """
        # Already resolved via direct mapping
        # NOTE(review): this tests `is not None`, whereas _fill_roof_as_built tests pd.isnull.
        # A NaN left by a (construction, insulation) pair missing from wall_map is therefore
        # returned unchanged rather than classified — confirm which behaviour is intended.
        if row.landlord_wall_construction is not None:
            return row.landlord_wall_construction

        wall_type = row["Wall Construction"]

        # Missing construction age → conservative fallback
        if pd.isnull(row.landlord_construction_age_band):
            return wall_unknown_age_fallback.get(wall_type)

        classifier = as_built_wall_classifiers.get(wall_type)
        if classifier is None:
            return None

        return classifier(row.landlord_construction_age_band)

    @staticmethod
    def _resolve_wall_efficiency(
        description: EpcWallDescriptions,
        age_band: EpcConstructionAgeBand | None,
    ) -> EpcEfficiency:
        """
        Resolve the efficiency rating of a wall description.

        :param description: resolved wall description
        :param age_band: construction age band, or None / null when unknown
        :return: EpcEfficiency.NA when the description's value contains "unknown insulation", has no
            rule in WALL_DESCRIPTION_EFFICIENCIES, or needs an age band that is missing; otherwise the
            fixed rating or the result of the age-band rule
        """
        # Unknown / holding descriptions → efficiency unknown
        if "unknown insulation" in description.value.lower():
            return EpcEfficiency.NA

        rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)

        if rule is None:
            return EpcEfficiency.NA

        if isinstance(rule, EpcEfficiency):
            return rule

        # Rule needs age band but we don't have one
        if age_band is None or pd.isnull(age_band):
            return EpcEfficiency.NA

        return rule(age_band)

    def map_wall_construction(self):
        """Populate the wall construction and wall efficiency columns."""
        # Direct lookup on the (construction, insulation) pair
        self.data[self.landlord_wall_construction] = (
            self.data[["Wall Construction", "Wall Insulation"]]
            .apply(tuple, axis=1)
            .map(wall_map)
        )

        # Classify the rows the direct lookup left as "to be classified"
        self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_wall_as_built, axis=1)

        # Sanity check
        self.assert_no_nulls(self.data, self.landlord_wall_construction)

        self.data[self.landlord_wall_efficiency] = self.data.progress_apply(
            lambda row: self._resolve_wall_efficiency(
                row.landlord_wall_construction,
                row.landlord_construction_age_band,
            ),
            axis=1,
        )
        # Additional sanity check
        self.assert_no_nulls(self.data, self.landlord_wall_efficiency)

    @staticmethod
    def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None:
        """
        Return the roof description for a row, classifying it when the direct mapping left it null.

        :param row: row of input sustainability data, being transformed
        :return: the already-mapped description, the unknown-age fallback for the roof type, or the
            classifier output for the row's age band
        :raises NotImplementedError: when there is no classifier for the roof type, or the classifier
            returns None
        """
        # Already resolved
        if not pd.isnull(row.landlord_roof_construction):
            return row.landlord_roof_construction

        roof_type = row["Roof Construction"]

        classifier = as_built_roof_classifiers.get(roof_type)
        if classifier is None:
            raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

        if pd.isnull(row.landlord_construction_age_band):
            return roof_unknown_age_fallback.get(roof_type)

        output = classifier(row.landlord_construction_age_band)
        if output is None:
            raise NotImplementedError(
                f"Roof classification returned None for roof type '{roof_type}'"
            )

        return output

    @staticmethod
    def _extract_insulation_thickness(value: str | None) -> int | None:
        """
        Extract insulation thickness in mm from a string like 'mm150'.
        Returns None if not present or not parseable.
        """
        if value is None or pd.isnull(value):
            return None

        # First run of digits anywhere in the value
        match = re.search(r"(\d+)", str(value))
        if not match:
            return None

        return int(match.group(1))

    def map_roof_construction(self):
        """Populate the roof construction, roof efficiency and sloping-ceiling columns."""
        self.data[self.landlord_roof_construction] = (
            self.data[["Roof Construction", "Roof Insulation"]]
            .progress_apply(tuple, axis=1)
            .map(roof_map)
        )

        self.data[self.landlord_roof_construction] = self.data.progress_apply(
            self._fill_roof_as_built,
            axis=1,
        )

        # sanity check
        self.assert_no_nulls(self.data, self.landlord_roof_construction)

        self.data["roof_insulation_thickness_mm"] = self.data["Roof Insulation"].apply(
            self._extract_insulation_thickness
        )

        def _thickness_or_none(raw) -> int | None:
            # A column mixing ints and None may be stored by pandas as float with NaN, so restore
            # the `int | None` contract that resolve_roof_efficiency declares.
            return None if pd.isnull(raw) else int(raw)

        self.data[self.landlord_roof_efficiency] = self.data.progress_apply(
            lambda row: resolve_roof_efficiency(
                description=row.landlord_roof_construction,
                age_band=row.landlord_construction_age_band,
                insulation_thickness=_thickness_or_none(row.roof_insulation_thickness_mm),
            ),
            axis=1,
        )
        # sanity check
        self.assert_no_nulls(self.data, self.landlord_roof_efficiency)

        # Flag sloping ceiling
        self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply(
            lambda x: x == "PitchedWithSlopingCeiling"
        )

    @staticmethod
    def _fill_floor_as_built(row: pd.Series):
        """
        Return the floor description for a row, classifying it when the direct mapping left it None.

        :param row: row of input sustainability data, being transformed
        :return: the already-mapped description, a classifier result for the row's age band, or
            EpcFloorDescriptions.unknown when the age band is missing or the floor type is not handled
        """
        # 1. Already resolved
        # NOTE(review): `is not None` lets a NaN from an unmatched floor_map key through unchanged;
        # _fill_roof_as_built uses pd.isnull instead — confirm which behaviour is intended.
        if row.landlord_floor_construction is not None:
            return row.landlord_floor_construction

        age_band = row.landlord_construction_age_band
        floor_type = row["Floor Construction"]
        insulation = row["Floor Insulation"]

        # 2. Missing age band → conservative fallback
        if pd.isnull(age_band):
            return EpcFloorDescriptions.unknown

        # 3. Known floor types
        if floor_type in ["Solid", "SuspendedTimber", "SuspendedNotTimber"]:
            classifier = as_built_floor_classifiers[floor_type]
            return classifier(age_band)

        # 4. Unknown floor type
        if floor_type == "Unknown":
            classifier = unknown_as_built_floor_classifiers[insulation]
            return classifier(age_band)

        # 5. Truly missing / garbage input
        return EpcFloorDescriptions.unknown

    def map_floor_construction(self):
        """Populate the floor construction column and require every row to be resolved."""
        self.data[self.landlord_floor_construction] = (
            self.data[["Floor Construction", "Floor Insulation"]]
            .progress_apply(tuple, axis=1)
            .map(floor_map)
        )

        self.data[self.landlord_floor_construction] = self.data.progress_apply(
            self._fill_floor_as_built,
            axis=1,
        )

        self.assert_no_nulls(self.data, self.landlord_floor_construction)

    def map_glazing(self):
        """Expand each "Glazing" value, via glazing_map, into the five window / glazing columns."""
        # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or glazed area.
        #   There is maybe an argument for landlord_multi_glaze_proportion as this could be variable,
        #   however
        self.data[
            [
                self.landlord_windows_type,
                self.landlord_windows_efficiency,
                self.landlord_multi_glaze_proportion,
                self.landlord_glazed_type,
                self.landlord_glazed_area
            ]
        ] = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series)

    def map_heating(self):
        """Expand each (heating, boiler efficiency, fuel, controls) tuple, via heating_map, into seven columns."""
        # TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating
        #   For sub optimal heating controls, we're going to make an assumption as to what the heating controls are
        #   and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an
        #   upper limit
        #   as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating
        #   controls. E.g. it may be programmer and room thermostat
        self.data[
            [
                self.landlord_heating_construction,
                self.landlord_heating_efficiency,
                self.landlord_fuel_type,
                self.landlord_heating_controls,
                self.landlord_heating_controls_efficiency,
                self.landlord_hot_water_system,
                self.landlord_hot_water_efficiency
            ]
        ] = self.data[
            [
                "Heating",
                "Boiler Efficiency",
                "Main Fuel",
                "Controls Adequacy"
            ]
        ].progress_apply(tuple, axis=1).map(heating_map).progress_apply(pd.Series)

    def map_floor_area(self):
        """Rename the source floor-area column to the landlord floor-area column."""
        # This is just a rename
        self.data = self.data.rename(
            columns={"Total Floor Area (m2)": self.landlord_total_floor_area_m2}
        )

    def select_columns(self):
        """Keep only the identifier, address and landlord_* columns, renaming the source ones."""
        self.data = self.data[
            [
                "Org Ref",
                "UPRN",
                "Address 1",
                "Address 2",
                "Address 3",
                "Postcode",
                self.landlord_total_floor_area_m2,
                self.landlord_construction_age_band,
                self.landlord_property_type,
                self.landlord_built_form,
                self.landlord_wall_construction,
                self.landlord_wall_efficiency,
                self.landlord_roof_construction,
                self.landlord_roof_efficiency,
                self.landlord_has_sloping_ceiling,
                self.landlord_floor_construction,
                self.landlord_windows_type,
                self.landlord_windows_efficiency,
                self.landlord_multi_glaze_proportion,
                self.landlord_glazed_type,
                self.landlord_glazed_area,
                self.landlord_heating_construction,
                self.landlord_heating_efficiency,
                self.landlord_fuel_type,
                self.landlord_heating_controls,
                self.landlord_heating_controls_efficiency,
                self.landlord_hot_water_system,
                self.landlord_hot_water_efficiency
            ]
        ].rename(
            columns={
                "Org Ref": "landlord_property_id",
                # Keys must match the selected source columns exactly ("Address 1", with a space):
                # DataFrame.rename silently ignores keys that are not present, so the previous
                # "Address1"-style keys left the address columns un-renamed.
                "Address 1": "address1",
                "Address 2": "address2",
                "Address 3": "address3",
                "Postcode": "postcode",
            }
        )

    def extract_values(self):
        """Replace objects exposing a ``.value`` attribute with that value in the mapped columns."""
        for column in [
            self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form,
            self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction,
            self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type,
            self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency,
            self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency,
            self.landlord_hot_water_system, self.landlord_hot_water_efficiency
        ]:
            self.data[column] = self.data[column].progress_apply(lambda x: x.value if hasattr(x, "value") else x)

    def transform(self):
        """Run every mapping step in order, then select the output columns and extract plain values."""
        # ------------ construction_age_band ------------
        # Must run first: the wall, roof and floor steps read the mapped age band.
        self.map_construction_age_band()

        # ------------ property_type ------------
        self.map_property_type()

        # ------------ built_form ------------
        self.map_built_form()

        # ------------ Wall Construction ------------
        self.map_wall_construction()

        # ------------ Roof Construction ------------
        self.map_roof_construction()

        # ------------ Floor Construction ------------
        self.map_floor_construction()

        # ------------ Glazing ------------
        self.map_glazing()

        # ------------ Heating, fuel, controls & hot water ------------
        self.map_heating()

        # ------------ Floor Area ------------
        self.map_floor_area()

        # ------------ Formatting ------------
        self.select_columns()
        self.extract_values()
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # Pre-1930 → suspended, insulated
        *(
            (band, EpcFloorDescriptions.suspended_insulated)
            for band in (
                EpcConstructionAgeBand.before_1900,
                EpcConstructionAgeBand.from_1900_to_1929,
            )
        ),
        # 1930+ → solid, insulated
        *(
            (band, EpcFloorDescriptions.solid_insulated)
            for band in (
                EpcConstructionAgeBand.from_1930_to_1949,
                EpcConstructionAgeBand.from_1950_to_1966,
                EpcConstructionAgeBand.from_1976_to_1982,
                EpcConstructionAgeBand.from_2023_onwards,
            )
        ),
    ],
)
def test_unknown_floor_retrofitted(age_band, expected):
    """A retrofitted floor of unknown type maps to the expected description for each age band."""
    resolved = unknown_floor_retrofitted(age_band)
    assert resolved == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # 1983–1995 → no insulation
        *(
            (band, EpcFloorDescriptions.suspended_no_insulation_assumed)
            for band in (
                EpcConstructionAgeBand.from_1983_to_1990,
                EpcConstructionAgeBand.from_1991_to_1995,
            )
        ),
        # 1996–2002 → limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),
        # 2003+ → insulated
        *(
            (band, EpcFloorDescriptions.suspended_insulated_assumed)
            for band in (
                EpcConstructionAgeBand.from_2003_to_2006,
                EpcConstructionAgeBand.from_2012_to_2022,
                EpcConstructionAgeBand.from_2023_onwards,
            )
        ),
    ],
)
def test_suspended_floor_as_built(age_band, expected):
    """An as-built suspended floor maps to the expected description for each age band."""
    resolved = map_suspended_floor_as_built(age_band)
    assert resolved == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # no insulation
        *(
            (band, EpcRoofDescriptions.sloping_pitched_no_insulation)
            for band in (
                EpcConstructionAgeBand.before_1900,
                EpcConstructionAgeBand.from_1967_to_1975,
            )
        ),
        # limited insulation
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation),
        # insulated
        *(
            (band, EpcRoofDescriptions.sloping_pitched_insulated)
            for band in (
                EpcConstructionAgeBand.from_1983_to_1990,
                EpcConstructionAgeBand.from_2012_to_2022,
                EpcConstructionAgeBand.from_2023_onwards,
            )
        ),
    ],
)
def test_classify_sloping_ceiling_roof(age_band, expected):
    """An as-built sloping-ceiling roof maps to the expected description for each age band."""
    classified = map_sloping_ceiling_roof(age_band)
    assert classified == expected
EpcEfficiency.VERY_POOR), + + # Flat roof, insulated (age-band driven) + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_1983_to_1990, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + + # Pitched, insulated assumed (loft) + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_1996_to_2002, EpcEfficiency.GOOD), + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_2007_to_2011, + EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_age_band_only(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +# --------------------------------------------------------------------- +# Roof efficiency — insulation thickness driven +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, thickness, expected", + [ + # Loft insulation + (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR), + (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR), + (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD), + (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD), + + # Flat insulated — thickness overrides age band + (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR), + (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD), + + # Sloping ceiling + (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.sloping_pitched_insulated, 150, 
EpcEfficiency.GOOD), + (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_thickness_based(description, thickness, expected): + assert resolve_roof_efficiency( + description=description, + age_band=EpcConstructionAgeBand.before_1900, # should be ignored + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Thatched roofs +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_age_band(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +@pytest.mark.parametrize( + "thickness, expected", + [ + (12, EpcEfficiency.AVERAGE), + (50, EpcEfficiency.GOOD), + (150, EpcEfficiency.GOOD), + (200, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_thickness(thickness, expected): + assert resolve_roof_efficiency( + description=EpcRoofDescriptions.thatched_with_additional_insulation, + age_band=EpcConstructionAgeBand.before_1900, + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Unknown / holding descriptions +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description", + [ + EpcRoofDescriptions.flat_as_built_unknown, + EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ], +) +def 
test_unknown_roof_descriptions_return_na(description): + assert resolve_roof_efficiency( + description=description, + age_band=None, + insulation_thickness=None, + ) == EpcEfficiency.NA diff --git a/backend/onboarders/tests/test_wall_remapping.py b/backend/onboarders/tests/test_wall_remapping.py new file mode 100644 index 00000000..c9476211 --- /dev/null +++ b/backend/onboarders/tests/test_wall_remapping.py @@ -0,0 +1,161 @@ +import pytest + +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency + +from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency +from backend.onboarders.mappings.parity.as_built_wall_classifiers import ( + map_cavity_wall_insulation, + map_solid_wall_insulation, + map_timber_frame_wall_insulation, + map_system_build_wall_insulation, + map_granite_wall_insulation, + map_sandstone_wall_insulation, + map_cob_wall_insulation, +) + + +# --------------------------------------------------------------------- +# As-built wall description classification +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed), + ], +) +def test_map_cavity_wall_insulation(age_band, expected): + assert map_cavity_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, 
EpcWallDescriptions.solid_brick_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed), + ], +) +def test_map_solid_wall_insulation(age_band, expected): + assert map_solid_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed), + ], +) +def test_map_timber_frame_wall_insulation(age_band, expected): + assert map_timber_frame_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed), + ], +) +def test_map_system_wall_insulation(age_band, expected): + assert map_system_build_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed), + ], +) +def test_map_granite_wall_insulation(age_band, expected): + assert map_granite_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, 
EpcWallDescriptions.sandstone_limestone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed), + ], +) +def test_map_sandstone_wall_insulation(age_band, expected): + assert map_sandstone_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good), + ], +) +def test_map_cob_wall_insulation(age_band, expected): + assert map_cob_wall_insulation(age_band) == expected + + +# --------------------------------------------------------------------- +# Wall efficiency resolution +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + # Fixed efficiencies + (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR), + (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE), + (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD), + + # Function-based efficiencies + ( + EpcWallDescriptions.cavity_filled_cavity, + EpcConstructionAgeBand.from_2023_onwards, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.cavity_filled_cavity, + EpcConstructionAgeBand.from_1991_to_1995, + EpcEfficiency.GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_2003_to_2006, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_1950_to_1966, + EpcEfficiency.GOOD, + ), + ], +) +def test_resolve_wall_efficiency(description, age_band, expected): + assert 
resolve_wall_efficiency(description, age_band) == expected + + +@pytest.mark.parametrize( + "description", + [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ], +) +def test_unknown_wall_descriptions_return_na(description): + assert resolve_wall_efficiency(description, None) == EpcEfficiency.NA diff --git a/datatypes/epc/__init__.py b/datatypes/epc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/datatypes/epc/construction_age_band.py b/datatypes/epc/construction_age_band.py new file mode 100644 index 00000000..c5e7a03b --- /dev/null +++ b/datatypes/epc/construction_age_band.py @@ -0,0 +1,45 @@ +import re +from enum import Enum +from typing import List + + +class EpcConstructionAgeBand(Enum): + before_1900: str = 'England and Wales: before 1900' + from_1900_to_1929: str = 'England and Wales: 1900-1929' + from_1930_to_1949: str = 'England and Wales: 1930-1949' + from_1950_to_1966: str = 'England and Wales: 1950-1966' + from_1967_to_1975: str = 'England and Wales: 1967-1975' + from_1976_to_1982: str = 'England and Wales: 1976-1982' + from_1983_to_1990: str = 'England and Wales: 1983-1990' + from_1991_to_1995: str = 'England and Wales: 1991-1995' + from_1996_to_2002: str = 'England and Wales: 1996-2002' + from_2003_to_2006: str = 'England and Wales: 2003-2006' + from_2007_to_2011: str = 'England and Wales: 2007-2011' + from_2012_onwards: str = 'England and Wales: 2012-onwards' + from_2012_to_2022: str = 'England and Wales: 2012-2022' + from_2023_onwards: str = 'England and Wales: 2023 onwards' + + def start_year(self) -> int: + """ + Extract the starting year of the age band. 
+ """ + value = self.value.lower() + + if 'before' in value: + return 0 + match = re.search(r'(\d{4})', value) + if not match: + raise ValueError(f"Cannot determine start year from '{self.value}'") + + return int(match.group(1)) + + @classmethod + def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]: + """ + Return all age bands whose starting year is >= the given year. + """ + return [ + band + for band in cls + if band.start_year() >= year + ] diff --git a/datatypes/epc/efficiency.py b/datatypes/epc/efficiency.py new file mode 100644 index 00000000..0417f49e --- /dev/null +++ b/datatypes/epc/efficiency.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcEfficiency(Enum): + VERY_POOR: str = "Very Poor" + POOR: str = "Poor" + AVERAGE: str = "Average" + GOOD: str = "Good" + VERY_GOOD: str = "Very Good" + NA: str = "N/A" diff --git a/datatypes/epc/floor.py b/datatypes/epc/floor.py new file mode 100644 index 00000000..41786101 --- /dev/null +++ b/datatypes/epc/floor.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class EpcFloorDescriptions(Enum): + # Solid floor + solid_insulated = "Solid, insulated" + solid_insulated_assumed = "Solid, insulated (assumed)" + solid_no_insulation_assumed = "Solid, no insulation (assumed)" + solid_limited_insulation_assumed = "Solid, limited insulation (assumed)" + + # Suspended floor + suspended_insulated = "Suspended, insulated" + suspended_insulated_assumed = "Suspended, insulated (assumed)" + suspended_no_insulation_assumed = "Suspended, no insulation (assumed)" + suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)" + + unknown = None # We don't resolve anything diff --git a/datatypes/epc/fuel.py b/datatypes/epc/fuel.py new file mode 100644 index 00000000..0d1e455c --- /dev/null +++ b/datatypes/epc/fuel.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcFuel(Enum): + electricity_not_community = "electricity (not community)" + lpg_not_community = "LPG (not community)" + 
mains_gas_not_community = "mains gas (not community)" + oil_not_community = "oil (not community)" + manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel" + smokeless_coal = "smokeless coal" diff --git a/datatypes/epc/heating_controls.py b/datatypes/epc/heating_controls.py new file mode 100644 index 00000000..48538bff --- /dev/null +++ b/datatypes/epc/heating_controls.py @@ -0,0 +1,18 @@ +from enum import Enum + + +class EpcHeatingControls(Enum): + programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" + programmers_trvs_bypass = "Programmer, TRVs and bypass" + time_and_temperature_zone_control = "Time and temperature zone control" + + # Room heaters + programmer_and_appliance_thermostats = "Programmer and appliance thermostats" + appliance_thermostats = "Appliance thermostats" + + # Storage heaters + automatic_charge_control = "Automatic charge control" + manual_charge_control = "Manual charge control" + + # Warm air + programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats" diff --git a/datatypes/epc/hotwater.py b/datatypes/epc/hotwater.py new file mode 100644 index 00000000..96af2be3 --- /dev/null +++ b/datatypes/epc/hotwater.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class EpcHotWaterSystems(Enum): + # from primary heating system + from_main_system = "From main system" + # Common for heater-based systems, e.g. 
room heaters or storage heaters + electric_immersion_off_peak = "Electric immersion, off-peak" diff --git a/datatypes/epc/main_heating.py b/datatypes/epc/main_heating.py new file mode 100644 index 00000000..663ada99 --- /dev/null +++ b/datatypes/epc/main_heating.py @@ -0,0 +1,24 @@ +from enum import Enum + + +class EpcHeatingSystems(Enum): + # boiler and radiators + boiler_and_radiators_electric = "Boiler and radiators, electric" + boiler_and_radiators_lpg = "Boiler and radiators, LPG" + boiler_radiators_mains_gas = "Boiler and radiators, mains gas" + boiler_radiators_oil = "Boiler and radiators, oil" + # underfloor + electric_underfloor_heating = "Electric underfloor heating" + # ashp + air_to_air_ashp = "Air source heat pump, warm air, electric" + ashp_radiators_electric = "Air source heat pump, radiators, electric" + # Room heaters + room_heaters_electric = "Room heaters, electric" + room_heaters_mains_gas = "Room heaters, mains gas" + room_heaters_smokeless_fuel = "Room heaters, smokeless fuel" + room_heaters_coal = "Room heaters, coal" + # Storage heaters + electric_storage_heaters = "Electric storage heaters" + # Warm air + warm_air_electricaire = "Warm air, Electricaire" + warm_air_mains_gas = "Warm air, mains gas" diff --git a/datatypes/epc/property_type_built_form.py b/datatypes/epc/property_type_built_form.py new file mode 100644 index 00000000..2fd59ddf --- /dev/null +++ b/datatypes/epc/property_type_built_form.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class PropertyType(Enum): + flat = "Flat" + maisonette = "Maisonette" + bungalow = "Bungalow" + house = "House" + + +class BuiltForm(Enum): + mid_terrace = "Mid-Terrace" + end_terrace = "End-Terrace" + detached = "Detached" + semi_detached = "Semi-Detached" + enclosed_mid_terrace = "Enclosed Mid-Terrace" + enclosed_end_terrace = "Enclosed End-Terrace" diff --git a/datatypes/epc/roof.py b/datatypes/epc/roof.py new file mode 100644 index 00000000..9cdaac96 --- /dev/null +++ b/datatypes/epc/roof.py @@ 
-0,0 +1,86 @@ +from enum import Enum +from typing import List + + +class EpcRoofDescriptions(Enum): + # Loft + # Assumed options + pitched_insulated_assumed: str = "Pitched, insulated (assumed)" + pitched_no_insulation: str = "Pitched, no insulation" + # Insulation thickness options + loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" + loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" + loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" + loft_75mm_insulation: str = "Pitched, 75 mm loft insulation" + loft_100mm_insulation: str = "Pitched, 100 mm loft insulation" + loft_125mm_insulation: str = "Pitched, 125 mm loft insulation" + loft_150mm_insulation: str = "Pitched, 150 mm loft insulation" + loft_175mm_insulation: str = "Pitched, 175 mm loft insulation" + loft_200mm_insulation: str = "Pitched, 200 mm loft insulation" + loft_250mm_insulation: str = "Pitched, 250 mm loft insulation" + loft_270mm_insulation: str = "Pitched, 270 mm loft insulation" + loft_300mm_insulation: str = "Pitched, 300 mm loft insulation" + loft_350mm_insulation: str = "Pitched, 350 mm loft insulation" + loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation" + # Insulated at rafters "Pitched, insulated at rafters" + # Rafters + # 400mm, 350mm = very good + # 200-300mm = good + # 125-175 = average + # 50-100 = poor + # 25 and below= very poor + loft_insulated_at_rafters: str = "Pitched, insulated at rafters" + # another dwelling above + another_dwelling_above: str = "(another dwelling above)" + # flat roof, which if there is observed insulation is just "flat, insulated", however there is a + # different efficiency rating depending on insulation thickness + # categories: + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + # As built 2023 = Flat, 
insulated, Very good + # 2003 - 2006, up to 2012-2022 = Flat insulated, Good + # 1983-1990, 1996-2002 = Flat, insulated, Average + # 1976-1982 = Flat, limited insulation, poor + # 1967 - 1975 = Flat, limited insulation, Very Poor + # 1950-1966 and earlier bands = flat, no insulation, very poor + + flat_insulated: str = "Flat, insulated" + flat_limited_insulation: str = "Flat, limited insulation" + flat_no_insulation: str = "Flat, no insulation" + + # Thatched roof descriptions + # With Loft insulation at joists + # Thatched + 12mm = thatched, with additional insulation, average + # Thatched + 25, 50, 100, 150mm = thatched, with additional insulation, good + # Thatched + 175mm+ = thatched, with additional insulation, very good + # With loft insulation at rafters [out of scope atm] + # Unknown insulation + # Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", Average + # 2003-2006, 2012-2022 = "Thatched", Good + # 2023 onwards = "Thatched", Very Good + thatched: str = "Thatched" # We see this for no insulation, has average performance + thatched_with_additional_insulation: str = "Thatched, with additional insulation" + + # Sloping ceiling + # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped + sloping_pitched_no_insulation: str = "Pitched, no insulation" + sloping_pitched_limited_insulation: str = "Pitched, limited insulation" + sloping_pitched_insulated: str = "Pitched, insulated" + + # Unknown descriptions which may get mapped later or handled via fallback + flat_as_built_unknown: str = "Flat, as built, unknown insulation" + loft_as_built_unknown: str = "Loft, as built, unknown insulation" + thatched_as_built_unknown: str = "Thatched, as built, unknown insulation" + sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcRoofDescriptions"]: + return [ + EpcRoofDescriptions.flat_as_built_unknown, + 
EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ] diff --git a/datatypes/epc/walls.py b/datatypes/epc/walls.py new file mode 100644 index 00000000..44ca7e49 --- /dev/null +++ b/datatypes/epc/walls.py @@ -0,0 +1,74 @@ +from enum import Enum +from typing import List + + +class EpcWallDescriptions(Enum): + # Cavity wall descriptions + cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)" + cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)" + cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)" + cavity_filled_cavity: str = "Cavity wall, filled cavity" + cavity_internal_insulation: str = "Cavity wall, with internal insulation" + cavity_external_insulation: str = "Cavity wall, with external insulation" + cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation" + cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation" + + # Solid wall descriptions + solid_brick_internal_insulation: str = "Solid brick, with internal insulation" + solid_brick_external_insulation: str = "Solid brick, with external insulation" + solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)' + solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)' + solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)' + + # System + system_external_insulation: str = "System built, with external insulation" + system_internal_insulation: str = "System built, with internal insulation" + system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)" + system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)" + system_insulated_assumed: str = "System built, as built, insulated (assumed)" + + # 
Timber + timber_frame_internal_insulation: str = "Timber frame, with internal insulation" + timber_frame_external_insulation: str = "Timber frame, with external insulation" + timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)" + timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)" + timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)" + + # Granite/whinstone + granite_whinstone_external_insulation: str = "Granite or whin, with external insulation" + granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation" + granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)" + granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)" + granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)" + + # Sandstone/limestone + sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation" + sandstone_limestone_external_insulation: str = "Sandstone, with external insulation" + sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)" + sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)" + sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)" + + # Cob + cob_as_built_average: str = "Cob, as built" + cob_as_built_good: str = "Cob, as built" + + # unknown descriptions which may get mapped later or handled via fallback + cavity_as_built_unknown: str = "Cavity wall, as built, unknown insulation" + solid_brick_as_built_unknown: str = "Solid brick, as built, unknown insulation" + system_as_built_unknown: str = "System built, as built, unknown insulation" + timber_frame_as_built_unknown: str = "Timber frame, as built, unknown insulation" + granite_as_built_unknown: str = 
"Granite or whin, as built, unknown insulation" + sandstone_as_built_unknown: str = "Sandstone, as built, unknown insulation" + cob_as_built_unknown: str = "Cob, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcWallDescriptions"]: + return [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ] diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 3ba78ef3..a19c4e21 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -84,7 +84,7 @@ resource "aws_db_instance" "default" { # Temporary to enfore immediate change apply_immediately = true # Set up storage type to gp3 for better performance - storage_type = "gp3" + storage_type = "gp3" } # Set up the bucket that recieve the csv uploads of epc to be retrofit diff --git a/pytest.ini b/pytest.ini index 0a0bbf73..ee203d46 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests