From 54a57363f8a05ffd8ba61c7922369aa756b69313 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Jun 2026 00:31:16 +0000 Subject: [PATCH] feat(epc-prediction): cohort-mode the roof/floor/insulation/age categoricals (ADR-0029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only main wall_construction was set to the cohort mode; the other homogeneous categoricals (wall insulation, construction age band, roof construction, floor construction) were left as template-copied, so one median-size template's quirks set them. Apply the same cohort-mode mechanism to all of them per ADR-0029 decision 4 — the template still supplies geometry, only the categorical codes move to the mode. Verified mode beats (or ties) template-copy per categorical before applying. Smoke corpus (29 leave-one-out) classification rates: construction_age_band 55.2% -> 65.5% roof_construction 72.4% -> 79.3% floor_construction 46.2% -> 84.6% wall_insulation_type 93.1% (tie — already template-strong) Co-Authored-By: Claude Opus 4.8 --- domain/epc_prediction/epc_prediction.py | 51 ++++++++++++---- .../epc_prediction/test_epc_prediction.py | 60 ++++++++++++++++++- 2 files changed, 99 insertions(+), 12 deletions(-) diff --git a/domain/epc_prediction/epc_prediction.py b/domain/epc_prediction/epc_prediction.py index 68624632..ed05ae2f 100644 --- a/domain/epc_prediction/epc_prediction.py +++ b/domain/epc_prediction/epc_prediction.py @@ -60,16 +60,27 @@ class EpcPrediction: def _apply_categorical_modes( predicted: EpcPropertyData, comparables: ComparableProperties ) -> None: - """Override the predicted picture's homogeneous categoricals with the - cohort mode (robust to an atypical template).""" + """Override the predicted picture's homogeneous categoricals — wall / + roof / floor construction, wall insulation, age band — with the cohort + mode (robust to an atypical template, per ADR-0029 decision 4). The + template still supplies the geometry; only the categorical codes move to + the mode.""" if not predicted.sap_building_parts: return - main = predicted.sap_building_parts[0] - wall_mode = _mode( - _main_wall_construction(c) for c in comparables.members - ) - if wall_mode is not None: - main.wall_construction = wall_mode + main: SapBuildingPart = predicted.sap_building_parts[0] + members = comparables.members + for attr in _MAIN_PART_CATEGORICALS: + mode = _mode(_main_part_attr(c, attr) for c in members) + if mode is not None: + setattr(main, attr, mode) + floor_values: list[int] = [ + v for c in members if (v := _main_floor_construction(c)) is not None + ] + floor_dims = main.sap_floor_dimensions + if floor_values and floor_dims: + floor_dims[0].floor_construction = Counter(floor_values).most_common( + 1 + )[0][0] @staticmethod def _apply_overrides( @@ -85,9 +96,29 @@ class EpcPrediction: ) -def _main_wall_construction(comparable: Comparable) -> Optional[Union[int, str]]: +# The homogeneous categoricals carried directly on the main building part. Floor +# construction lives on the main floor dimension and is handled separately. +_MAIN_PART_CATEGORICALS: tuple[str, ...] = ( + "wall_construction", + "wall_insulation_type", + "construction_age_band", + "roof_construction", +) + + +def _main_part_attr( + comparable: Comparable, attr: str +) -> Optional[Union[int, str]]: parts: list[SapBuildingPart] = comparable.epc.sap_building_parts - return parts[0].wall_construction if parts else None + return getattr(parts[0], attr) if parts else None + + +def _main_floor_construction(comparable: Comparable) -> Optional[int]: + parts: list[SapBuildingPart] = comparable.epc.sap_building_parts + if not parts: + return None + dims = parts[0].sap_floor_dimensions + return dims[0].floor_construction if dims else None def _mode( diff --git a/tests/domain/epc_prediction/test_epc_prediction.py b/tests/domain/epc_prediction/test_epc_prediction.py index 43da0737..da273da1 100644 --- a/tests/domain/epc_prediction/test_epc_prediction.py +++ b/tests/domain/epc_prediction/test_epc_prediction.py @@ -5,9 +5,13 @@ homogeneous categoricals to the recency-weighted cohort mode, apply Landlord Overrides on top. Pure domain logic. """ -from typing import Union +from typing import Optional, Union -from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + SapBuildingPart, + SapFloorDimension, +) from domain.epc_prediction.comparable_properties import ( Comparable, ComparableProperties, @@ -21,6 +25,10 @@ def _epc( building_parts: int = 1, floor_area: float = 80.0, wall_construction: Union[int, str] = 1, + wall_insulation_type: Union[int, str] = 1, + construction_age_band: str = "K", + roof_construction: Optional[int] = 1, + floor_construction: Optional[int] = 1, ) -> EpcPropertyData: epc: EpcPropertyData = object.__new__(EpcPropertyData) epc.property_type = "2" @@ -30,6 +38,12 @@ def _epc( for _ in range(building_parts): part: SapBuildingPart = object.__new__(SapBuildingPart) part.wall_construction = wall_construction + part.wall_insulation_type = wall_insulation_type + part.construction_age_band = construction_age_band + part.roof_construction = roof_construction + floor_dim: SapFloorDimension = object.__new__(SapFloorDimension) + floor_dim.floor_construction = floor_construction + part.sap_floor_dimensions = [floor_dim] parts.append(part) epc.sap_building_parts = parts return epc @@ -100,6 +114,48 @@ def test_sets_main_wall_construction_to_the_cohort_mode() -> None: assert predicted.sap_building_parts[0].wall_construction == 1 +def test_sets_the_other_homogeneous_categoricals_to_the_cohort_mode() -> None: + # Arrange — the median-size template (members[0], 80 m²) is an atypical + # outlier on every categorical; the cohort majority disagrees. Age band, + # wall insulation, roof construction and floor construction are all + # homogeneous categoricals, so each should follow its mode, not the one + # template (ADR-0029 decision 4). + cohort = _cohort( + _epc( + floor_area=80.0, + construction_age_band="A", + wall_insulation_type=9, + roof_construction=7, + floor_construction=7, + ), + _epc( + construction_age_band="K", + wall_insulation_type=1, + roof_construction=2, + floor_construction=3, + ), + _epc( + construction_age_band="K", + wall_insulation_type=1, + roof_construction=2, + floor_construction=3, + ), + ) + + # Act + predicted: EpcPropertyData = EpcPrediction().predict( + PredictionTarget(postcode="LS6 1AA", property_type="2"), cohort + ) + + # Assert — every categorical follows the cohort mode over the outlier + # template. + main = predicted.sap_building_parts[0] + assert main.construction_age_band == "K" + assert main.wall_insulation_type == 1 + assert main.roof_construction == 2 + assert main.sap_floor_dimensions[0].floor_construction == 3 + + def test_applies_a_known_wall_override_over_the_mode() -> None: # Arrange — the cohort mode is cavity (1), but we KNOW the target is solid # brick (2), a Landlord Override. The known value must win over the estimate.