From 9dd23477acc2e3126be1abf64125f3cb8c14d8a6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 14 Jun 2026 09:37:45 +0000 Subject: [PATCH] feat(epc-prediction): cohort-mode roof + floor insulation (ADR-0030) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These independent fabric categoricals were template-copied; mode them like the construction categoricals. Verified mode beats template before applying. Big fixture win on roof insulation thickness (doubled), floor insulation neutral-to-positive: roof_insulation_thickness 14.7% -> 29.4% (gate floor ratcheted up) floor_insulation 90.6% (unchanged on the fixture) Glazing type was tried too (+1.6pp on the 40-postcode corpus) but REGRESSED the 36-target fixture (0.50 -> 0.44) — the gate caught it. Glazing moding is marginal/noisy, so it's left on the template; revisit with a larger corpus. Co-Authored-By: Claude Opus 4.8 --- domain/epc_prediction/epc_prediction.py | 45 +++++++++++++------ .../test_component_accuracy_gate.py | 2 +- .../epc_prediction/test_epc_prediction.py | 33 ++++++++++++++ 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/domain/epc_prediction/epc_prediction.py b/domain/epc_prediction/epc_prediction.py index ed05ae2f..d522aa56 100644 --- a/domain/epc_prediction/epc_prediction.py +++ b/domain/epc_prediction/epc_prediction.py @@ -61,10 +61,11 @@ class EpcPrediction: predicted: EpcPropertyData, comparables: ComparableProperties ) -> None: """Override the predicted picture's homogeneous categoricals — wall / - roof / floor construction, wall insulation, age band — with the cohort - mode (robust to an atypical template, per ADR-0029 decision 4). The - template still supplies the geometry; only the categorical codes move to - the mode.""" + roof / floor construction + insulation, age band — with the cohort mode + (robust to an atypical template, per ADR-0029 decision 4). The template + still supplies the geometry; only the categorical codes move to the mode. + (Glazing type is deliberately left on the template — moding it is + marginal and noisy; revisit with a larger corpus.)""" if not predicted.sap_building_parts: return main: SapBuildingPart = predicted.sap_building_parts[0] @@ -73,14 +74,12 @@ class EpcPrediction: mode = _mode(_main_part_attr(c, attr) for c in members) if mode is not None: setattr(main, attr, mode) - floor_values: list[int] = [ - v for c in members if (v := _main_floor_construction(c)) is not None - ] floor_dims = main.sap_floor_dimensions - if floor_values and floor_dims: - floor_dims[0].floor_construction = Counter(floor_values).most_common( - 1 - )[0][0] + if floor_dims: + for attr in _FLOOR_DIM_CATEGORICALS: + floor_mode = _int_mode(_main_floor_attr(c, attr) for c in members) + if floor_mode is not None: + setattr(floor_dims[0], attr, floor_mode) @staticmethod def _apply_overrides( @@ -97,12 +96,20 @@ class EpcPrediction: # The homogeneous categoricals carried directly on the main building part. Floor -# construction lives on the main floor dimension and is handled separately. +# categoricals live on the main floor dimension and glazing on the windows; both +# are handled separately. _MAIN_PART_CATEGORICALS: tuple[str, ...] = ( "wall_construction", "wall_insulation_type", "construction_age_band", "roof_construction", + "roof_insulation_thickness", +) + +# Integer-coded categoricals on the main building part's ground-floor dimension. +_FLOOR_DIM_CATEGORICALS: tuple[str, ...] = ( + "floor_construction", + "floor_insulation", ) @@ -113,12 +120,13 @@ def _main_part_attr( return getattr(parts[0], attr) if parts else None -def _main_floor_construction(comparable: Comparable) -> Optional[int]: +def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]: parts: list[SapBuildingPart] = comparable.epc.sap_building_parts if not parts: return None dims = parts[0].sap_floor_dimensions - return dims[0].floor_construction if dims else None + value: Optional[int] = getattr(dims[0], attr) if dims else None + return value def _mode( @@ -129,3 +137,12 @@ def _mode( if not present: return None return Counter(present).most_common(1)[0][0] + + +def _int_mode(values: Iterable[Optional[int]]) -> Optional[int]: + """`_mode` narrowed to int-coded fields (keeps pyright strict happy when the + target attribute is typed `Optional[int]`).""" + present = [v for v in values if v is not None] + if not present: + return None + return Counter(present).most_common(1)[0][0] diff --git a/tests/domain/epc_prediction/test_component_accuracy_gate.py b/tests/domain/epc_prediction/test_component_accuracy_gate.py index 20897e97..63841aaf 100644 --- a/tests/domain/epc_prediction/test_component_accuracy_gate.py +++ b/tests/domain/epc_prediction/test_component_accuracy_gate.py @@ -43,7 +43,7 @@ _RATE_FLOORS: dict[str, float] = { "has_hot_water_cylinder": 0.8889, "cylinder_insulation_type": 0.1667, "secondary_heating_type": 0.0000, - "roof_insulation_thickness": 0.1471, + "roof_insulation_thickness": 0.2941, "floor_insulation": 0.9062, "has_room_in_roof": 0.8333, "modal_glazing_type": 0.5000, diff --git a/tests/domain/epc_prediction/test_epc_prediction.py b/tests/domain/epc_prediction/test_epc_prediction.py index da273da1..df1bb8f4 100644 --- a/tests/domain/epc_prediction/test_epc_prediction.py +++ b/tests/domain/epc_prediction/test_epc_prediction.py @@ -11,6 +11,7 @@ from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, SapBuildingPart, SapFloorDimension, + SapWindow, ) from domain.epc_prediction.comparable_properties import ( Comparable, @@ -28,7 +29,10 @@ def _epc( wall_insulation_type: Union[int, str] = 1, construction_age_band: str = "K", roof_construction: Optional[int] = 1, + roof_insulation_thickness: Optional[Union[str, int]] = 100, floor_construction: Optional[int] = 1, + floor_insulation: Optional[int] = 1, + glazing_type: Union[int, str] = 3, ) -> EpcPropertyData: epc: EpcPropertyData = object.__new__(EpcPropertyData) epc.property_type = "2" @@ -41,11 +45,18 @@ def _epc( part.wall_insulation_type = wall_insulation_type part.construction_age_band = construction_age_band part.roof_construction = roof_construction + part.roof_insulation_thickness = roof_insulation_thickness floor_dim: SapFloorDimension = object.__new__(SapFloorDimension) floor_dim.floor_construction = floor_construction + floor_dim.floor_insulation = floor_insulation part.sap_floor_dimensions = [floor_dim] parts.append(part) epc.sap_building_parts = parts + window: SapWindow = object.__new__(SapWindow) + window.window_width = 1.0 + window.window_height = 1.0 + window.glazing_type = glazing_type + epc.sap_windows = [window] return epc @@ -156,6 +167,28 @@ def test_sets_the_other_homogeneous_categoricals_to_the_cohort_mode() -> None: assert main.sap_floor_dimensions[0].floor_construction == 3 +def test_modes_roof_and_floor_insulation() -> None: + # Arrange — the median-size template (members[0]) is an outlier on roof + # insulation thickness and floor insulation; the cohort majority disagrees. + # These are independent fabric categoricals, so each should follow its + # cohort mode like the construction categoricals do. + cohort = _cohort( + _epc(floor_area=80.0, roof_insulation_thickness=25, floor_insulation=9), + _epc(roof_insulation_thickness=300, floor_insulation=2), + _epc(roof_insulation_thickness=300, floor_insulation=2), + ) + + # Act + predicted: EpcPropertyData = EpcPrediction().predict( + PredictionTarget(postcode="LS6 1AA", property_type="2"), cohort + ) + + # Assert — each follows the cohort mode over the outlier template. + main = predicted.sap_building_parts[0] + assert main.roof_insulation_thickness == 300 + assert main.sap_floor_dimensions[0].floor_insulation == 2 + + def test_applies_a_known_wall_override_over_the_mode() -> None: # Arrange — the cohort mode is cavity (1), but we KNOW the target is solid # brick (2), a Landlord Override. The known value must win over the estimate.