From d762b25808812c96a4da4dd52f366d87a99262c4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Mon, 15 Jun 2026 13:35:03 +0000
Subject: [PATCH] feat(epc-prediction): recency-weighted glazing mode (#1223)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-component method: glazing type is now the recency-weighted cohort
mode applied to every predicted window, rather than copied from the
template. Glazing is retrofitted over a dwelling's life (single ->
double), so a recent neighbour reflects the current state — same family
as roof-insulation thickness.

Recency is the CORRECT weighting here: plain moding regressed the
fixture (-5.6pp) and was previously reverted; similarity weighting also
regressed it; recency improves BOTH (window geometry stays on the
template, only the glazing categorical moves).

modal_glazing_type: corpus (150pc/514) 60.7 -> 66.7% (+6.0pp); fixture
0.5000 -> 0.5278 (floor ratcheted up). Heating, geometry residuals and
all other components unchanged.

Refactored _recency_weighted_mode to a reusable
_recency_weighted_choice(value_of) shared by roof insulation + glazing.

Closes the #1223 per-component approach: floor-area (median estimate) +
glazing (recency) shipped as distinct best-fit methods rather than a
global recency template, which would have disturbed the
coherence-coupled heating cluster.

Co-Authored-By: Claude Opus 4.8 --- domain/epc_prediction/epc_prediction.py | 59 ++++++++++++++++--- .../test_component_accuracy_gate.py | 2 +- .../epc_prediction/test_epc_prediction.py | 23 ++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/domain/epc_prediction/epc_prediction.py b/domain/epc_prediction/epc_prediction.py index f6bf2166..2d7a47e8 100644 --- a/domain/epc_prediction/epc_prediction.py +++ b/domain/epc_prediction/epc_prediction.py @@ -15,7 +15,7 @@ import statistics from collections import Counter, defaultdict from dataclasses import dataclass from datetime import date -from typing import Iterable, Optional, Union +from typing import Callable, Iterable, Optional, Union from datatypes.epc.domain.epc_property_data import ( EpcPropertyData, @@ -64,9 +64,28 @@ class EpcPrediction: predicted: EpcPropertyData = copy.deepcopy(template.epc) predicted.total_floor_area_m2 = _median_floor_area(comparables.members) self._apply_categorical_modes(predicted, comparables) + self._apply_glazing_mode(predicted, comparables) self._apply_overrides(predicted, target) return predicted + @staticmethod + def _apply_glazing_mode( + predicted: EpcPropertyData, comparables: ComparableProperties + ) -> None: + """Set every window's glazing type to the recency-weighted cohort mode. + Glazing is retrofitted over a dwelling's life (single → double), so a + recent neighbour reflects the current state — its correct method is the + recency-weighted mode (like roof insulation), NOT the plain mode (which + regressed) or the template copy. 
The window geometry (size, count) is + left on the template; only the glazing categorical moves.""" + glazing = _recency_weighted_choice( + comparables.members, _comparable_modal_glazing + ) + if glazing is None: + return + for window in predicted.sap_windows: + window.glazing_type = glazing + def confidence( self, comparables: ComparableProperties ) -> PredictionConfidence: @@ -305,20 +324,23 @@ def _modal_share( return modal_count / len(present) -def _recency_weighted_mode( - members: tuple[Comparable, ...], attr: str +def _recency_weighted_choice( + members: tuple[Comparable, ...], + value_of: Callable[[Comparable], Optional[Union[int, str]]], ) -> Optional[Union[int, str]]: - """The cohort mode of a main-part attribute, weighting each comparable's vote - by recency — an exponential decay in the cert's age relative to the newest in - the cohort. Newer neighbours dominate, so a stale majority can't outvote the - current state. Falls back to a plain mode when no registration dates are - lodged (all ages 0 ⇒ equal weight).""" + """The recency-weighted cohort mode of a per-comparable value: each + neighbour's vote decays exponentially with the cert's age relative to the + newest in the cohort, so newer neighbours dominate and a stale majority can't + outvote the current state. Falls back to a plain mode when no registration + dates are lodged (all ages 0 ⇒ equal weight). Returns None when no comparable + supplies a value. 
Used for the time-varying components — those upgraded over a + dwelling's life (loft top-ups, glazing retrofits).""" newest: date = max( (c.registration_date or date.min for c in members), default=date.min ) weights: dict[Union[int, str], float] = defaultdict(float) for comparable in members: - value = _main_part_attr(comparable, attr) + value = value_of(comparable) if value is None: continue lodged: date = comparable.registration_date or date.min @@ -327,3 +349,22 @@ def _recency_weighted_mode( if not weights: return None return max(weights, key=lambda value: weights[value]) + + +def _recency_weighted_mode( + members: tuple[Comparable, ...], attr: str +) -> Optional[Union[int, str]]: + """`_recency_weighted_choice` over a main building-part attribute.""" + return _recency_weighted_choice( + members, lambda comparable: _main_part_attr(comparable, attr) + ) + + +def _comparable_modal_glazing( + comparable: Comparable, +) -> Optional[Union[int, str]]: + """A comparable's modal glazing type — the most common across its windows, or + None when it lodges none. 
One glazing signal per neighbour, robust to a single + odd window, matching how the harness scores `modal_glazing_type`.""" + types = [window.glazing_type for window in comparable.epc.sap_windows] + return Counter(types).most_common(1)[0][0] if types else None diff --git a/tests/domain/epc_prediction/test_component_accuracy_gate.py b/tests/domain/epc_prediction/test_component_accuracy_gate.py index fccbe437..82443816 100644 --- a/tests/domain/epc_prediction/test_component_accuracy_gate.py +++ b/tests/domain/epc_prediction/test_component_accuracy_gate.py @@ -47,7 +47,7 @@ _RATE_FLOORS: dict[str, float] = { "roof_insulation_thickness": 0.4118, "floor_insulation": 0.9375, "has_room_in_roof": 0.8333, - "modal_glazing_type": 0.5000, + "modal_glazing_type": 0.5278, "has_pv": 1.0000, "solar_water_heating": 1.0000, } diff --git a/tests/domain/epc_prediction/test_epc_prediction.py b/tests/domain/epc_prediction/test_epc_prediction.py index 6c103d57..06a6ae5a 100644 --- a/tests/domain/epc_prediction/test_epc_prediction.py +++ b/tests/domain/epc_prediction/test_epc_prediction.py @@ -348,6 +348,29 @@ def test_confidence_excludes_absent_component_values_from_the_denominator() -> N assert confidence.cohort_size == 3 +def test_glazing_follows_the_recency_weighted_cohort_mode() -> None: + # Arrange — an old majority single-glazed (type 1, 2015) and a recent + # minority double-glazed (type 3, 2025). Glazing is retrofitted over time + # (single → double), so the recent neighbours reflect the current state: the + # recency-weighted mode must pick double over the stale single-glazed + # majority, like roof insulation thickness. 
+ cohort = _dated_cohort( + (_epc(glazing_type=1), date(2015, 1, 1)), + (_epc(glazing_type=1), date(2015, 1, 1)), + (_epc(glazing_type=1), date(2015, 1, 1)), + (_epc(glazing_type=3), date(2025, 1, 1)), + (_epc(glazing_type=3), date(2025, 1, 1)), + ) + + # Act + predicted: EpcPropertyData = EpcPrediction().predict( + PredictionTarget(postcode="LS6 1AA", property_type="2"), cohort + ) + + # Assert — every predicted window takes the recent glazing over the majority. + assert all(window.glazing_type == 3 for window in predicted.sap_windows) + + def test_applies_a_known_wall_override_over_the_mode() -> None: # Arrange — the cohort mode is cavity (1), but we KNOW the target is solid # brick (2), a Landlord Override. The known value must win over the estimate.