feat(epc-prediction): recency-weighted glazing mode (#1223)

Per-component method: glazing type is now the recency-weighted cohort mode
applied to every predicted window, rather than copied from the template.
Glazing is retrofitted over a dwelling's life (single -> double), so a
recent neighbour reflects the current state — same family as roof-insulation
thickness. Recency is the CORRECT weighting here: plain moding regressed the
fixture (-5.6pp) and was previously reverted; similarity weighting also
regressed it; recency improves BOTH (window geometry stays on the template,
only the glazing categorical moves).

modal_glazing_type: corpus (150pc/514) 60.7 -> 66.7% (+6.0pp); fixture
0.5000 -> 0.5278 (floor ratcheted up). Heating, geometry residuals and all
other components unchanged. Refactored _recency_weighted_mode to a reusable
_recency_weighted_choice(value_of) shared by roof insulation + glazing.

Closes the #1223 per-component approach: floor-area (median estimate) +
glazing (recency) shipped as distinct best-fit methods rather than a global
recency template, which would have disturbed the coherence-coupled heating
cluster.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-15 13:35:03 +00:00
parent 51cdc25ce8
commit d762b25808
3 changed files with 74 additions and 10 deletions

View file

@ -15,7 +15,7 @@ import statistics
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import date
from typing import Iterable, Optional, Union
from typing import Callable, Iterable, Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
@ -64,9 +64,28 @@ class EpcPrediction:
predicted: EpcPropertyData = copy.deepcopy(template.epc)
predicted.total_floor_area_m2 = _median_floor_area(comparables.members)
self._apply_categorical_modes(predicted, comparables)
self._apply_glazing_mode(predicted, comparables)
self._apply_overrides(predicted, target)
return predicted
@staticmethod
def _apply_glazing_mode(
    predicted: EpcPropertyData, comparables: ComparableProperties
) -> None:
    """Overwrite each predicted window's glazing type with the cohort's
    recency-weighted mode.

    Glazing is retrofitted over a dwelling's life (single → double), so a
    recent neighbour reflects the current state — hence the recency-weighted
    mode (as for roof insulation) rather than the plain mode (which
    regressed) or the template copy. Only the glazing categorical is
    touched; the window geometry (size, count) is left on the template.
    Does nothing when the cohort yields no glazing value."""
    cohort_glazing = _recency_weighted_choice(
        comparables.members, _comparable_modal_glazing
    )
    if cohort_glazing is not None:
        # One cohort-level answer, stamped onto every window of the prediction.
        for predicted_window in predicted.sap_windows:
            predicted_window.glazing_type = cohort_glazing
def confidence(
self, comparables: ComparableProperties
) -> PredictionConfidence:
@ -305,20 +324,23 @@ def _modal_share(
return modal_count / len(present)
def _recency_weighted_mode(
members: tuple[Comparable, ...], attr: str
def _recency_weighted_choice(
members: tuple[Comparable, ...],
value_of: Callable[[Comparable], Optional[Union[int, str]]],
) -> Optional[Union[int, str]]:
"""The cohort mode of a main-part attribute, weighting each comparable's vote
by recency an exponential decay in the cert's age relative to the newest in
the cohort. Newer neighbours dominate, so a stale majority can't outvote the
current state. Falls back to a plain mode when no registration dates are
lodged (all ages 0 equal weight)."""
"""The recency-weighted cohort mode of a per-comparable value: each
neighbour's vote decays exponentially with the cert's age relative to the
newest in the cohort, so newer neighbours dominate and a stale majority can't
outvote the current state. Falls back to a plain mode when no registration
dates are lodged (all ages 0 → equal weight). Returns None when no comparable
supplies a value. Used for the time-varying components — those upgraded over a
dwelling's life (loft top-ups, glazing retrofits)."""
newest: date = max(
(c.registration_date or date.min for c in members), default=date.min
)
weights: dict[Union[int, str], float] = defaultdict(float)
for comparable in members:
value = _main_part_attr(comparable, attr)
value = value_of(comparable)
if value is None:
continue
lodged: date = comparable.registration_date or date.min
@ -327,3 +349,22 @@ def _recency_weighted_mode(
if not weights:
return None
return max(weights, key=lambda value: weights[value])
def _recency_weighted_mode(
    members: tuple[Comparable, ...], attr: str
) -> Optional[Union[int, str]]:
    """Recency-weighted cohort mode of the main building-part attribute
    `attr` — a thin adapter over `_recency_weighted_choice`."""

    def main_part_value(comparable: Comparable) -> Optional[Union[int, str]]:
        # Each comparable votes with its main building part's `attr` value.
        return _main_part_attr(comparable, attr)

    return _recency_weighted_choice(members, main_part_value)
def _comparable_modal_glazing(
    comparable: Comparable,
) -> Optional[Union[int, str]]:
    """Return a comparable's modal glazing type, or None when it lodges none.

    The most common glazing type across the comparable's windows — one glazing
    signal per neighbour, robust to a single odd window, matching how the
    harness scores `modal_glazing_type`. Windows with no glazing type lodged
    are ignored, so missing values cannot outvote a real one and silence the
    neighbour's vote (the recency-weighted vote skips absent values the same
    way). Ties go to the type seen first in window order."""
    lodged = [
        window.glazing_type
        for window in comparable.epc.sap_windows
        if window.glazing_type is not None
    ]
    if not lodged:
        # No windows, or none with a glazing type: this neighbour abstains.
        return None
    return Counter(lodged).most_common(1)[0][0]

View file

@ -47,7 +47,7 @@ _RATE_FLOORS: dict[str, float] = {
"roof_insulation_thickness": 0.4118,
"floor_insulation": 0.9375,
"has_room_in_roof": 0.8333,
"modal_glazing_type": 0.5000,
"modal_glazing_type": 0.5278,
"has_pv": 1.0000,
"solar_water_heating": 1.0000,
}

View file

@ -348,6 +348,29 @@ def test_confidence_excludes_absent_component_values_from_the_denominator() -> N
assert confidence.cohort_size == 3
def test_glazing_follows_the_recency_weighted_cohort_mode() -> None:
    # Arrange — an old majority single-glazed (type 1, 2015) and a recent
    # minority double-glazed (type 3, 2025). Glazing is retrofitted over time
    # (single → double), so the recent neighbours reflect the current state: the
    # recency-weighted mode must pick double over the stale single-glazed
    # majority, like roof insulation thickness.
    cohort = _dated_cohort(
        (_epc(glazing_type=1), date(2015, 1, 1)),
        (_epc(glazing_type=1), date(2015, 1, 1)),
        (_epc(glazing_type=1), date(2015, 1, 1)),
        (_epc(glazing_type=3), date(2025, 1, 1)),
        (_epc(glazing_type=3), date(2025, 1, 1)),
    )

    # Act
    predicted: EpcPropertyData = EpcPrediction().predict(
        PredictionTarget(postcode="LS6 1AA", property_type="2"), cohort
    )

    # Assert — the prediction must actually carry windows (`all` over an empty
    # collection passes vacuously; this assumes the `_epc` fixture lodges at
    # least one window), and every one takes the recent glazing over the
    # stale majority.
    assert predicted.sap_windows
    assert all(window.glazing_type == 3 for window in predicted.sap_windows)
def test_applies_a_known_wall_override_over_the_mode() -> None:
# Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
# brick (2), a Landlord Override. The known value must win over the estimate.