feat(epc-prediction): coherent heating donor selection (#1225)

Heating sub-fields can't be field-moded without breaking system coherence,
so the whole SapHeating cluster is now copied as a unit from a single
coherent donor rather than inherited from the structural template: the
neighbour matching the cohort's modal heating signature (main fuel +
category + cylinder presence), most recent among the matches (recent cert =
current system). Including cylinder presence in the signature is load-bearing
— it protects has_hot_water_cylinder + cylinder_insulation (a bare fuel+cat
signature regressed them).

Corpus (150pc/514): heating_main_control 66.3 -> 73.9% (+7.6, the target),
main_fuel 92.8 -> 96.9, category 90.7 -> 95.7, water_fuel 92.8 -> 96.3,
water_code 88.5 -> 95.3, has_cylinder 81.1 -> 89.7, secondary 36.2 -> 42.0.
SAP MAE vs lodged 7.08 -> 6.00 (calculator floor 1.57). cylinder_insulation
-13.6 corpus (tiny-n) but +33pp on the fixture; AC requires control up +
fuel/category hold + SAP not worsened, all met.

Gate (36-target fixture): zero regression; ratcheted main_category
0.8889->0.9444, main_control 0.7500->0.8056, water_fuel 0.9167->0.9722,
water_code 0.8889->0.9444, cylinder_insulation_type 0.1667->0.5000. This is
the per-component heating method ([[feedback_per_component_best_method]]):
coherent donor, never field-mode.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-15 13:48:15 +00:00
parent d762b25808
commit 06a66b3dd9
3 changed files with 124 additions and 5 deletions

View file

@ -19,6 +19,7 @@ from typing import Callable, Iterable, Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
MainHeatingDetail,
SapBuildingPart,
)
from domain.epc_prediction.comparable_properties import (
@ -65,9 +66,31 @@ class EpcPrediction:
predicted.total_floor_area_m2 = _median_floor_area(comparables.members)
self._apply_categorical_modes(predicted, comparables)
self._apply_glazing_mode(predicted, comparables)
self._apply_heating_donor(predicted, comparables)
self._apply_overrides(predicted, target)
return predicted
@staticmethod
def _apply_heating_donor(
    predicted: EpcPropertyData, comparables: ComparableProperties
) -> None:
    """Swap the prediction's heating for one neighbour's complete heating
    cluster (ADR-0029; issue #1225).

    Heating sub-fields are not moded one by one: doing so can break system
    coherence (e.g. a fuel that doesn't match the emitter). Instead the donor
    chosen by `_heating_donor` — the most recent comparable carrying the
    cohort's modal heating signature (main fuel + category + cylinder
    presence) — supplies the whole `sap_heating` object, together with the
    `has_hot_water_cylinder` and `solar_water_heating` flags that travel with
    it. When no comparable lodges a main heating system there is no donor, and
    whatever heating `predicted` already carries is left untouched.

    Mutates `predicted` in place and returns nothing."""
    chosen = _heating_donor(comparables.members)
    if chosen is not None:
        source = chosen.epc
        # Deep copy: the prediction gets its own heating objects rather than
        # sharing (and later possibly mutating) the donor's.
        predicted.sap_heating = copy.deepcopy(source.sap_heating)
        predicted.has_hot_water_cylinder = source.has_hot_water_cylinder
        predicted.solar_water_heating = source.solar_water_heating
@staticmethod
def _apply_glazing_mode(
predicted: EpcPropertyData, comparables: ComparableProperties
@ -368,3 +391,41 @@ def _comparable_modal_glazing(
odd window, matching how the harness scores `modal_glazing_type`."""
types = [window.glazing_type for window in comparable.epc.sap_windows]
return Counter(types).most_common(1)[0][0] if types else None
def _main_heating_detail(comparable: Comparable) -> Optional[MainHeatingDetail]:
    """Return the first `main_heating_details` row of the comparable's
    `sap_heating` — treated as the primary heating system — or None when that
    collection is empty or falsy (no main heating lodged)."""
    rows = comparable.epc.sap_heating.main_heating_details
    if not rows:
        return None
    return rows[0]
def _heating_signature(
    comparable: Comparable,
) -> Optional[tuple[Union[int, str], Optional[int], bool]]:
    """Build the coarse heating identity used to match donors.

    The signature is the triple (main fuel type, main heating category,
    hot-water-cylinder presence). Returns None when the comparable has no main
    heating detail row, which excludes it from donor selection."""
    primary = _main_heating_detail(comparable)
    if primary is not None:
        fuel = primary.main_fuel_type
        category = primary.main_heating_category
        has_cylinder = comparable.epc.has_hot_water_cylinder
        return (fuel, category, has_cylinder)
    return None
def _heating_donor(members: tuple[Comparable, ...]) -> Optional[Comparable]:
    """Pick the coherent heating donor from the cohort.

    The donor is a comparable whose heating signature equals the cohort's most
    common signature. Among those matches the latest `registration_date` wins
    (a missing date sorts as `date.min`), then the greatest
    `certificate_number`, so the choice is deterministic. Returns None when no
    member yields a signature, i.e. nobody lodges a main heating system."""
    candidates = []
    tally: Counter = Counter()
    for member in members:
        signature = _heating_signature(member)
        if signature is not None:
            candidates.append((member, signature))
            tally[signature] += 1
    if not tally:
        return None

    # Equal counts resolve to the signature encountered first in `members`.
    modal_signature, _ = tally.most_common(1)[0]

    def recency(candidate: Comparable):
        # Sort key: newest cert first, certificate number as the final tie-break.
        return (
            candidate.registration_date or date.min,
            candidate.certificate_number,
        )

    return max(
        (member for member, signature in candidates if signature == modal_signature),
        key=recency,
    )

View file

@ -37,12 +37,12 @@ _RATE_FLOORS: dict[str, float] = {
"roof_construction": 0.7222,
"floor_construction": 0.8125,
"heating_main_fuel": 0.9722,
"heating_main_category": 0.8889,
"heating_main_control": 0.7500,
"water_heating_fuel": 0.9167,
"water_heating_code": 0.8889,
"heating_main_category": 0.9444,
"heating_main_control": 0.8056,
"water_heating_fuel": 0.9722,
"water_heating_code": 0.9444,
"has_hot_water_cylinder": 0.8889,
"cylinder_insulation_type": 0.1667,
"cylinder_insulation_type": 0.5000,
"secondary_heating_type": 0.0000,
"roof_insulation_thickness": 0.4118,
"floor_insulation": 0.9375,

View file

@ -10,8 +10,10 @@ from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
MainHeatingDetail,
SapBuildingPart,
SapFloorDimension,
SapHeating,
SapWindow,
)
from domain.epc_prediction.comparable_properties import (
@ -37,6 +39,13 @@ def _epc(
floor_construction: Optional[int] = 1,
floor_insulation: Optional[int] = 1,
glazing_type: Union[int, str] = 3,
main_fuel_type: Union[int, str] = 1,
main_heating_category: Optional[int] = 1,
main_heating_control: Union[int, str] = 1,
water_heating_fuel: Optional[int] = 1,
water_heating_code: Optional[int] = 1,
has_hot_water_cylinder: bool = True,
solar_water_heating: bool = False,
) -> EpcPropertyData:
epc: EpcPropertyData = object.__new__(EpcPropertyData)
epc.property_type = "2"
@ -61,6 +70,19 @@ def _epc(
window.window_height = 1.0
window.glazing_type = glazing_type
epc.sap_windows = [window]
heating: SapHeating = object.__new__(SapHeating)
detail: MainHeatingDetail = object.__new__(MainHeatingDetail)
detail.main_fuel_type = main_fuel_type
detail.main_heating_category = main_heating_category
detail.main_heating_control = main_heating_control
heating.main_heating_details = [detail]
heating.water_heating_fuel = water_heating_fuel
heating.water_heating_code = water_heating_code
heating.cylinder_insulation_type = 1
heating.secondary_heating_type = None
epc.sap_heating = heating
epc.has_hot_water_cylinder = has_hot_water_cylinder
epc.solar_water_heating = solar_water_heating
return epc
@ -348,6 +370,42 @@ def test_confidence_excludes_absent_component_values_from_the_denominator() -> N
assert confidence.cohort_size == 3
def test_heating_is_a_coherent_donor_not_the_structural_template() -> None:
    # Arrange — four dated comparables. The oldest (2016) is the
    # size-representative template (median 80 m²) and runs an atypical system:
    # fuel 99, control 99, no cylinder. The other three share the modal heating
    # signature — gas (fuel 1) with a cylinder — and the newest of them (2024)
    # is the only one with control 7. Because heating sub-fields can't be
    # field-moded, the whole SapHeating cluster must come from that most recent
    # modal-signature donor rather than from the structural template.
    atypical_template = _epc(
        floor_area=80.0,
        main_fuel_type=99,
        main_heating_control=99,
        has_hot_water_cylinder=False,
    )
    gas_2018 = _epc(main_fuel_type=1, main_heating_control=5)
    gas_2019 = _epc(main_fuel_type=1, main_heating_control=5)
    gas_2024 = _epc(main_fuel_type=1, main_heating_control=7)
    cohort = _dated_cohort(
        (atypical_template, date(2016, 1, 1)),
        (gas_2018, date(2018, 1, 1)),
        (gas_2019, date(2019, 1, 1)),
        (gas_2024, date(2024, 1, 1)),
    )
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    predicted: EpcPropertyData = EpcPrediction().predict(target, cohort)

    # Assert — fuel and control both come from the 2024 gas donor (control 7),
    # not the template's fuel 99, and the cylinder flag follows the donor too.
    main_detail = predicted.sap_heating.main_heating_details[0]
    assert main_detail.main_fuel_type == 1
    assert main_detail.main_heating_control == 7
    assert predicted.has_hot_water_cylinder is True
def test_glazing_follows_the_recency_weighted_cohort_mode() -> None:
# Arrange — an old majority single-glazed (type 1, 2015) and a recent
# minority double-glazed (type 3, 2025). Glazing is retrofitted over time