feat(epc-prediction): cohort-mode roof + floor insulation (ADR-0030)

These independent fabric categoricals were template-copied; mode them like
the construction categoricals. Verified mode beats template before applying.
Big fixture win on roof insulation thickness (doubled), floor insulation
neutral-to-positive:
  roof_insulation_thickness  14.7% -> 29.4%  (gate floor ratcheted up)
  floor_insulation           90.6% (unchanged on the fixture)

Glazing type was tried too (+1.6pp on the 40-postcode corpus) but REGRESSED
the 36-target fixture (0.50 -> 0.44) — the gate caught it. Glazing moding is
marginal/noisy, so it's left on the template; revisit with a larger corpus.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 09:37:45 +00:00
parent a622f97d27
commit 9dd23477ac
3 changed files with 65 additions and 15 deletions

View file

@ -61,10 +61,11 @@ class EpcPrediction:
predicted: EpcPropertyData, comparables: ComparableProperties
) -> None:
"""Override the predicted picture's homogeneous categoricals — wall /
roof / floor construction, wall insulation, age band with the cohort
mode (robust to an atypical template, per ADR-0029 decision 4). The
template still supplies the geometry; only the categorical codes move to
the mode."""
roof / floor construction + insulation, age band — with the cohort mode
(robust to an atypical template, per ADR-0029 decision 4). The template
still supplies the geometry; only the categorical codes move to the mode.
(Glazing type is deliberately left on the template — moding it is
marginal and noisy; revisit with a larger corpus.)"""
if not predicted.sap_building_parts:
return
main: SapBuildingPart = predicted.sap_building_parts[0]
@ -73,14 +74,12 @@ class EpcPrediction:
mode = _mode(_main_part_attr(c, attr) for c in members)
if mode is not None:
setattr(main, attr, mode)
floor_values: list[int] = [
v for c in members if (v := _main_floor_construction(c)) is not None
]
floor_dims = main.sap_floor_dimensions
if floor_values and floor_dims:
floor_dims[0].floor_construction = Counter(floor_values).most_common(
1
)[0][0]
if floor_dims:
for attr in _FLOOR_DIM_CATEGORICALS:
floor_mode = _int_mode(_main_floor_attr(c, attr) for c in members)
if floor_mode is not None:
setattr(floor_dims[0], attr, floor_mode)
@staticmethod
def _apply_overrides(
@ -97,12 +96,20 @@ class EpcPrediction:
# The homogeneous categoricals carried directly on the main building part. Floor
# construction lives on the main floor dimension and is handled separately.
# categoricals live on the main floor dimension and glazing on the windows; both
# are handled separately.
_MAIN_PART_CATEGORICALS: tuple[str, ...] = (
"wall_construction",
"wall_insulation_type",
"construction_age_band",
"roof_construction",
# NOTE(review): the test fixture types this field as str-or-int, unlike the
# int-only floor-dimension fields — presumably why it stays on the generic
# `_mode` path rather than `_int_mode`; confirm against the domain model.
"roof_insulation_thickness",
)
# Integer-coded categoricals on the main building part's ground-floor dimension.
# Each named attribute is read from every comparable's first floor dimension and,
# when at least one value is present, the most common one is written onto the
# predicted main part's first floor dimension.
_FLOOR_DIM_CATEGORICALS: tuple[str, ...] = (
"floor_construction",
"floor_insulation",
)
@ -113,12 +120,13 @@ def _main_part_attr(
return getattr(parts[0], attr) if parts else None
def _main_floor_construction(comparable: Comparable) -> Optional[int]:
def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]:
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
if not parts:
return None
dims = parts[0].sap_floor_dimensions
return dims[0].floor_construction if dims else None
value: Optional[int] = getattr(dims[0], attr) if dims else None
return value
def _mode(
@ -129,3 +137,12 @@ def _mode(
if not present:
return None
return Counter(present).most_common(1)[0][0]
def _int_mode(values: Iterable[Optional[int]]) -> Optional[int]:
"""`_mode` narrowed to int-coded fields (keeps pyright strict happy when the
target attribute is typed `Optional[int]`)."""
present = [v for v in values if v is not None]
if not present:
return None
return Counter(present).most_common(1)[0][0]

View file

@ -43,7 +43,7 @@ _RATE_FLOORS: dict[str, float] = {
"has_hot_water_cylinder": 0.8889,
"cylinder_insulation_type": 0.1667,
"secondary_heating_type": 0.0000,
"roof_insulation_thickness": 0.1471,
"roof_insulation_thickness": 0.2941,
"floor_insulation": 0.9062,
"has_room_in_roof": 0.8333,
"modal_glazing_type": 0.5000,

View file

@ -11,6 +11,7 @@ from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapFloorDimension,
SapWindow,
)
from domain.epc_prediction.comparable_properties import (
Comparable,
@ -28,7 +29,10 @@ def _epc(
wall_insulation_type: Union[int, str] = 1,
construction_age_band: str = "K",
roof_construction: Optional[int] = 1,
roof_insulation_thickness: Optional[Union[str, int]] = 100,
floor_construction: Optional[int] = 1,
floor_insulation: Optional[int] = 1,
glazing_type: Union[int, str] = 3,
) -> EpcPropertyData:
epc: EpcPropertyData = object.__new__(EpcPropertyData)
epc.property_type = "2"
@ -41,11 +45,18 @@ def _epc(
part.wall_insulation_type = wall_insulation_type
part.construction_age_band = construction_age_band
part.roof_construction = roof_construction
part.roof_insulation_thickness = roof_insulation_thickness
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
floor_dim.floor_construction = floor_construction
floor_dim.floor_insulation = floor_insulation
part.sap_floor_dimensions = [floor_dim]
parts.append(part)
epc.sap_building_parts = parts
window: SapWindow = object.__new__(SapWindow)
window.window_width = 1.0
window.window_height = 1.0
window.glazing_type = glazing_type
epc.sap_windows = [window]
return epc
@ -156,6 +167,28 @@ def test_sets_the_other_homogeneous_categoricals_to_the_cohort_mode() -> None:
assert main.sap_floor_dimensions[0].floor_construction == 3
def test_modes_roof_and_floor_insulation() -> None:
    # Arrange — the median-size template (members[0]) disagrees with the rest
    # of the cohort on both roof insulation thickness and floor insulation.
    # Both are independent fabric categoricals, so the prediction should take
    # the cohort mode for each, exactly as it does for the construction codes.
    outlier_template = _epc(
        floor_area=80.0, roof_insulation_thickness=25, floor_insulation=9
    )
    majority_a = _epc(roof_insulation_thickness=300, floor_insulation=2)
    majority_b = _epc(roof_insulation_thickness=300, floor_insulation=2)
    cohort = _cohort(outlier_template, majority_a, majority_b)
    predictor = EpcPrediction()
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    predicted: EpcPropertyData = predictor.predict(target, cohort)

    # Assert — the majority value wins over the outlier template on both fields.
    main_part = predicted.sap_building_parts[0]
    assert main_part.roof_insulation_thickness == 300
    assert main_part.sap_floor_dimensions[0].floor_insulation == 2
def test_applies_a_known_wall_override_over_the_mode() -> None:
# Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
# brick (2), a Landlord Override. The known value must win over the estimate.