feat(epc-prediction): cohort-mode the roof/floor/insulation/age categoricals (ADR-0029)

Only main wall_construction was set to the cohort mode; the other
homogeneous categoricals (wall insulation, construction age band, roof
construction, floor construction) were left as template-copied, so one
median-size template's quirks set them. Apply the same cohort-mode
mechanism to all of them per ADR-0029 decision 4 — the template still
supplies geometry, only the categorical codes move to the mode.

Verified mode beats (or ties) template-copy per categorical before
applying. Smoke corpus (29 leave-one-out) classification rates:
  construction_age_band  55.2% -> 65.5%
  roof_construction      72.4% -> 79.3%
  floor_construction     46.2% -> 84.6%
  wall_insulation_type   93.1% (tie — already template-strong)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-14 00:31:16 +00:00
parent ed96df9315
commit 54a57363f8
2 changed files with 99 additions and 12 deletions

View file

@ -60,16 +60,27 @@ class EpcPrediction:
def _apply_categorical_modes(
predicted: EpcPropertyData, comparables: ComparableProperties
) -> None:
"""Override the predicted picture's homogeneous categoricals with the
cohort mode (robust to an atypical template)."""
"""Override the predicted picture's homogeneous categoricals — wall /
roof / floor construction, wall insulation, age band — with the cohort
mode (robust to an atypical template, per ADR-0029 decision 4). The
template still supplies the geometry; only the categorical codes move to
the mode."""
if not predicted.sap_building_parts:
return
main = predicted.sap_building_parts[0]
wall_mode = _mode(
_main_wall_construction(c) for c in comparables.members
)
if wall_mode is not None:
main.wall_construction = wall_mode
main: SapBuildingPart = predicted.sap_building_parts[0]
members = comparables.members
for attr in _MAIN_PART_CATEGORICALS:
mode = _mode(_main_part_attr(c, attr) for c in members)
if mode is not None:
setattr(main, attr, mode)
floor_values: list[int] = [
v for c in members if (v := _main_floor_construction(c)) is not None
]
floor_dims = main.sap_floor_dimensions
if floor_values and floor_dims:
floor_dims[0].floor_construction = Counter(floor_values).most_common(
1
)[0][0]
@staticmethod
def _apply_overrides(
@ -85,9 +96,29 @@ class EpcPrediction:
)
def _main_wall_construction(comparable: Comparable) -> Optional[Union[int, str]]:
# The homogeneous categoricals carried directly on the main building part.
# Each name listed here is used as an attribute name: it is read off a
# comparable's first building part with getattr, and the resulting cohort mode
# is written onto the predicted main part with setattr. Floor construction is
# not listed because it lives on the main floor dimension rather than on the
# building part itself, so it is handled separately.
_MAIN_PART_CATEGORICALS: tuple[str, ...] = (
"wall_construction",
"wall_insulation_type",
"construction_age_band",
"roof_construction",
)
def _main_part_attr(
comparable: Comparable, attr: str
) -> Optional[Union[int, str]]:
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
return parts[0].wall_construction if parts else None
return getattr(parts[0], attr) if parts else None
def _main_floor_construction(comparable: Comparable) -> Optional[int]:
    """Return the floor construction code of a comparable's main floor.

    The value is read from the first floor dimension of the comparable's
    first building part. ``None`` is returned when the comparable has no
    building parts, or when its first part has no floor dimensions.
    """
    building_parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
    if building_parts:
        floor_dimensions = building_parts[0].sap_floor_dimensions
        if floor_dimensions:
            # Only the first floor dimension of the main part is consulted.
            return floor_dimensions[0].floor_construction
    return None
def _mode(

View file

@ -5,9 +5,13 @@ homogeneous categoricals to the recency-weighted cohort mode, apply Landlord
Overrides on top. Pure domain logic.
"""
from typing import Union
from typing import Optional, Union
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
SapBuildingPart,
SapFloorDimension,
)
from domain.epc_prediction.comparable_properties import (
Comparable,
ComparableProperties,
@ -21,6 +25,10 @@ def _epc(
building_parts: int = 1,
floor_area: float = 80.0,
wall_construction: Union[int, str] = 1,
wall_insulation_type: Union[int, str] = 1,
construction_age_band: str = "K",
roof_construction: Optional[int] = 1,
floor_construction: Optional[int] = 1,
) -> EpcPropertyData:
epc: EpcPropertyData = object.__new__(EpcPropertyData)
epc.property_type = "2"
@ -30,6 +38,12 @@ def _epc(
for _ in range(building_parts):
part: SapBuildingPart = object.__new__(SapBuildingPart)
part.wall_construction = wall_construction
part.wall_insulation_type = wall_insulation_type
part.construction_age_band = construction_age_band
part.roof_construction = roof_construction
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
floor_dim.floor_construction = floor_construction
part.sap_floor_dimensions = [floor_dim]
parts.append(part)
epc.sap_building_parts = parts
return epc
@ -100,6 +114,48 @@ def test_sets_main_wall_construction_to_the_cohort_mode() -> None:
assert predicted.sap_building_parts[0].wall_construction == 1
def test_sets_the_other_homogeneous_categoricals_to_the_cohort_mode() -> None:
    """Age band, wall insulation, roof construction and floor construction
    each follow the cohort mode rather than one outlier member's values
    (ADR-0029 decision 4)."""
    # Arrange — the first member disagrees with the other two on every
    # categorical under test, so the majority value differs from the outlier's
    # for each of them.
    # NOTE(review): the first member is meant to be the median-size template,
    # but all three members share the same 80 m² floor area — confirm that
    # template selection really picks members[0] here.
    outlier_codes = {
        "construction_age_band": "A",
        "wall_insulation_type": 9,
        "roof_construction": 7,
        "floor_construction": 7,
    }
    majority_codes = {
        "construction_age_band": "K",
        "wall_insulation_type": 1,
        "roof_construction": 2,
        "floor_construction": 3,
    }
    cohort = _cohort(
        _epc(floor_area=80.0, **outlier_codes),
        _epc(**majority_codes),
        _epc(**majority_codes),
    )

    # Act
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")
    predicted: EpcPropertyData = EpcPrediction().predict(target, cohort)

    # Assert — every categorical carries the majority value, not the
    # outlier's.
    main_part = predicted.sap_building_parts[0]
    assert main_part.construction_age_band == "K"
    assert main_part.wall_insulation_type == 1
    assert main_part.roof_construction == 2
    assert main_part.sap_floor_dimensions[0].floor_construction == 3
def test_applies_a_known_wall_override_over_the_mode() -> None:
# Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
# brick (2), a Landlord Override. The known value must win over the estimate.