mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): cohort-mode the roof/floor/insulation/age categoricals (ADR-0029)
Only main wall_construction was set to the cohort mode; the other homogeneous categoricals (wall insulation, construction age band, roof construction, floor construction) were left as template-copied, so one median-size template's quirks set them. Apply the same cohort-mode mechanism to all of them per ADR-0029 decision 4 — the template still supplies geometry, only the categorical codes move to the mode. Verified mode beats (or ties) template-copy per categorical before applying. Smoke corpus (29 leave-one-out) classification rates: construction_age_band 55.2% -> 65.5% roof_construction 72.4% -> 79.3% floor_construction 46.2% -> 84.6% wall_insulation_type 93.1% (tie — already template-strong) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
ed96df9315
commit
54a57363f8
2 changed files with 99 additions and 12 deletions
|
|
@ -60,16 +60,27 @@ class EpcPrediction:
|
|||
def _apply_categorical_modes(
|
||||
predicted: EpcPropertyData, comparables: ComparableProperties
|
||||
) -> None:
|
||||
"""Override the predicted picture's homogeneous categoricals with the
|
||||
cohort mode (robust to an atypical template)."""
|
||||
"""Override the predicted picture's homogeneous categoricals — wall /
|
||||
roof / floor construction, wall insulation, age band — with the cohort
|
||||
mode (robust to an atypical template, per ADR-0029 decision 4). The
|
||||
template still supplies the geometry; only the categorical codes move to
|
||||
the mode."""
|
||||
if not predicted.sap_building_parts:
|
||||
return
|
||||
main = predicted.sap_building_parts[0]
|
||||
wall_mode = _mode(
|
||||
_main_wall_construction(c) for c in comparables.members
|
||||
)
|
||||
if wall_mode is not None:
|
||||
main.wall_construction = wall_mode
|
||||
main: SapBuildingPart = predicted.sap_building_parts[0]
|
||||
members = comparables.members
|
||||
for attr in _MAIN_PART_CATEGORICALS:
|
||||
mode = _mode(_main_part_attr(c, attr) for c in members)
|
||||
if mode is not None:
|
||||
setattr(main, attr, mode)
|
||||
floor_values: list[int] = [
|
||||
v for c in members if (v := _main_floor_construction(c)) is not None
|
||||
]
|
||||
floor_dims = main.sap_floor_dimensions
|
||||
if floor_values and floor_dims:
|
||||
floor_dims[0].floor_construction = Counter(floor_values).most_common(
|
||||
1
|
||||
)[0][0]
|
||||
|
||||
@staticmethod
|
||||
def _apply_overrides(
|
||||
|
|
@ -85,9 +96,29 @@ class EpcPrediction:
|
|||
)
|
||||
|
||||
|
||||
def _main_wall_construction(comparable: Comparable) -> Optional[Union[int, str]]:
|
||||
# The homogeneous categoricals carried directly on the main building part. Floor
|
||||
# construction lives on the main floor dimension and is handled separately.
|
||||
_MAIN_PART_CATEGORICALS: tuple[str, ...] = (
|
||||
"wall_construction",
|
||||
"wall_insulation_type",
|
||||
"construction_age_band",
|
||||
"roof_construction",
|
||||
)
|
||||
|
||||
|
||||
def _main_part_attr(
|
||||
comparable: Comparable, attr: str
|
||||
) -> Optional[Union[int, str]]:
|
||||
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
|
||||
return parts[0].wall_construction if parts else None
|
||||
return getattr(parts[0], attr) if parts else None
|
||||
|
||||
|
||||
def _main_floor_construction(comparable: Comparable) -> Optional[int]:
|
||||
parts: list[SapBuildingPart] = comparable.epc.sap_building_parts
|
||||
if not parts:
|
||||
return None
|
||||
dims = parts[0].sap_floor_dimensions
|
||||
return dims[0].floor_construction if dims else None
|
||||
|
||||
|
||||
def _mode(
|
||||
|
|
|
|||
|
|
@ -5,9 +5,13 @@ homogeneous categoricals to the recency-weighted cohort mode, apply Landlord
|
|||
Overrides on top. Pure domain logic.
|
||||
"""
|
||||
|
||||
from typing import Union
|
||||
from typing import Optional, Union
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
SapBuildingPart,
|
||||
SapFloorDimension,
|
||||
)
|
||||
from domain.epc_prediction.comparable_properties import (
|
||||
Comparable,
|
||||
ComparableProperties,
|
||||
|
|
@ -21,6 +25,10 @@ def _epc(
|
|||
building_parts: int = 1,
|
||||
floor_area: float = 80.0,
|
||||
wall_construction: Union[int, str] = 1,
|
||||
wall_insulation_type: Union[int, str] = 1,
|
||||
construction_age_band: str = "K",
|
||||
roof_construction: Optional[int] = 1,
|
||||
floor_construction: Optional[int] = 1,
|
||||
) -> EpcPropertyData:
|
||||
epc: EpcPropertyData = object.__new__(EpcPropertyData)
|
||||
epc.property_type = "2"
|
||||
|
|
@ -30,6 +38,12 @@ def _epc(
|
|||
for _ in range(building_parts):
|
||||
part: SapBuildingPart = object.__new__(SapBuildingPart)
|
||||
part.wall_construction = wall_construction
|
||||
part.wall_insulation_type = wall_insulation_type
|
||||
part.construction_age_band = construction_age_band
|
||||
part.roof_construction = roof_construction
|
||||
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
|
||||
floor_dim.floor_construction = floor_construction
|
||||
part.sap_floor_dimensions = [floor_dim]
|
||||
parts.append(part)
|
||||
epc.sap_building_parts = parts
|
||||
return epc
|
||||
|
|
@ -100,6 +114,48 @@ def test_sets_main_wall_construction_to_the_cohort_mode() -> None:
|
|||
assert predicted.sap_building_parts[0].wall_construction == 1
|
||||
|
||||
|
||||
def test_sets_the_other_homogeneous_categoricals_to_the_cohort_mode() -> None:
|
||||
# Arrange — the median-size template (members[0], 80 m²) is an atypical
|
||||
# outlier on every categorical; the cohort majority disagrees. Age band,
|
||||
# wall insulation, roof construction and floor construction are all
|
||||
# homogeneous categoricals, so each should follow its mode, not the one
|
||||
# template (ADR-0029 decision 4).
|
||||
cohort = _cohort(
|
||||
_epc(
|
||||
floor_area=80.0,
|
||||
construction_age_band="A",
|
||||
wall_insulation_type=9,
|
||||
roof_construction=7,
|
||||
floor_construction=7,
|
||||
),
|
||||
_epc(
|
||||
construction_age_band="K",
|
||||
wall_insulation_type=1,
|
||||
roof_construction=2,
|
||||
floor_construction=3,
|
||||
),
|
||||
_epc(
|
||||
construction_age_band="K",
|
||||
wall_insulation_type=1,
|
||||
roof_construction=2,
|
||||
floor_construction=3,
|
||||
),
|
||||
)
|
||||
|
||||
# Act
|
||||
predicted: EpcPropertyData = EpcPrediction().predict(
|
||||
PredictionTarget(postcode="LS6 1AA", property_type="2"), cohort
|
||||
)
|
||||
|
||||
# Assert — every categorical follows the cohort mode over the outlier
|
||||
# template.
|
||||
main = predicted.sap_building_parts[0]
|
||||
assert main.construction_age_band == "K"
|
||||
assert main.wall_insulation_type == 1
|
||||
assert main.roof_construction == 2
|
||||
assert main.sap_floor_dimensions[0].floor_construction == 3
|
||||
|
||||
|
||||
def test_applies_a_known_wall_override_over_the_mode() -> None:
|
||||
# Arrange — the cohort mode is cavity (1), but we KNOW the target is solid
|
||||
# brick (2), a Landlord Override. The known value must win over the estimate.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue