mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): classify roof/floor/insulation/age categoricals (ADR-0029)
The comparison only scored main wall_construction; everything else the predictor produces (by template-copy) went unmeasured. Extend compare_prediction to the rest of the ADR-0029 homogeneous categoricals — wall insulation type, construction age band, roof construction, floor construction — and aggregate per-categorical classification rates in the runner. A categorical hit is "not applicable" (None, excluded from the denominator) when the actual lodges no value, so absent-roof flats don't score free wins.

Smoke corpus (29 leave-one-out, all but wall are template-copied today):
  wall_construction      93.1%
  wall_insulation_type   93.1%
  construction_age_band  55.2%  <- loud; candidate for cohort-mode
  roof_construction      72.4%
  floor_construction     46.2%  (n=13)

These numbers drive the next slice (extend cohort-mode coverage).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
4fa20ae76b
commit
ed96df9315
3 changed files with 125 additions and 8 deletions
|
|
@ -12,6 +12,7 @@ runner, which has the calculator and the lodged SAP.
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingPart
|
||||
|
||||
|
|
@ -19,9 +20,16 @@ from datatypes.epc.domain.epc_property_data import EpcPropertyData, SapBuildingP
|
|||
@dataclass(frozen=True)
|
||||
class PredictionComparison:
|
||||
"""One Property's prediction accuracy: classification hits + geometry
|
||||
residuals (predicted − actual)."""
|
||||
residuals (predicted − actual). A categorical hit is `None` ("not
|
||||
applicable") when the actual lodges no value there, so the harness can keep
|
||||
it out of the classification-rate denominator rather than score a free win.
|
||||
"""
|
||||
|
||||
wall_construction_correct: bool
|
||||
wall_insulation_type_correct: bool
|
||||
construction_age_band_correct: bool
|
||||
roof_construction_correct: Optional[bool]
|
||||
floor_construction_correct: Optional[bool]
|
||||
floor_area_residual: float
|
||||
building_parts_residual: int
|
||||
window_count_residual: int
|
||||
|
|
@ -32,6 +40,21 @@ def _main(epc: EpcPropertyData) -> SapBuildingPart:
|
|||
return epc.sap_building_parts[0]
|
||||
|
||||
|
||||
def _main_floor_construction(epc: EpcPropertyData) -> Optional[int]:
    """Return the main building part's ground-floor construction code.

    Reads the first entry of the main part's ``sap_floor_dimensions``. Yields
    ``None`` when that collection is empty, i.e. no floor dimension is lodged.
    """
    floor_dimensions = _main(epc).sap_floor_dimensions
    if not floor_dimensions:
        return None
    return floor_dimensions[0].floor_construction
|
||||
|
||||
|
||||
def _classify(predicted: object, actual: object) -> Optional[bool]:
|
||||
"""A categorical hit: None ("not applicable") when the actual is absent,
|
||||
else whether the predicted value matches it."""
|
||||
if actual is None:
|
||||
return None
|
||||
return predicted == actual
|
||||
|
||||
|
||||
def _total_window_area(epc: EpcPropertyData) -> float:
|
||||
return sum(w.window_width * w.window_height for w in epc.sap_windows)
|
||||
|
||||
|
|
@ -45,6 +68,20 @@ def compare_prediction(
|
|||
wall_construction_correct=(
|
||||
_main(predicted).wall_construction == _main(actual).wall_construction
|
||||
),
|
||||
wall_insulation_type_correct=(
|
||||
_main(predicted).wall_insulation_type
|
||||
== _main(actual).wall_insulation_type
|
||||
),
|
||||
construction_age_band_correct=(
|
||||
_main(predicted).construction_age_band
|
||||
== _main(actual).construction_age_band
|
||||
),
|
||||
roof_construction_correct=_classify(
|
||||
_main(predicted).roof_construction, _main(actual).roof_construction
|
||||
),
|
||||
floor_construction_correct=_classify(
|
||||
_main_floor_construction(predicted), _main_floor_construction(actual)
|
||||
),
|
||||
floor_area_residual=(
|
||||
predicted.total_floor_area_m2 - actual.total_floor_area_m2
|
||||
),
|
||||
|
|
|
|||
|
|
@ -75,7 +75,15 @@ def main() -> None:
|
|||
calculator = Sap10Calculator()
|
||||
predictor = EpcPrediction()
|
||||
|
||||
wall_hits = wall_total = 0
|
||||
# Classification: name -> [hits, applicable-total]. A None hit (the actual
|
||||
# lodges no value) is excluded from the denominator.
|
||||
categoricals: dict[str, list[int]] = {
|
||||
"wall_construction": [0, 0],
|
||||
"wall_insulation_type": [0, 0],
|
||||
"construction_age_band": [0, 0],
|
||||
"roof_construction": [0, 0],
|
||||
"floor_construction": [0, 0],
|
||||
}
|
||||
floor_res: list[float] = []
|
||||
window_count_res: list[int] = []
|
||||
window_area_res: list[float] = []
|
||||
|
|
@ -105,8 +113,19 @@ def main() -> None:
|
|||
predicted_n += 1
|
||||
|
||||
cmp = compare_prediction(predicted, actual)
|
||||
wall_total += 1
|
||||
wall_hits += int(cmp.wall_construction_correct)
|
||||
_tally(categoricals["wall_construction"], cmp.wall_construction_correct)
|
||||
_tally(
|
||||
categoricals["wall_insulation_type"],
|
||||
cmp.wall_insulation_type_correct,
|
||||
)
|
||||
_tally(
|
||||
categoricals["construction_age_band"],
|
||||
cmp.construction_age_band_correct,
|
||||
)
|
||||
_tally(categoricals["roof_construction"], cmp.roof_construction_correct)
|
||||
_tally(
|
||||
categoricals["floor_construction"], cmp.floor_construction_correct
|
||||
)
|
||||
floor_res.append(cmp.floor_area_residual)
|
||||
window_count_res.append(cmp.window_count_residual)
|
||||
window_area_res.append(cmp.total_window_area_residual)
|
||||
|
|
@ -130,9 +149,10 @@ def main() -> None:
|
|||
|
||||
print(f"corpus: {CORPUS}")
|
||||
print(f"predicted {predicted_n} held-out certs ({skipped_no_cohort} had no cohort)\n")
|
||||
if wall_total:
|
||||
print(f"CLASSIFICATION wall_construction: {wall_hits}/{wall_total} = "
|
||||
f"{wall_hits / wall_total:.1%}")
|
||||
for name, (hits, total) in categoricals.items():
|
||||
if total:
|
||||
print(f"CLASSIFICATION {name}: {hits}/{total} = {hits / total:.1%}")
|
||||
print()
|
||||
_residual("floor_area (m2)", floor_res)
|
||||
_residual("window_count", [float(x) for x in window_count_res])
|
||||
_residual("total_window_area (m2)", window_area_res)
|
||||
|
|
@ -143,6 +163,15 @@ def main() -> None:
|
|||
_sap_line("SAP |neighbour-mean − lodged| (baseline)", sap_vs_neighbour_mean)
|
||||
|
||||
|
||||
def _tally(counter: list[int], hit: Optional[bool]) -> None:
|
||||
"""Record one classification outcome: a None hit (actual absent) is not
|
||||
applicable and skipped; else increment the applicable total and the hits."""
|
||||
if hit is None:
|
||||
return
|
||||
counter[1] += 1
|
||||
counter[0] += int(hit)
|
||||
|
||||
|
||||
def _residual(label: str, values: list[float]) -> None:
|
||||
if not values:
|
||||
print(f"RESIDUAL {label}: (none)")
|
||||
|
|
|
|||
|
|
@ -5,11 +5,12 @@ and residuals on the geometry. Pure; SAP residual is computed in the runner
|
|||
(it needs the calculator + lodged SAP).
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
SapBuildingPart,
|
||||
SapFloorDimension,
|
||||
SapWindow,
|
||||
)
|
||||
from domain.epc_prediction.prediction_comparison import compare_prediction
|
||||
|
|
@ -18,6 +19,10 @@ from domain.epc_prediction.prediction_comparison import compare_prediction
|
|||
def _epc(
|
||||
*,
|
||||
wall_construction: int = 1,
|
||||
wall_insulation_type: Union[int, str] = 1,
|
||||
construction_age_band: str = "K",
|
||||
roof_construction: Optional[int] = 1,
|
||||
floor_construction: Optional[int] = 1,
|
||||
floor_area: float = 80.0,
|
||||
building_parts: int = 1,
|
||||
windows: Optional[list[tuple[float, float]]] = None,
|
||||
|
|
@ -28,6 +33,12 @@ def _epc(
|
|||
for _ in range(building_parts):
|
||||
part: SapBuildingPart = object.__new__(SapBuildingPart)
|
||||
part.wall_construction = wall_construction
|
||||
part.wall_insulation_type = wall_insulation_type
|
||||
part.construction_age_band = construction_age_band
|
||||
part.roof_construction = roof_construction
|
||||
floor_dim: SapFloorDimension = object.__new__(SapFloorDimension)
|
||||
floor_dim.floor_construction = floor_construction
|
||||
part.sap_floor_dimensions = [floor_dim]
|
||||
parts.append(part)
|
||||
epc.sap_building_parts = parts
|
||||
sap_windows: list[SapWindow] = []
|
||||
|
|
@ -64,6 +75,46 @@ def test_flags_an_incorrect_main_wall_construction_classification() -> None:
|
|||
assert comparison.wall_construction_correct is False
|
||||
|
||||
|
||||
def test_classifies_the_extra_homogeneous_categoricals() -> None:
    # Arrange — predicted and actual share the same age band, roof
    # construction and floor construction; wall insulation type is the only
    # categorical that differs (2 predicted vs 1 actual).
    predicted = _epc(
        wall_insulation_type=2,
        construction_age_band="K",
        roof_construction=3,
        floor_construction=1,
    )
    actual = _epc(
        wall_insulation_type=1,
        construction_age_band="K",
        roof_construction=3,
        floor_construction=1,
    )

    # Act
    result = compare_prediction(predicted, actual)

    # Assert — the lone mismatch is a miss; every matching categorical is a hit.
    assert result.wall_insulation_type_correct is False
    assert result.construction_age_band_correct is True
    assert result.roof_construction_correct is True
    assert result.floor_construction_correct is True
|
||||
|
||||
|
||||
def test_categorical_hit_is_not_applicable_when_actual_is_absent() -> None:
    # Arrange — the actual lodges no roof construction (a flat under another
    # dwelling) while the prediction carries one. With nothing to compare
    # against, the outcome must be "not applicable" rather than a free win, so
    # it stays out of the roof classification rate.
    actual = _epc(roof_construction=None)
    predicted = _epc(roof_construction=3)

    # Act
    result = compare_prediction(predicted, actual)

    # Assert
    assert result.roof_construction_correct is None
|
||||
|
||||
|
||||
def test_reports_the_floor_area_residual_as_predicted_minus_actual() -> None:
|
||||
# Arrange — predicted 90 m², actual 100 m² (a 10 m² under-prediction).
|
||||
predicted = _epc(floor_area=90.0)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue