mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
fix(epc-prediction): size-representative template selection (ADR-0029)
Template (the comparable whose structure/geometry is copied wholesale) was members[0] — an arbitrary draw from the API search order. With floor area varying widely within a property_type cohort (NG71AA houses span 51-340 m2), this made the copied geometry noisy and systematically large. Pick the member whose floor area is closest to the cohort median instead, implementing ADR-0029 decision 4's previously unimplemented "closest on size" criterion while keeping the structure coherent (it is still one real property, so floor dims / windows / parts stay internally consistent for the calculator). Smoke corpus (29 leave-one-out predictions): floor_area mean|.| 68.0 -> 37.9 m2 (bias +46.8 -> -3.9); window_area mean|.| 11.1 -> 7.3 m2; parts mean|.| 1.00 -> 0.38; SAP |pred-calc - calc(actual)| MAE 7.19 -> 4.86. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f3ad6343a3
commit
4fa20ae76b
2 changed files with 35 additions and 2 deletions
|
|
@ -10,6 +10,7 @@ logic — deterministic neighbour synthesis, not ML.
|
|||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import statistics
|
||||
from collections import Counter
|
||||
from typing import Iterable, Optional, Union
|
||||
|
||||
|
|
@ -41,8 +42,19 @@ class EpcPrediction:
|
|||
|
||||
@staticmethod
def _template(comparables: ComparableProperties) -> Comparable:
    """Choose the comparable whose structure seeds the prediction.

    One neighbour's geometry is copied wholesale, so the template is the
    cohort member whose total floor area lies nearest the cohort median
    rather than whichever member happens to come first. This keeps the
    prediction away from the cohort's size outliers (ADR-0029 decision 4:
    closest on size).

    When several members are equally close to the median, the one that
    appears earliest in ``comparables.members`` is returned.
    """
    cohort: tuple[Comparable, ...] = comparables.members
    floor_areas: list[float] = [
        member.epc.total_floor_area_m2 for member in cohort
    ]
    midpoint: float = statistics.median(floor_areas)

    def distance_from_median(member: Comparable) -> float:
        # Absolute gap between this member's floor area and the cohort median.
        return abs(member.epc.total_floor_area_m2 - midpoint)

    # min() keeps the first minimal element, so ties resolve in member order.
    return min(cohort, key=distance_from_median)
|
||||
|
||||
@staticmethod
|
||||
def _apply_categorical_modes(
|
||||
|
|
|
|||
|
|
@ -59,6 +59,27 @@ def test_predicts_a_picture_by_copying_a_representative_template() -> None:
|
|||
assert predicted is not template
|
||||
|
||||
|
||||
def test_template_is_the_member_closest_to_the_cohort_median_size() -> None:
    # Arrange — three members of very different sizes, with an atypical tiny
    # 20 m² outlier in first position. Because one neighbour's geometry is
    # copied wholesale, the template has to be the size-representative member
    # (nearest the cohort median), not simply the first one returned
    # (ADR-0029 decision 4: closest on size).
    floor_areas = (20.0, 80.0, 200.0)
    cohort = _cohort(*(_epc(floor_area=area) for area in floor_areas))
    predictor = EpcPrediction()
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    predicted: EpcPropertyData = predictor.predict(target, cohort)

    # Assert — the structure comes from the 80 m² member (the median), not
    # from the 20 m² outlier at members[0].
    assert predicted.total_floor_area_m2 == 80.0
|
||||
|
||||
|
||||
def test_sets_main_wall_construction_to_the_cohort_mode() -> None:
|
||||
# Arrange — the template (members[0]) is solid brick (2), but the cohort
|
||||
# majority is cavity (1). The homogeneous categorical should follow the mode,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue