mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): cohort-median floor-area estimate (#1223)
Per-component method, not a global template change: the predicted floor area is now the cohort median (the MAD-minimising point estimate of the target's size) rather than the own area of whichever structural template happens to be selected. The calculator derives heat loss from building-part geometry, not this scalar, so decoupling them is safe and the scalar becomes a better size estimate. floor_area mean |residual|: corpus (150pc/514 targets) 10.62 -> 10.48; fixture 12.2175 -> 11.8983 (ceiling ratcheted down). No other component moves. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
6e9f831296
commit
51cdc25ce8
3 changed files with 38 additions and 3 deletions
|
|
@ -56,10 +56,13 @@ class EpcPrediction:
|
|||
self, target: PredictionTarget, comparables: ComparableProperties
|
||||
) -> EpcPropertyData:
|
||||
"""Predict the target's EPC picture: copy a representative template's
|
||||
structure (coherent for the calculator), then set the homogeneous
|
||||
categoricals to the cohort mode."""
|
||||
structure (coherent for the calculator), set the predicted floor area to
|
||||
the cohort median (the best point estimate of the target's size, decoupled
|
||||
from the one template's own area), then set the homogeneous categoricals
|
||||
to the cohort mode."""
|
||||
template: Comparable = self._template(comparables)
|
||||
predicted: EpcPropertyData = copy.deepcopy(template.epc)
|
||||
predicted.total_floor_area_m2 = _median_floor_area(comparables.members)
|
||||
self._apply_categorical_modes(predicted, comparables)
|
||||
self._apply_overrides(predicted, target)
|
||||
return predicted
|
||||
|
|
@ -213,6 +216,15 @@ def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]:
|
|||
return value
|
||||
|
||||
|
||||
def _median_floor_area(members: tuple[Comparable, ...]) -> float:
    """Return the cohort's median floor area — the point estimate of the target's size.

    The median minimises mean absolute deviation, so it is the best single guess
    for an unknown neighbour's area; it is set independently of the structural
    template (the calculator derives heat loss from the building-part geometry,
    not this scalar, so the two need not agree).

    Args:
        members: The cohort's comparables; each one's
            ``epc.total_floor_area_m2`` is read.

    Returns:
        The median of the members' floor areas, always as a ``float``. For an
        even-sized cohort this is the mean of the two middle areas.

    Raises:
        statistics.StatisticsError: If ``members`` is empty.
    """
    areas = [member.epc.total_floor_area_m2 for member in members]
    # statistics.median hands back the middle element unchanged for an
    # odd-sized cohort, so integer-valued areas would leak out as an int;
    # coerce so the result always matches the annotated return type.
    return float(statistics.median(areas))
|
||||
|
||||
|
||||
def _age_band_index(comparable: Comparable) -> Optional[int]:
|
||||
"""The main building part's construction-age-band position (A=0 … L=11), or
|
||||
None when no recognisable band is lodged."""
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ _RATE_FLOORS: dict[str, float] = {
|
|||
# the predicted picture clusters at a mapper-default 4 windows while actuals
|
||||
# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight.
|
||||
_RESIDUAL_CEILINGS: dict[str, float] = {
|
||||
"floor_area": 12.2175,
|
||||
"floor_area": 11.8983,
|
||||
"total_window_area": 4.4067,
|
||||
"building_parts": 0.3333,
|
||||
"door_count": 0.6389,
|
||||
|
|
|
|||
|
|
@ -226,6 +226,29 @@ def test_recency_weights_roof_insulation_mode() -> None:
|
|||
assert predicted.sap_building_parts[0].roof_insulation_thickness == 300
|
||||
|
||||
|
||||
def test_floor_area_is_the_cohort_median_not_the_templates_own_area() -> None:
    # Arrange — four members make an even-sized cohort, so the median (70 m²)
    # lands between two of them. The size-representative template (the first
    # member closest to the median, 60 m²) therefore cannot sit on the median
    # itself. The predicted floor area is a point estimate of the target's
    # size, so it should follow the cohort median (the MAD-minimising
    # estimator) and not whichever template seeds the structure.
    member_areas = (40.0, 60.0, 80.0, 100.0)
    neighbours = _cohort(*(_epc(floor_area=area) for area in member_areas))

    # Act
    predictor = EpcPrediction()
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")
    result: EpcPropertyData = predictor.predict(target, neighbours)

    # Assert — the cohort median (70) wins over the template's own 60.
    assert result.total_floor_area_m2 == 70.0
|
||||
|
||||
|
||||
def test_categorical_mode_leans_on_size_similar_neighbours() -> None:
|
||||
# Arrange — a count majority (three) carries wall-insulation 9, but two of
|
||||
# them are 400 m² size outliers; the cohort centre (median 100 m²) holds
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue