mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
feat(epc-prediction): cohort-median floor-area estimate (#1223)
Per-component method, not a global template change: the predicted floor area is now the cohort median (the MAD-minimising point estimate of the target's size) rather than the own area of whichever structural template happens to be selected. The calculator derives heat loss from building-part geometry, not this scalar, so decoupling the two is safe and the scalar becomes a better size estimate. floor_area mean|.|: corpus (150pc/514 targets) 10.62 -> 10.48; fixture 12.2175 -> 11.8983 (ceiling ratcheted down). No other component moves. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
6e9f831296
commit
51cdc25ce8
3 changed files with 38 additions and 3 deletions
|
|
@ -56,10 +56,13 @@ class EpcPrediction:
|
||||||
self, target: PredictionTarget, comparables: ComparableProperties
|
self, target: PredictionTarget, comparables: ComparableProperties
|
||||||
) -> EpcPropertyData:
|
) -> EpcPropertyData:
|
||||||
"""Predict the target's EPC picture: copy a representative template's
|
"""Predict the target's EPC picture: copy a representative template's
|
||||||
structure (coherent for the calculator), then set the homogeneous
|
structure (coherent for the calculator), set the predicted floor area to
|
||||||
categoricals to the cohort mode."""
|
the cohort median (the best point estimate of the target's size, decoupled
|
||||||
|
from the one template's own area), then set the homogeneous categoricals
|
||||||
|
to the cohort mode."""
|
||||||
template: Comparable = self._template(comparables)
|
template: Comparable = self._template(comparables)
|
||||||
predicted: EpcPropertyData = copy.deepcopy(template.epc)
|
predicted: EpcPropertyData = copy.deepcopy(template.epc)
|
||||||
|
predicted.total_floor_area_m2 = _median_floor_area(comparables.members)
|
||||||
self._apply_categorical_modes(predicted, comparables)
|
self._apply_categorical_modes(predicted, comparables)
|
||||||
self._apply_overrides(predicted, target)
|
self._apply_overrides(predicted, target)
|
||||||
return predicted
|
return predicted
|
||||||
|
|
@ -213,6 +216,15 @@ def _main_floor_attr(comparable: Comparable, attr: str) -> Optional[int]:
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _median_floor_area(members: tuple[Comparable, ...]) -> float:
    """Return the cohort's median floor area — the point estimate of the target's size.

    The median minimises mean absolute deviation, so it is the best single guess
    for an unknown neighbour's area; it is set independently of the structural
    template (the calculator derives heat loss from the building-part geometry,
    not this scalar, so the two need not agree).

    Args:
        members: The cohort's comparables; each contributes its
            ``epc.total_floor_area_m2``.

    Returns:
        The median of the members' floor areas, always as a ``float``. For an
        even-sized cohort this is the mean of the two middle values.

    Raises:
        statistics.StatisticsError: If ``members`` is empty.
    """
    areas = [member.epc.total_floor_area_m2 for member in members]
    if not areas:
        # Same exception type statistics.median would raise, but the message
        # names the real cause instead of the generic "no median for empty data".
        raise statistics.StatisticsError(
            "cannot take the median floor area of an empty cohort"
        )
    # statistics.median hands back the middle element unchanged for an odd
    # count, so integer areas would leak out as int; coerce to honour the
    # declared float return type.
    return float(statistics.median(areas))
|
||||||
|
|
||||||
|
|
||||||
def _age_band_index(comparable: Comparable) -> Optional[int]:
|
def _age_band_index(comparable: Comparable) -> Optional[int]:
|
||||||
"""The main building part's construction-age-band position (A=0 … L=11), or
|
"""The main building part's construction-age-band position (A=0 … L=11), or
|
||||||
None when no recognisable band is lodged."""
|
None when no recognisable band is lodged."""
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ _RATE_FLOORS: dict[str, float] = {
|
||||||
# the predicted picture clusters at a mapper-default 4 windows while actuals
|
# the predicted picture clusters at a mapper-default 4 windows while actuals
|
||||||
# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight.
|
# spread 1-21, yet total_window_area (the SAP-relevant signal) stays tight.
|
||||||
_RESIDUAL_CEILINGS: dict[str, float] = {
|
_RESIDUAL_CEILINGS: dict[str, float] = {
|
||||||
"floor_area": 12.2175,
|
"floor_area": 11.8983,
|
||||||
"total_window_area": 4.4067,
|
"total_window_area": 4.4067,
|
||||||
"building_parts": 0.3333,
|
"building_parts": 0.3333,
|
||||||
"door_count": 0.6389,
|
"door_count": 0.6389,
|
||||||
|
|
|
||||||
|
|
@ -226,6 +226,29 @@ def test_recency_weights_roof_insulation_mode() -> None:
|
||||||
assert predicted.sap_building_parts[0].roof_insulation_thickness == 300
|
assert predicted.sap_building_parts[0].roof_insulation_thickness == 300
|
||||||
|
|
||||||
|
|
||||||
|
def test_floor_area_is_the_cohort_median_not_the_templates_own_area() -> None:
    # Arrange — four members give an even-sized cohort, so the median (70 m²)
    # lands between two of them. The size-representative template (the first
    # member closest to the median, 60 m²) therefore cannot sit on the median
    # itself. The predicted floor area is a point estimate of the target's
    # size: the cohort median (the MAD-minimising estimator) serves it best,
    # independently of whichever template seeds the structure.
    floor_areas = (40.0, 60.0, 80.0, 100.0)
    cohort = _cohort(*(_epc(floor_area=area) for area in floor_areas))
    target = PredictionTarget(postcode="LS6 1AA", property_type="2")

    # Act
    predicted: EpcPropertyData = EpcPrediction().predict(target, cohort)

    # Assert — the cohort median (70) wins over the template's own 60.
    assert predicted.total_floor_area_m2 == 70.0
|
||||||
|
|
||||||
|
|
||||||
def test_categorical_mode_leans_on_size_similar_neighbours() -> None:
|
def test_categorical_mode_leans_on_size_similar_neighbours() -> None:
|
||||||
# Arrange — a count majority (three) carries wall-insulation 9, but two of
|
# Arrange — a count majority (three) carries wall-insulation 9, but two of
|
||||||
# them are 400 m² size outliers; the cohort centre (median 100 m²) holds
|
# them are 400 m² size outliers; the cohort centre (median 100 m²) holds
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue