mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 14l: bigger-run fixes — UCL guard, PV Measurement coercion, sMAPE
Three changes surfaced by the 25k 2026 run:

- transform._peui_ucl returns None for non-positive raw PEUI (net-exporters). apply_ucl_correction would otherwise raise ValueError on negative input.
- PhotovoltaicArray scalars (peak_power, pitch, orientation, overshading) now accept Measurement | int | float in the schema; mapper coerces via _measurement_value.
- train_baseline reports sMAPE alongside MAPE — handles zero-actual rows (e.g. co2_emissions for net-zero certs) where MAPE explodes.

Results at N=25,000 RdSAP 2026 certs (~32s end-to-end):

sap_score MAPE=0.064 sMAPE=0.054 R^2=0.762
co2_emissions sMAPE=0.140 R^2=0.890
peui_raw MAPE=0.126 sMAPE=0.120 R^2=0.714
peui_ucl MAPE=0.114 sMAPE=0.108 R^2=0.736
space_heating_kwh MAPE=0.167 sMAPE=0.157 R^2=0.915
hot_water_kwh MAPE=0.089 sMAPE=0.086 R^2=0.737

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
8fddd25b9a
commit
c496f345f8
4 changed files with 39 additions and 14 deletions
|
|
@ -102,10 +102,10 @@ def _map_schema_21_pv(
|
|||
if isinstance(es_pv_supply, list):
|
||||
flattened = [
|
||||
PhotovoltaicArray(
|
||||
peak_power=array.peak_power,
|
||||
pitch=array.pitch,
|
||||
orientation=array.orientation,
|
||||
overshading=array.overshading,
|
||||
peak_power=_measurement_value(array.peak_power),
|
||||
pitch=int(_measurement_value(array.pitch)),
|
||||
orientation=int(_measurement_value(array.orientation)),
|
||||
overshading=int(_measurement_value(array.overshading)),
|
||||
)
|
||||
for inner_list in es_pv_supply
|
||||
for array in inner_list
|
||||
|
|
|
|||
|
|
@ -111,12 +111,12 @@ class PhotovoltaicArray:
|
|||
Modern SAP10 EPCs with measured PV carry `photovoltaic_supply` as a nested
|
||||
list (`list[list[PhotovoltaicArray]]`) rather than the legacy wrapper dict
|
||||
`PhotovoltaicSupply`. The Union type on SapEnergySource.photovoltaic_supply
|
||||
accepts either shape.
|
||||
accepts either shape. Some certs wrap the scalars in Measurement dicts.
|
||||
"""
|
||||
peak_power: float
|
||||
pitch: int
|
||||
orientation: int
|
||||
overshading: int
|
||||
peak_power: Union[Measurement, int, float]
|
||||
pitch: Union[Measurement, int]
|
||||
orientation: Union[Measurement, int]
|
||||
overshading: Union[Measurement, int]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -147,8 +147,9 @@ class SapWindow:
|
|||
orientation: int
|
||||
window_type: int
|
||||
glazing_type: int
|
||||
window_width: float
|
||||
window_height: float
|
||||
# Real-API certs sometimes carry a Measurement dict for dimensions, not a plain float.
|
||||
window_width: Union[Measurement, int, float]
|
||||
window_height: Union[Measurement, int, float]
|
||||
draught_proofed: str # TODO: make bool
|
||||
window_location: int
|
||||
window_wall_type: int
|
||||
|
|
|
|||
|
|
@ -595,13 +595,20 @@ class EpcMlTransform:
|
|||
def _peui_ucl(epc: EpcPropertyData) -> Optional[float]:
|
||||
"""Apply the Few et al. per-band UCL correction to PEUI for training labels.
|
||||
|
||||
Returns None when either the raw PEUI or the SAP score is missing — those rows
|
||||
are unusable as `peui_ucl` training labels and should be dropped upstream.
|
||||
Returns None when:
|
||||
- either the raw PEUI or the SAP score is missing, or
|
||||
- the raw PEUI is non-positive (e.g. net-exporter homes with negative PEUI)
|
||||
so the UCL correction is undefined.
|
||||
Those rows are unusable as `peui_ucl` training labels and should be dropped
|
||||
upstream rather than crashing the transform.
|
||||
"""
|
||||
if epc.energy_consumption_current is None or epc.energy_rating_current is None:
|
||||
return None
|
||||
peui_raw = float(epc.energy_consumption_current)
|
||||
if peui_raw <= 0:
|
||||
return None
|
||||
band = Epc.from_sap_score(epc.energy_rating_current)
|
||||
return apply_ucl_correction(float(epc.energy_consumption_current), band)
|
||||
return apply_ucl_correction(peui_raw, band)
|
||||
|
||||
|
||||
def _pv_aggregates(es: SapEnergySource) -> dict[str, Any]:
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ def train_baseline(
|
|||
|
||||
metrics[target] = {
|
||||
"mape": float(cast(float, mean_absolute_percentage_error(y_test, preds))),
|
||||
"smape": _smape(y_test, preds),
|
||||
"r2": float(cast(float, r2_score(y_test, preds))),
|
||||
}
|
||||
|
||||
|
|
@ -79,3 +80,19 @@ def train_baseline(
|
|||
json.dumps(metrics, indent=2).encode("utf-8"),
|
||||
)
|
||||
return metrics
|
||||
|
||||
|
||||
def _smape(y_true: Any, y_pred: Any) -> float:
|
||||
"""Symmetric MAPE: mean(|y - yhat| / ((|y| + |yhat|) / 2)).
|
||||
|
||||
Bounded in [0, 2] (often reported as 0-200%). Stable when |y| is near zero,
|
||||
so it's a better summary than MAPE for low-magnitude targets like
|
||||
`hot_water_kwh` in well-insulated homes.
|
||||
"""
|
||||
y_t = np.asarray(y_true, dtype=float)
|
||||
y_p = np.asarray(y_pred, dtype=float)
|
||||
denom = (np.abs(y_t) + np.abs(y_p)) / 2.0
|
||||
mask = denom > 0
|
||||
if not mask.any():
|
||||
return 0.0
|
||||
return float(np.mean(np.abs(y_t[mask] - y_p[mask]) / denom[mask]))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue