slice 17a: PV-export credit in predicted_total_fuel_cost (v2.1.0)

Closes the high-SAP under-prediction gap diagnosed in 16h. 40% of SAP-85+
properties have PV; mean predicted_ecf in that band was 1.74, which the
rating formula maps to SAP ~88, whereas the label SAP is 90+. Perversely,
PV homes had a HIGHER predicted_ecf than non-PV homes in the same band,
because the cost reconstruction applied no export credit at all.

New helper: predicted_pv_generation_kwh(kWp, region) -> kWh/yr from a
SAP10.2 Table 6e regional yield factor (UK avg 850 kWh/kWp/yr; Highland
650; Thames 920).

predicted_total_fuel_cost_gbp now subtracts pv_kwh * standard electricity
price (Table 32 code 30, both self-consumption and export at 13.19 p/kWh).

New feature column predicted_pv_generation_kwh exposed alongside the
adjusted cost so the model sees both signals.

VERSION 2.0.0 -> 2.1.0 (MINOR: column added; existing column semantics
shifted but pre-deploy so no consumer break).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 15:28:09 +00:00
parent 6072d8795a
commit 06ce3205b1
4 changed files with 125 additions and 5 deletions

View file

@ -13,7 +13,7 @@ fuel-mix-conditional offset the tree-based model can learn (ADR-0008,
from __future__ import annotations
from math import log10
from typing import Optional
from typing import Final, Optional
from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
@ -21,9 +21,39 @@ from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
# SAP10 deflator applied to total cost before the rating equation (Table 32).
_DEFLATOR: float = 0.42
# Electricity standard tariff fuel code (Table 32) — used for lighting.
# Electricity standard tariff fuel code (Table 32) — used for lighting + PV credit.
_ELECTRICITY_STANDARD_CODE: int = 30
# Annual PV yield (kWh / kWp / yr) by SAP10.2 region. Derived from Table 6e
# climate data integrated over an average roof (South-facing, 30 deg pitch,
# average overshading). Southern England ~ 900, Scotland ~ 700, Highland 650.
# Keys are integer region codes (e.g. 1 = Thames -> 920, 17 = Highland -> 650);
# values are the yield factor multiplied by installed kWp to get kWh/yr.
_PV_YIELD_BY_REGION: Final[dict[int, float]] = {
1: 920, 2: 950, 3: 970, 4: 960, 5: 920, 6: 880, 7: 850, 8: 830,
9: 820, 10: 820, 11: 830, 12: 880, 13: 850, 14: 770, 15: 740,
16: 700, 17: 650, 18: 700, 19: 690, 20: 680, 21: 870, 22: 870,
}
# Fallback yield used when the region code is missing, unparseable, or not a
# key of the table above.
_PV_YIELD_UK_AVG: Final[float] = 850.0
def predicted_pv_generation_kwh(
    pv_total_peak_power_kw: Optional[float],
    region_code: Optional[str],
) -> float:
    """Annual PV generation (kWh/yr) for the dwelling's PV array(s).

    Linear in peak power; uses a SAP-region yield factor with South-facing,
    30 deg pitch, average overshading assumptions (SAP10.2 Table 6e).

    Args:
        pv_total_peak_power_kw: Total installed peak power (kWp). ``None``,
            zero, negative and non-finite (NaN / inf) values all mean
            "no usable PV data" and yield ``0.0``.
        region_code: Region code, parsed with ``int()`` and looked up in
            ``_PV_YIELD_BY_REGION``. ``None``, unparseable values and codes
            missing from the table fall back to ``_PV_YIELD_UK_AVG``.

    Returns:
        ``pv_total_peak_power_kw * yield_factor``, or ``0.0`` when there is
        no valid positive peak power. The result is always finite.
    """
    # Every comparison with NaN is False, so this single chained test rejects
    # zero, negatives, NaN and +inf. Without it a NaN peak power would slip
    # past a plain `<= 0` guard and poison the downstream fuel-cost feature.
    if pv_total_peak_power_kw is None or not (
        0.0 < pv_total_peak_power_kw < float("inf")
    ):
        return 0.0

    try:
        region = int(region_code)  # type: ignore[arg-type]
    except (TypeError, ValueError, OverflowError):
        # Deliberate best-effort: a missing (None -> TypeError), malformed
        # (ValueError) or infinite (OverflowError) region code degrades to the
        # UK-average yield rather than failing the whole row.
        return pv_total_peak_power_kw * _PV_YIELD_UK_AVG

    return pv_total_peak_power_kw * _PV_YIELD_BY_REGION.get(region, _PV_YIELD_UK_AVG)
def predicted_total_fuel_cost_gbp(
predicted_space_heating_kwh: float,
@ -31,19 +61,27 @@ def predicted_total_fuel_cost_gbp(
predicted_lighting_kwh: float,
main_fuel_code: Optional[int],
water_heating_fuel_code: Optional[int],
predicted_pv_kwh: float = 0.0,
) -> float:
"""Annual regulated fuel cost (gbp/yr).
Skips standing charges; sums delivered kWh * unit price across the
three included end uses. Lighting always uses standard electricity.
three included end uses (lighting always at standard electricity).
Slice 17a: subtracts predicted_pv_kwh * standard electricity price as
a flat PV credit. SAP10.2 splits PV between self-consumption and
export with separate rates; both are 13.19 p/kWh in Table 32 so a
single rate is fine at this fidelity.
"""
space_p_per_kwh = fuel_unit_price_p_per_kwh(main_fuel_code)
dhw_p_per_kwh = fuel_unit_price_p_per_kwh(water_heating_fuel_code)
light_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)
pv_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)
total_pence = (
predicted_space_heating_kwh * space_p_per_kwh
+ predicted_hot_water_kwh * dhw_p_per_kwh
+ predicted_lighting_kwh * light_p_per_kwh
- predicted_pv_kwh * pv_p_per_kwh
)
return total_pence / 100.0

View file

@ -8,10 +8,75 @@ import pytest
from domain.ml.ecf import (
predicted_ecf,
predicted_log10_ecf,
predicted_pv_generation_kwh,
predicted_total_fuel_cost_gbp,
)
def test_predicted_pv_generation_kwh_scales_linearly_with_peak_power() -> None:
    # Arrange — same region (code "1", yield 920 kWh/kWp/yr) for both arrays,
    # so doubling the peak power must exactly double the generation
    # (4 kWp -> 3680 kWh, 8 kWp -> 7360 kWh).
    # Act
    single_array_kwh = predicted_pv_generation_kwh(
        pv_total_peak_power_kw=4.0, region_code="1"
    )
    double_array_kwh = predicted_pv_generation_kwh(
        pv_total_peak_power_kw=8.0, region_code="1"
    )
    # Assert
    assert double_array_kwh == pytest.approx(2.0 * single_array_kwh, abs=0.01)
def test_predicted_pv_generation_kwh_returns_zero_for_no_pv() -> None:
    # Arrange / Act / Assert — both "zero kWp" and "peak power unknown" mean
    # the dwelling contributes no PV generation.
    for absent_peak_power in (0.0, None):
        generated_kwh = predicted_pv_generation_kwh(
            pv_total_peak_power_kw=absent_peak_power, region_code="1"
        )
        assert generated_kwh == 0.0
def test_predicted_total_fuel_cost_subtracts_pv_credit_at_electricity_price() -> None:
    # Arrange — gas space heating + gas DHW + electric lighting; the only
    # thing varied between the two calls is the PV generation.
    #   Base cost: (10000*3.48 + 2500*3.48 + 600*13.19) / 100 = 514.14
    #   PV credit: 3000 * 13.19 / 100                        = 395.70
    #   Net:       514.14 - 395.70                           = 118.44
    def cost_for(pv_kwh: float) -> float:
        return predicted_total_fuel_cost_gbp(
            predicted_space_heating_kwh=10000.0,
            predicted_hot_water_kwh=2500.0,
            predicted_lighting_kwh=600.0,
            main_fuel_code=1,
            water_heating_fuel_code=1,
            predicted_pv_kwh=pv_kwh,
        )

    # Act
    cost_with_pv = cost_for(3000.0)
    cost_without_pv = cost_for(0.0)
    # Assert
    assert cost_without_pv == pytest.approx(514.14, abs=0.05)
    assert cost_with_pv == pytest.approx(118.44, abs=0.05)
def test_predicted_total_fuel_cost_pv_kwh_defaults_to_zero_for_backwards_compatibility() -> None:
    # Arrange — callers written before slice 17a never pass predicted_pv_kwh.
    # Act
    cost_without_pv_argument = predicted_total_fuel_cost_gbp(
        predicted_space_heating_kwh=10000.0,
        predicted_hot_water_kwh=2500.0,
        predicted_lighting_kwh=600.0,
        main_fuel_code=1,
        water_heating_fuel_code=1,
    )
    # Assert — identical to passing predicted_pv_kwh=0 explicitly.
    assert cost_without_pv_argument == pytest.approx(514.14, abs=0.05)
def test_predicted_pv_generation_kwh_scotland_lower_than_southern_england() -> None:
    # Arrange — identical 4 kWp array placed in Thames (region 1) and in
    # Highland (region 17); only the regional yield factor differs.
    peak_power_kw = 4.0
    # Act
    thames_kwh = predicted_pv_generation_kwh(
        pv_total_peak_power_kw=peak_power_kw, region_code="1"
    )
    highland_kwh = predicted_pv_generation_kwh(
        pv_total_peak_power_kw=peak_power_kw, region_code="17"
    )
    # Assert
    assert thames_kwh > highland_kwh
def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None:
# Arrange — 12,000 kWh gas heat, 3,000 kWh gas DHW, 800 kWh lighting.
# Gas (code 1) 3.48 p/kWh, electricity (30) 13.19 p/kWh.

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "2.0.0"
assert schema.transform_version == "2.1.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():

View file

@ -30,6 +30,7 @@ from domain.ml.demand import (
from domain.ml.ecf import (
predicted_ecf,
predicted_log10_ecf,
predicted_pv_generation_kwh,
predicted_total_fuel_cost_gbp,
)
from domain.ml.envelope import envelope_heat_loss_w_per_k
@ -812,6 +813,15 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
"9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
),
),
"predicted_pv_generation_kwh": ColumnSpec(
dtype=float, nullable=False,
description=(
"Annual PV generation kWh: pv_total_peak_power_kw * yield_factor "
"(SAP10.2 Table 6e region-keyed; UK avg 850 kWh/kWp/yr). "
"Subtracted from predicted_total_fuel_cost at the standard "
"electricity rate per SAP10 §13 (slice 17a)."
),
),
"predicted_total_fuel_cost_gbp": ColumnSpec(
dtype=float, nullable=False,
description=(
@ -891,7 +901,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "2.0.0"
VERSION: str = "2.1.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -971,12 +981,18 @@ class EpcMlTransform:
)
main_fuel_code = heating_aggregates.get("primary_main_fuel_type")
water_fuel_code = heating_aggregates.get("water_heating_fuel")
pv_kw = pv_aggregates.get("pv_total_peak_power_kw") or 0.0
pred_pv_kwh = predicted_pv_generation_kwh(
pv_total_peak_power_kw=float(pv_kw),
region_code=epc.region_code,
)
pred_cost = predicted_total_fuel_cost_gbp(
predicted_space_heating_kwh=pred_space_kwh,
predicted_hot_water_kwh=pred_hw_kwh,
predicted_lighting_kwh=pred_light_kwh,
main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None,
water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None,
predicted_pv_kwh=pred_pv_kwh,
)
pred_ecf_v = predicted_ecf(
predicted_total_cost_gbp=pred_cost,
@ -1021,6 +1037,7 @@ class EpcMlTransform:
"predicted_space_heating_kwh": pred_space_kwh,
"predicted_hot_water_kwh": pred_hw_kwh,
"predicted_lighting_kwh": pred_light_kwh,
"predicted_pv_generation_kwh": pred_pv_kwh,
"predicted_total_fuel_cost_gbp": pred_cost,
"predicted_ecf": pred_ecf_v,
"predicted_log10_ecf": pred_log10_ecf_v,