slice 17a: PV-export credit in predicted_total_fuel_cost (v2.1.0)

Closes the high-SAP under-prediction gap diagnosed in 16h. 40% of SAP-85+ properties have PV, yet the mean predicted_ecf in that band was 1.74, which maps to SAP ~88 via the formula versus a label SAP of 90+. The error also ran the wrong way round: PV homes had a HIGHER predicted_ecf than non-PV homes in the same band, because the cost reconstruction applied zero export credit. New helper: predicted_pv_generation_kwh(kWp, region) -> kWh/yr, computed from a SAP10.2 Table 6e regional yield factor (UK avg 850 kWh/kWp/yr; Highland 650; Thames 920). predicted_total_fuel_cost_gbp now subtracts pv_kwh * the standard electricity price (Table 32 code 30; self-consumption and export are both priced at 13.19 p/kWh). The new feature column predicted_pv_generation_kwh is exposed alongside the adjusted cost so the model sees both signals. VERSION 2.0.0 -> 2.1.0 (MINOR: a column was added; the semantics of an existing column shifted, but this is pre-deploy so no consumer breaks).
2026-07-27 23:35:01 +00:00 · 2026-05-17 15:28:09 +00:00 · 2026-05-17 15:28:09 +00:00 · 06ce3205b1
commit 06ce3205b1
parent 6072d8795a
4 changed files with 125 additions and 5 deletions
--- a/packages/domain/src/domain/ml/ecf.py
+++ b/packages/domain/src/domain/ml/ecf.py
@ -13,7 +13,7 @@ fuel-mix-conditional offset the tree-based model can learn (ADR-0008,
 from __future__ import annotations

 from math import log10
-from typing import Optional
+from typing import Final, Optional

 from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh

@ -21,9 +21,39 @@ from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
 # SAP10 deflator applied to total cost before the rating equation (Table 32).
 _DEFLATOR: float = 0.42

-# Electricity standard tariff fuel code (Table 32) — used for lighting.
+# Electricity standard tariff fuel code (Table 32) — used for lighting + PV credit.
 _ELECTRICITY_STANDARD_CODE: int = 30

+# Annual PV yield (kWh / kWp / yr) by SAP10.2 region.  Derived from Table 6e
+# climate data integrated over an average roof (South-facing, 30 deg pitch,
+# average overshading).  Southern England ~ 900, Scotland ~ 700, Highland ~ 650.
+_PV_YIELD_BY_REGION: Final[dict[int, float]] = {
+    1: 920, 2: 950, 3: 970, 4: 960, 5: 920, 6: 880, 7: 850, 8: 830,
+    9: 820, 10: 820, 11: 830, 12: 880, 13: 850, 14: 770, 15: 740,
+    16: 700, 17: 650, 18: 700, 19: 690, 20: 680, 21: 870, 22: 870,
+}
+_PV_YIELD_UK_AVG: Final[float] = 850.0
+
+
+def predicted_pv_generation_kwh(
+    pv_total_peak_power_kw: Optional[float],
+    region_code: Optional[str],
+) -> float:
+    """Annual PV generation (kWh/yr) for the dwelling's PV array(s).
+
+    Linear in peak power; uses a SAP-region yield factor with South-facing,
+    30 deg pitch, average overshading assumptions (SAP10.2 Table 6e).
+    """
+    if pv_total_peak_power_kw is None or pv_total_peak_power_kw <= 0:
+        return 0.0
+    yield_factor = _PV_YIELD_UK_AVG
+    if region_code is not None:
+        try:
+            yield_factor = _PV_YIELD_BY_REGION.get(int(region_code), _PV_YIELD_UK_AVG)
+        except (TypeError, ValueError):
+            pass
+    return pv_total_peak_power_kw * yield_factor
+

 def predicted_total_fuel_cost_gbp(
    predicted_space_heating_kwh: float,
@ -31,19 +61,27 @@ def predicted_total_fuel_cost_gbp(
    predicted_lighting_kwh: float,
    main_fuel_code: Optional[int],
    water_heating_fuel_code: Optional[int],
+    predicted_pv_kwh: float = 0.0,
 ) -> float:
    """Annual regulated fuel cost (gbp/yr).

    Skips standing charges; sums delivered kWh * unit price across the
-    three included end uses. Lighting always uses standard electricity.
+    three included end uses (lighting always at standard electricity).
+
+    Slice 17a: subtracts predicted_pv_kwh * standard electricity price as
+    a flat PV credit.  SAP10.2 splits PV between self-consumption and
+    export with separate rates; both are 13.19 p/kWh in Table 32 so a
+    single rate is fine at this fidelity.
    """
    space_p_per_kwh = fuel_unit_price_p_per_kwh(main_fuel_code)
    dhw_p_per_kwh = fuel_unit_price_p_per_kwh(water_heating_fuel_code)
    light_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)
+    pv_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)
    total_pence = (
        predicted_space_heating_kwh * space_p_per_kwh
        + predicted_hot_water_kwh * dhw_p_per_kwh
        + predicted_lighting_kwh * light_p_per_kwh
+        - predicted_pv_kwh * pv_p_per_kwh
    )
    return total_pence / 100.0

--- a/packages/domain/src/domain/ml/tests/test_ecf.py
+++ b/packages/domain/src/domain/ml/tests/test_ecf.py
@ -8,10 +8,75 @@ import pytest
 from domain.ml.ecf import (
    predicted_ecf,
    predicted_log10_ecf,
+    predicted_pv_generation_kwh,
    predicted_total_fuel_cost_gbp,
 )


+def test_predicted_pv_generation_kwh_scales_linearly_with_peak_power() -> None:
+    # Arrange — region 1 (Thames) yield is 920 kWh/kWp/yr; 4 kWp -> 3680 kWh, 8 kWp -> 7360 kWh.
+
+    # Act
+    a = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="1")
+    b = predicted_pv_generation_kwh(pv_total_peak_power_kw=8.0, region_code="1")
+
+    # Assert
+    assert b == pytest.approx(2.0 * a, abs=0.01)
+
+
+def test_predicted_pv_generation_kwh_returns_zero_for_no_pv() -> None:
+    # Arrange / Act / Assert
+    assert predicted_pv_generation_kwh(pv_total_peak_power_kw=0.0, region_code="1") == 0.0
+    assert predicted_pv_generation_kwh(pv_total_peak_power_kw=None, region_code="1") == 0.0
+
+
+def test_predicted_total_fuel_cost_subtracts_pv_credit_at_electricity_price() -> None:
+    # Arrange — gas heat + DHW + lighting, with 3000 kWh PV generation.
+    # Base cost: (10000*3.48 + 2500*3.48 + 600*13.19) / 100 = 514.14
+    # PV credit: 3000 * 13.19 / 100 = 395.70
+    # Net: 514.14 - 395.70 = 118.44
+
+    # Act
+    with_pv = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1,
+        predicted_pv_kwh=3000.0,
+    )
+    no_pv = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1,
+        predicted_pv_kwh=0.0,
+    )
+
+    # Assert
+    assert no_pv == pytest.approx(514.14, abs=0.05)
+    assert with_pv == pytest.approx(118.44, abs=0.05)
+
+
+def test_predicted_total_fuel_cost_pv_kwh_defaults_to_zero_for_backwards_compatibility() -> None:
+    # Arrange — existing callers pre-17a omit predicted_pv_kwh entirely.
+
+    # Act
+    result = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1,
+    )
+
+    # Assert — same as predicted_pv_kwh=0.
+    assert result == pytest.approx(514.14, abs=0.05)
+
+
+def test_predicted_pv_generation_kwh_scotland_lower_than_southern_england() -> None:
+    # Arrange — Thames (region 1) vs Highland (region 17); same kWp.
+
+    # Act
+    thames = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="1")
+    highland = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="17")
+
+    # Assert
+    assert highland < thames
+
+
 def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None:
    # Arrange — 12,000 kWh gas heat, 3,000 kWh gas DHW, 800 kWh lighting.
    # Gas (code 1) 3.48 p/kWh, electricity (30) 13.19 p/kWh.
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:

    # Assert
    assert isinstance(schema, TransformSchema)
-    assert schema.transform_version == "2.0.0"
+    assert schema.transform_version == "2.1.0"
    assert schema.transform_version == EpcMlTransform.VERSION
    assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
    for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@ -30,6 +30,7 @@ from domain.ml.demand import (
 from domain.ml.ecf import (
    predicted_ecf,
    predicted_log10_ecf,
+    predicted_pv_generation_kwh,
    predicted_total_fuel_cost_gbp,
 )
 from domain.ml.envelope import envelope_heat_loss_w_per_k
@ -812,6 +813,15 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
            "9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
        ),
    ),
+    "predicted_pv_generation_kwh": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "Annual PV generation kWh: pv_total_peak_power_kw * yield_factor "
+            "(SAP10.2 Table 6e region-keyed; UK avg 850 kWh/kWp/yr). "
+            "Subtracted from predicted_total_fuel_cost at the standard "
+            "electricity rate per SAP10 §13 (slice 17a)."
+        ),
+    ),
    "predicted_total_fuel_cost_gbp": ColumnSpec(
        dtype=float, nullable=False,
        description=(
@ -891,7 +901,7 @@ class EpcMlTransform:
    Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
    """

-    VERSION: str = "2.0.0"
+    VERSION: str = "2.1.0"

    def schema(self) -> TransformSchema:
        """The cross-repo ML data contract.
@ -971,12 +981,18 @@ class EpcMlTransform:
        )
        main_fuel_code = heating_aggregates.get("primary_main_fuel_type")
        water_fuel_code = heating_aggregates.get("water_heating_fuel")
+        pv_kw = pv_aggregates.get("pv_total_peak_power_kw") or 0.0
+        pred_pv_kwh = predicted_pv_generation_kwh(
+            pv_total_peak_power_kw=float(pv_kw),
+            region_code=epc.region_code,
+        )
        pred_cost = predicted_total_fuel_cost_gbp(
            predicted_space_heating_kwh=pred_space_kwh,
            predicted_hot_water_kwh=pred_hw_kwh,
            predicted_lighting_kwh=pred_light_kwh,
            main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None,
            water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None,
+            predicted_pv_kwh=pred_pv_kwh,
        )
        pred_ecf_v = predicted_ecf(
            predicted_total_cost_gbp=pred_cost,
@ -1021,6 +1037,7 @@ class EpcMlTransform:
            "predicted_space_heating_kwh": pred_space_kwh,
            "predicted_hot_water_kwh": pred_hw_kwh,
            "predicted_lighting_kwh": pred_light_kwh,
+            "predicted_pv_generation_kwh": pred_pv_kwh,
            "predicted_total_fuel_cost_gbp": pred_cost,
            "predicted_ecf": pred_ecf_v,
            "predicted_log10_ecf": pred_log10_ecf_v,