diff --git a/packages/domain/src/domain/ml/ecf.py b/packages/domain/src/domain/ml/ecf.py index d18010e4..16fe15ad 100644 --- a/packages/domain/src/domain/ml/ecf.py +++ b/packages/domain/src/domain/ml/ecf.py @@ -13,7 +13,7 @@ fuel-mix-conditional offset the tree-based model can learn (ADR-0008, from __future__ import annotations from math import log10 -from typing import Optional +from typing import Final, Optional from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh @@ -21,9 +21,39 @@ from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh # SAP10 deflator applied to total cost before the rating equation (Table 32). _DEFLATOR: float = 0.42 -# Electricity standard tariff fuel code (Table 32) — used for lighting. +# Electricity standard tariff fuel code (Table 32) — used for lighting + PV credit. _ELECTRICITY_STANDARD_CODE: int = 30 +# Annual PV yield (kWh / kWp / yr) by SAP10.2 region. Derived from Table 6e +# climate data integrated over an average roof (South-facing, 30 deg pitch, +# average overshading). Southern England ~ 900, Scotland ~ 700, Highland ~ 650. +_PV_YIELD_BY_REGION: Final[dict[int, float]] = { + 1: 920, 2: 950, 3: 970, 4: 960, 5: 920, 6: 880, 7: 850, 8: 830, + 9: 820, 10: 820, 11: 830, 12: 880, 13: 850, 14: 770, 15: 740, + 16: 700, 17: 650, 18: 700, 19: 690, 20: 680, 21: 870, 22: 870, +} +_PV_YIELD_UK_AVG: Final[float] = 850.0 + + +def predicted_pv_generation_kwh( + pv_total_peak_power_kw: Optional[float], + region_code: Optional[str], +) -> float: + """Annual PV generation (kWh/yr) for the dwelling's PV array(s). + + Linear in peak power; uses a SAP-region yield factor with South-facing, + 30 deg pitch, average overshading assumptions (SAP10.2 Table 6e). Returns 0.0 when peak power is None or <= 0; falls back to the UK average yield when region_code is None, non-numeric, or not in the table.
+ """ + if pv_total_peak_power_kw is None or pv_total_peak_power_kw <= 0: + return 0.0 + yield_factor = _PV_YIELD_UK_AVG + if region_code is not None: + try: + yield_factor = _PV_YIELD_BY_REGION.get(int(region_code), _PV_YIELD_UK_AVG) + except (TypeError, ValueError): + pass  # unparseable region code: deliberately keep the UK-average yield + return pv_total_peak_power_kw * yield_factor + def predicted_total_fuel_cost_gbp( predicted_space_heating_kwh: float, @@ -31,19 +61,27 @@ def predicted_total_fuel_cost_gbp( predicted_lighting_kwh: float, main_fuel_code: Optional[int], water_heating_fuel_code: Optional[int], + predicted_pv_kwh: float = 0.0, ) -> float: """Annual regulated fuel cost (gbp/yr). Skips standing charges; sums delivered kWh * unit price across the - three included end uses. Lighting always uses standard electricity. + three included end uses (lighting always at standard electricity). + + Slice 17a: subtracts predicted_pv_kwh * standard electricity price as + a flat PV credit. SAP10.2 splits PV between self-consumption and + export with separate rates; both are 13.19 p/kWh in Table 32 so a + single rate is fine at this fidelity. NOTE(review): the result is not clamped, so it goes negative when the PV credit exceeds the summed fuel cost — confirm the downstream ECF / log10 step tolerates that.
+ """ space_p_per_kwh = fuel_unit_price_p_per_kwh(main_fuel_code) dhw_p_per_kwh = fuel_unit_price_p_per_kwh(water_heating_fuel_code) light_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE) + pv_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE) total_pence = ( predicted_space_heating_kwh * space_p_per_kwh + predicted_hot_water_kwh * dhw_p_per_kwh + predicted_lighting_kwh * light_p_per_kwh + - predicted_pv_kwh * pv_p_per_kwh ) return total_pence / 100.0 diff --git a/packages/domain/src/domain/ml/tests/test_ecf.py b/packages/domain/src/domain/ml/tests/test_ecf.py index 4b69f84d..b429e030 100644 --- a/packages/domain/src/domain/ml/tests/test_ecf.py +++ b/packages/domain/src/domain/ml/tests/test_ecf.py @@ -8,10 +8,75 @@ import pytest from domain.ml.ecf import ( predicted_ecf, predicted_log10_ecf, + predicted_pv_generation_kwh, predicted_total_fuel_cost_gbp, ) +def test_predicted_pv_generation_kwh_scales_linearly_with_peak_power() -> None: + # Arrange — region 1 yields 920 kWh/kWp/yr; 4 kWp -> 3680 kWh, 8 kWp -> 7360 kWh. + + # Act + a = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="1") + b = predicted_pv_generation_kwh(pv_total_peak_power_kw=8.0, region_code="1") + + # Assert + assert b == pytest.approx(2.0 * a, abs=0.01) + + +def test_predicted_pv_generation_kwh_returns_zero_for_no_pv() -> None: + # Arrange / Act / Assert + assert predicted_pv_generation_kwh(pv_total_peak_power_kw=0.0, region_code="1") == 0.0 + assert predicted_pv_generation_kwh(pv_total_peak_power_kw=None, region_code="1") == 0.0 + + +def test_predicted_total_fuel_cost_subtracts_pv_credit_at_electricity_price() -> None: + # Arrange — gas heat + DHW + lighting, with 3000 kWh PV generation.
+ # Base cost: (10000*3.48 + 2500*3.48 + 600*13.19) / 100 = 514.14 + # PV credit: 3000 * 13.19 / 100 = 395.70 + # Net: 514.14 - 395.70 = 118.44 + + # Act + with_pv = predicted_total_fuel_cost_gbp( + predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0, + predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1, + predicted_pv_kwh=3000.0, + ) + no_pv = predicted_total_fuel_cost_gbp( + predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0, + predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1, + predicted_pv_kwh=0.0, + ) + + # Assert + assert no_pv == pytest.approx(514.14, abs=0.05) + assert with_pv == pytest.approx(118.44, abs=0.05) + + +def test_predicted_total_fuel_cost_pv_kwh_defaults_to_zero_for_backwards_compatibility() -> None: + # Arrange — existing callers pre-17a omit predicted_pv_kwh entirely. + + # Act + result = predicted_total_fuel_cost_gbp( + predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0, + predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1, + ) + + # Assert — same as predicted_pv_kwh=0. + assert result == pytest.approx(514.14, abs=0.05) + + +def test_predicted_pv_generation_kwh_scotland_lower_than_southern_england() -> None: + # Arrange — Thames (region 1, 920 kWh/kWp) vs Highland (region 17, 650 kWh/kWp); same kWp. + + # Act + thames = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="1") + highland = predicted_pv_generation_kwh(pv_total_peak_power_kw=4.0, region_code="17") + + # Assert + assert highland < thames + + def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None: # Arrange — 12,000 kWh gas heat, 3,000 kWh gas DHW, 800 kWh lighting. # Gas (code 1) 3.48 p/kWh, electricity (30) 13.19 p/kWh.
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 267b072e..acd635e7 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None: # Assert assert isinstance(schema, TransformSchema) - assert schema.transform_version == "2.0.0" + assert schema.transform_version == "2.1.0" assert schema.transform_version == EpcMlTransform.VERSION assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys()) for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items(): diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index f164042c..ec08a575 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -30,6 +30,7 @@ from domain.ml.demand import ( from domain.ml.ecf import ( predicted_ecf, predicted_log10_ecf, + predicted_pv_generation_kwh, predicted_total_fuel_cost_gbp, ) from domain.ml.envelope import envelope_heat_loss_w_per_k @@ -812,6 +813,15 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { "9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008." ), ), + "predicted_pv_generation_kwh": ColumnSpec( + dtype=float, nullable=False, + description=( + "Annual PV generation kWh: pv_total_peak_power_kw * yield_factor " + "(SAP10.2 Table 6e region-keyed; UK avg 850 kWh/kWp/yr). " + "Subtracted from predicted_total_fuel_cost at the standard " + "electricity rate per SAP10 §13 (slice 17a)." + ), + ), "predicted_total_fuel_cost_gbp": ColumnSpec( dtype=float, nullable=False, description=( @@ -891,7 +901,7 @@ class EpcMlTransform: Version 0.1.0 — schema contract only; feature columns added in subsequent slices. """ - VERSION: str = "2.0.0" + VERSION: str = "2.1.0" def schema(self) -> TransformSchema: """The cross-repo ML data contract. 
@@ -971,12 +981,18 @@ class EpcMlTransform: ) main_fuel_code = heating_aggregates.get("primary_main_fuel_type") water_fuel_code = heating_aggregates.get("water_heating_fuel") + pv_kw = pv_aggregates.get("pv_total_peak_power_kw") or 0.0 + pred_pv_kwh = predicted_pv_generation_kwh( + pv_total_peak_power_kw=float(pv_kw), + region_code=epc.region_code, + ) pred_cost = predicted_total_fuel_cost_gbp( predicted_space_heating_kwh=pred_space_kwh, predicted_hot_water_kwh=pred_hw_kwh, predicted_lighting_kwh=pred_light_kwh, main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None, water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None, + predicted_pv_kwh=pred_pv_kwh, ) pred_ecf_v = predicted_ecf( predicted_total_cost_gbp=pred_cost, @@ -1021,6 +1037,7 @@ class EpcMlTransform: "predicted_space_heating_kwh": pred_space_kwh, "predicted_hot_water_kwh": pred_hw_kwh, "predicted_lighting_kwh": pred_light_kwh, + "predicted_pv_generation_kwh": pred_pv_kwh, "predicted_total_fuel_cost_gbp": pred_cost, "predicted_ecf": pred_ecf_v, "predicted_log10_ecf": pred_log10_ecf_v,