diff --git a/packages/domain/src/domain/ml/ecf.py b/packages/domain/src/domain/ml/ecf.py
new file mode 100644
index 00000000..d18010e4
--- /dev/null
+++ b/packages/domain/src/domain/ml/ecf.py
@@ -0,0 +1,70 @@
+"""SAP10 §20.1 cost reconstruction: predicted total fuel cost + ECF.
+
+ECF = 0.42 * total_cost / (TFA + 45) (SAP rating relationship)
+
+Total cost (gbp/yr) = (space_kwh * space_fuel_price + dhw_kwh * dhw_fuel_price
+                       + lighting_kwh * elec_price) / 100 [pence -> pounds]
+
+Standing charges are deliberately omitted at this slice -- they add a
+fuel-mix-conditional offset the tree-based model can learn (ADR-0008,
+"+ Lighting" scope).
+"""
+
+from __future__ import annotations
+
+from math import log10
+from typing import Optional
+
+from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
+
+
+# SAP10 deflator applied to total cost before the rating equation (Table 32).
+_DEFLATOR: float = 0.42
+
+# Electricity standard tariff fuel code (Table 32) — used for lighting.
+_ELECTRICITY_STANDARD_CODE: int = 30
+
+
+def predicted_total_fuel_cost_gbp(
+    predicted_space_heating_kwh: float,
+    predicted_hot_water_kwh: float,
+    predicted_lighting_kwh: float,
+    main_fuel_code: Optional[int],
+    water_heating_fuel_code: Optional[int],
+) -> float:
+    """Annual regulated fuel cost (gbp/yr).
+
+    Skips standing charges; sums kWh * unit price (p/kWh) over space heating,
+    hot water and lighting, then / 100. Lighting is always priced as fuel code 30.
+    """
+    space_p_per_kwh = fuel_unit_price_p_per_kwh(main_fuel_code)
+    dhw_p_per_kwh = fuel_unit_price_p_per_kwh(water_heating_fuel_code)
+    light_p_per_kwh = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)
+    total_pence = (
+        predicted_space_heating_kwh * space_p_per_kwh
+        + predicted_hot_water_kwh * dhw_p_per_kwh
+        + predicted_lighting_kwh * light_p_per_kwh
+    )
+    return total_pence / 100.0
+
+
+def predicted_ecf(
+    predicted_total_cost_gbp: float,
+    total_floor_area_m2: Optional[float],
+) -> float:
+    """SAP Energy Cost Factor: 0.42 * total_cost / (TFA + 45); 0.0 if TFA is None or <= 0."""
+    if total_floor_area_m2 is None or total_floor_area_m2 <= 0:
+        return 0.0
+    return _DEFLATOR * predicted_total_cost_gbp / (total_floor_area_m2 + 45.0)
+
+
+def predicted_log10_ecf(predicted_ecf_value: float) -> float:
+    """log10(ECF). Returns 0.0 for non-positive input (the same value as ECF == 1)
+    so the feature stays finite for the (rare) all-PV property.
+
+    The SAP rating formula uses log10(ECF) for ECF >= 3.5 (low-SAP region);
+    in the high-SAP linear region the model can still use log10_ecf as a
+    monotone proxy for SAP."""
+    if predicted_ecf_value <= 0:
+        return 0.0
+    return log10(predicted_ecf_value)
diff --git a/packages/domain/src/domain/ml/tests/test_ecf.py b/packages/domain/src/domain/ml/tests/test_ecf.py
new file mode 100644
index 00000000..4b69f84d
--- /dev/null
+++ b/packages/domain/src/domain/ml/tests/test_ecf.py
@@ -0,0 +1,93 @@
+"""Tests for the predicted_total_cost / predicted_ecf / predicted_log10_ecf
+features (slice 16e, ADR-0008)."""
+
+from math import log10
+
+import pytest
+
+from domain.ml.ecf import (
+    predicted_ecf,
+    predicted_log10_ecf,
+    predicted_total_fuel_cost_gbp,
+)
+
+
+def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None:
+    # Arrange — 12,000 kWh gas heat, 3,000 kWh gas DHW, 800 kWh lighting.
+    # Gas (code 1) 3.48 p/kWh, electricity (30) 13.19 p/kWh.
+    # Expected total: (12000*3.48 + 3000*3.48 + 800*13.19) / 100 = (41760 + 10440 + 10552) / 100 = 627.52
+
+    # Act
+    result = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=12000.0,
+        predicted_hot_water_kwh=3000.0,
+        predicted_lighting_kwh=800.0,
+        main_fuel_code=1,
+        water_heating_fuel_code=1,
+    )
+
+    # Assert
+    assert result == pytest.approx(627.52, abs=0.05)
+
+
+def test_predicted_total_fuel_cost_electric_heated_higher_than_gas() -> None:
+    # Arrange — same kWh demand on electricity vs gas.
+
+    # Act
+    gas = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1,
+    )
+    elec = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=30, water_heating_fuel_code=30,
+    )
+
+    # Assert
+    assert elec > gas * 2.0
+
+
+def test_predicted_ecf_uses_sap_deflator_and_tfa_plus_45() -> None:
+    # Arrange — total cost 627.52, TFA 100.
+    # ECF = 0.42 * 627.52 / (100 + 45) = 263.56 / 145 = 1.817
+
+    # Act
+    result = predicted_ecf(predicted_total_cost_gbp=627.52, total_floor_area_m2=100.0)
+
+    # Assert
+    assert result == pytest.approx(1.817, abs=0.005)
+
+
+def test_predicted_ecf_returns_zero_for_unspecified_floor_area() -> None:
+    # Arrange / Act / Assert
+    assert predicted_ecf(predicted_total_cost_gbp=627.52, total_floor_area_m2=None) == 0.0
+
+
+def test_predicted_log10_ecf_matches_log10_for_positive_input() -> None:
+    # Arrange / Act / Assert
+    assert predicted_log10_ecf(1.817) == pytest.approx(log10(1.817), abs=0.0001)
+
+
+def test_predicted_log10_ecf_returns_zero_for_nonpositive_input() -> None:
+    # Arrange / Act / Assert
+    assert predicted_log10_ecf(0.0) == 0.0
+    assert predicted_log10_ecf(-1.5) == 0.0
+
+
+def test_predicted_ecf_grows_when_more_expensive_fuel() -> None:
+    # Arrange — same kWh, different fuel; electricity ECF >> gas ECF.
+
+    # Act
+    gas_cost = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=1, water_heating_fuel_code=1,
+    )
+    elec_cost = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+        predicted_lighting_kwh=600.0, main_fuel_code=30, water_heating_fuel_code=30,
+    )
+    gas_ecf = predicted_ecf(gas_cost, total_floor_area_m2=100.0)
+    elec_ecf = predicted_ecf(elec_cost, total_floor_area_m2=100.0)
+
+    # Assert — higher ECF -> worse SAP, matches intuition for resistive-electric heating.
+    assert elec_ecf > gas_ecf
diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py
index fdd71655..33d00cbc 100644
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
 
     # Assert
     assert isinstance(schema, TransformSchema)
-    assert schema.transform_version == "0.3.0"
+    assert schema.transform_version == "0.4.0"
     assert schema.transform_version == EpcMlTransform.VERSION
     assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
     for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py
index 6249adc1..83ae3cd7 100644
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@@ -27,6 +27,11 @@ from domain.ml.demand import (
     predicted_lighting_kwh,
     predicted_space_heating_kwh,
 )
+from domain.ml.ecf import (
+    predicted_ecf,
+    predicted_log10_ecf,
+    predicted_total_fuel_cost_gbp,
+)
 from domain.ml.envelope import envelope_heat_loss_w_per_k
 from domain.ml.sap_efficiencies import seasonal_efficiency, water_heating_efficiency
 from domain.ml.schema import ColumnSpec, TransformSchema
@@ -813,6 +818,31 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
             "9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
         ),
     ),
+    "predicted_total_fuel_cost_gbp": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "Annual regulated fuel cost (gbp/yr): space + DHW + lighting kWh "
+            "multiplied by Table 32 unit prices. Standing charges omitted "
+            "(approximately a constant fuel-mix offset the model can learn). "
+            "ADR-0008 '+ Lighting' scope."
+        ),
+    ),
+    "predicted_ecf": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "SAP10 §20.1 Energy Cost Factor: 0.42 * predicted_total_fuel_cost / "
+            "(TFA + 45). SAP score is a piecewise log/linear function of ECF. "
+            "ADR-0008."
+        ),
+    ),
+    "predicted_log10_ecf": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "log10 of predicted_ecf. Monotone with sap_score so a tree-based "
+            "model can use this as a near-target feature; the SAP rating's "
+            "piecewise kink at ECF=3.5 is one further split. ADR-0008."
+        ),
+    ),
 }
 
 
@@ -867,7 +897,7 @@ class EpcMlTransform:
     Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
     """
 
-    VERSION: str = "0.3.0"
+    VERSION: str = "0.4.0"
 
     def schema(self) -> TransformSchema:
         """The cross-repo ML data contract.
@@ -945,6 +975,20 @@ class EpcMlTransform:
             led_count=epc.led_fixed_lighting_bulbs_count,
             incandescent_count=epc.incandescent_fixed_lighting_bulbs_count,
         )
+        main_fuel_code = heating_aggregates.get("primary_main_fuel_type")
+        water_fuel_code = heating_aggregates.get("water_heating_fuel")
+        pred_cost = predicted_total_fuel_cost_gbp(
+            predicted_space_heating_kwh=pred_space_kwh,
+            predicted_hot_water_kwh=pred_hw_kwh,
+            predicted_lighting_kwh=pred_light_kwh,
+            main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None,
+            water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None,
+        )
+        pred_ecf_v = predicted_ecf(
+            predicted_total_cost_gbp=pred_cost,
+            total_floor_area_m2=epc.total_floor_area_m2,
+        )
+        pred_log10_ecf_v = predicted_log10_ecf(pred_ecf_v)
         return {
             # Features — geometry
             "total_floor_area_m2": epc.total_floor_area_m2,
@@ -984,6 +1028,9 @@ class EpcMlTransform:
             "predicted_space_heating_kwh": pred_space_kwh,
             "predicted_hot_water_kwh": pred_hw_kwh,
             "predicted_lighting_kwh": pred_light_kwh,
+            "predicted_total_fuel_cost_gbp": pred_cost,
+            "predicted_ecf": pred_ecf_v,
+            "predicted_log10_ecf": pred_log10_ecf_v,
             # Features — heating system (primary slot + water + secondary)
             **heating_aggregates,
             # Features — PV (capacity source + array aggregates by SAP octant)