slice 16e: predicted_total_fuel_cost / predicted_ecf / predicted_log10_ecf

ECF reconstruction per SAP10 §20.1 (Mid physics, ADR-0008): total_cost_gbp = (space_kwh*p_space + dhw_kwh*p_dhw + light_kwh*p_elec) / 100; ECF = 0.42 * total_cost_gbp / (TFA + 45); log10_ecf = log10(ECF), or 0 when ECF is non-positive. The p_* terms are Table 32 unit prices obtained via fuel_unit_price_p_per_kwh. Standing charges are deliberately omitted (constant fuel-mix offset; ADR-0008). predicted_sap_score is NOT emitted as a feature (ADR-0008 Mid not Deep): the model is left to learn the piecewise log/linear transform from log10_ecf -> SAP itself, keeping the data layer SAP-version-agnostic. VERSION 0.3.0 -> 0.4.0 (MINOR).
2026-07-27 23:35:01 +00:00 · 2026-05-17 12:00:06 +00:00 · 2026-05-17 12:00:06 +00:00 · cda469dd7d
commit cda469dd7d
parent eee5421112
4 changed files with 212 additions and 2 deletions
--- a/packages/domain/src/domain/ml/ecf.py
+++ b/packages/domain/src/domain/ml/ecf.py
@ -0,0 +1,70 @@
+"""SAP10 §20.1 cost reconstruction: predicted total fuel cost + ECF.
+
+ECF = 0.42 * total_cost / (TFA + 45)   (SAP rating relationship)
+
+Total cost (gbp/yr) = (space_kwh * space_fuel_price + dhw_kwh * dhw_fuel_price
+                       + lighting_kwh * elec_price) / 100   [pence -> pounds]
+
+Standing charges are deliberately omitted at this slice -- they add a
+fuel-mix-conditional offset the tree-based model can learn (ADR-0008,
+"+ Lighting" scope).
+"""
+
+from __future__ import annotations
+
+from math import log10
+from typing import Optional
+
+from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
+
+
+# SAP10 energy cost deflator (Table 32): the multiplier in ECF = deflator * cost / (TFA + 45).
+_DEFLATOR: float = 0.42
+
+# Electricity standard tariff fuel code (Table 32) — the code used to price lighting kWh.
+_ELECTRICITY_STANDARD_CODE: int = 30
+
+
+def predicted_total_fuel_cost_gbp(
+    predicted_space_heating_kwh: float,
+    predicted_hot_water_kwh: float,
+    predicted_lighting_kwh: float,
+    main_fuel_code: Optional[int],
+    water_heating_fuel_code: Optional[int],
+) -> float:
+    """Return the predicted annual fuel cost in pounds per year.
+
+    Adds kWh * unit price (p/kWh) for space heating, hot water and lighting,
+    with lighting priced at the standard electricity code. No standing charges.
+    """
+    space_pence = predicted_space_heating_kwh * fuel_unit_price_p_per_kwh(main_fuel_code)
+    hot_water_pence = predicted_hot_water_kwh * fuel_unit_price_p_per_kwh(
+        water_heating_fuel_code
+    )
+    lighting_pence = predicted_lighting_kwh * fuel_unit_price_p_per_kwh(
+        _ELECTRICITY_STANDARD_CODE
+    )
+    # Unit prices are in pence per kWh, so divide by 100 to report pounds.
+    return (space_pence + hot_water_pence + lighting_pence) / 100.0
+
+
+def predicted_ecf(
+    predicted_total_cost_gbp: float,
+    total_floor_area_m2: Optional[float],
+) -> float:
+    """Energy Cost Factor: 0.42 * total_cost / (TFA + 45); 0.0 if TFA is None or <= 0."""
+    if total_floor_area_m2 is not None and not (total_floor_area_m2 <= 0):
+        return _DEFLATOR * predicted_total_cost_gbp / (total_floor_area_m2 + 45.0)
+    return 0.0
+
+
+def predicted_log10_ecf(predicted_ecf_value: float) -> float:
+    """Return log10 of the ECF, or 0.0 when the ECF is zero or negative.
+
+    The 0.0 fallback keeps the feature finite where log10 is undefined.
+    The SAP rating formula uses log10(ECF) for ECF >= 3.5 (low-SAP region);
+    in the linear high-SAP region this value still serves as a monotone
+    proxy for SAP.
+    """
+    # math.log10 raises ValueError for zero or negative input, hence the guard.
+    return 0.0 if predicted_ecf_value <= 0 else log10(predicted_ecf_value)
--- a/packages/domain/src/domain/ml/tests/test_ecf.py
+++ b/packages/domain/src/domain/ml/tests/test_ecf.py
@ -0,0 +1,93 @@
+"""Tests for the predicted_total_cost / predicted_ecf / predicted_log10_ecf
+features (slice 16e, ADR-0008)."""
+
+from math import log10
+
+import pytest
+
+from domain.ml.ecf import (
+    predicted_ecf,
+    predicted_log10_ecf,
+    predicted_total_fuel_cost_gbp,
+)
+
+
+def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None:
+    # Arrange — gas (code 1, 3.48 p/kWh) for space heating and hot water;
+    # lighting is costed at the electricity (code 30) price of 13.19 p/kWh.
+    # Pence: 12000*3.48 + 3000*3.48 + 800*13.19 = 41760 + 10440 + 10552 = 62752.
+    expected_gbp = 627.52
+    # Act
+    cost_gbp = predicted_total_fuel_cost_gbp(
+        predicted_space_heating_kwh=12000.0,
+        predicted_hot_water_kwh=3000.0,
+        predicted_lighting_kwh=800.0,
+        main_fuel_code=1,
+        water_heating_fuel_code=1,
+    )
+
+    # Assert
+    assert cost_gbp == pytest.approx(expected_gbp, abs=0.05)
+
+
+def test_predicted_total_fuel_cost_electric_heated_higher_than_gas() -> None:
+    # Arrange — one demand profile, costed once per fuel code.
+    def cost_for(fuel_code: int) -> float:
+        return predicted_total_fuel_cost_gbp(
+            predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+            predicted_lighting_kwh=600.0,
+            main_fuel_code=fuel_code, water_heating_fuel_code=fuel_code,
+        )
+
+    # Act
+    gas_gbp = cost_for(1)
+    electric_gbp = cost_for(30)
+
+    # Assert — electricity must cost more than double the gas total.
+    assert electric_gbp > gas_gbp * 2.0
+
+
+def test_predicted_ecf_uses_sap_deflator_and_tfa_plus_45() -> None:
+    # Arrange — 0.42 * 627.52 / (100 + 45) = 263.56 / 145, approximately 1.817.
+    cost_gbp, floor_area_m2 = 627.52, 100.0
+
+    # Act
+    ecf = predicted_ecf(predicted_total_cost_gbp=cost_gbp, total_floor_area_m2=floor_area_m2)
+
+    # Assert
+    assert ecf == pytest.approx(1.817, abs=0.005)
+
+
+def test_predicted_ecf_returns_zero_for_unspecified_floor_area() -> None:
+    # Arrange / Act / Assert — a None floor area yields exactly 0.0.
+    assert predicted_ecf(total_floor_area_m2=None, predicted_total_cost_gbp=627.52) == 0.0
+
+
+def test_predicted_log10_ecf_matches_log10_for_positive_input() -> None:
+    # Arrange / Act / Assert — for a positive ECF the feature is plain log10.
+    assert predicted_log10_ecf(predicted_ecf_value=1.817) == pytest.approx(log10(1.817), abs=1e-4)
+
+
+def test_predicted_log10_ecf_returns_zero_for_nonpositive_input() -> None:
+    # Arrange / Act / Assert — zero and a negative ECF both map to 0.0.
+    for nonpositive_ecf in (0.0, -1.5):
+        assert predicted_log10_ecf(nonpositive_ecf) == 0.0
+
+
+def test_predicted_ecf_grows_when_more_expensive_fuel() -> None:
+    # Arrange — identical demand and floor area; only the fuel code varies.
+    def ecf_for(fuel_code: int) -> float:
+        cost_gbp = predicted_total_fuel_cost_gbp(
+            predicted_space_heating_kwh=10000.0, predicted_hot_water_kwh=2500.0,
+            predicted_lighting_kwh=600.0,
+            main_fuel_code=fuel_code, water_heating_fuel_code=fuel_code,
+        )
+        return predicted_ecf(cost_gbp, total_floor_area_m2=100.0)
+
+    # Act
+    gas_ecf = ecf_for(1)
+    electric_ecf = ecf_for(30)
+
+    # Assert — the pricier fuel yields the larger ECF (a higher ECF means a
+    # worse SAP rating), matching intuition for resistive-electric heating.
+    assert electric_ecf > gas_ecf
--- a/packages/domain/src/domain/ml/tests/test_transform.py
+++ b/packages/domain/src/domain/ml/tests/test_transform.py
@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:

    # Assert
    assert isinstance(schema, TransformSchema)
-    assert schema.transform_version == "0.3.0"
+    assert schema.transform_version == "0.4.0"
    assert schema.transform_version == EpcMlTransform.VERSION
    assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
    for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():
--- a/packages/domain/src/domain/ml/transform.py
+++ b/packages/domain/src/domain/ml/transform.py
@ -27,6 +27,11 @@ from domain.ml.demand import (
    predicted_lighting_kwh,
    predicted_space_heating_kwh,
 )
+from domain.ml.ecf import (
+    predicted_ecf,
+    predicted_log10_ecf,
+    predicted_total_fuel_cost_gbp,
+)
 from domain.ml.envelope import envelope_heat_loss_w_per_k
 from domain.ml.sap_efficiencies import seasonal_efficiency, water_heating_efficiency
 from domain.ml.schema import ColumnSpec, TransformSchema
@ -813,6 +818,31 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
            "9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
        ),
    ),
+    "predicted_total_fuel_cost_gbp": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "Annual regulated fuel cost (gbp/yr): space + DHW + lighting kWh "
+            "multiplied by Table 32 unit prices. Standing charges omitted "
+            "(approximately a constant fuel-mix offset the model can learn). "
+            "ADR-0008 '+ Lighting' scope."
+        ),
+    ),
+    "predicted_ecf": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "SAP10 §20.1 Energy Cost Factor: 0.42 * predicted_total_fuel_cost / "
+            "(TFA + 45). SAP score is a piecewise log/linear function of ECF. "
+            "ADR-0008."
+        ),
+    ),
+    "predicted_log10_ecf": ColumnSpec(
+        dtype=float, nullable=False,
+        description=(
+            "log10 of predicted_ecf. Monotone with sap_score so a tree-based "
+            "model can use this as a near-target feature; the SAP rating's "
+            "piecewise kink at ECF=3.5 is one further split. ADR-0008."
+        ),
+    ),
 }


@ -867,7 +897,7 @@ class EpcMlTransform:
    Version 0.1.0 — schema contract only; feature columns added in subsequent slices.
    """

-    VERSION: str = "0.3.0"
+    VERSION: str = "0.4.0"

    def schema(self) -> TransformSchema:
        """The cross-repo ML data contract.
@ -945,6 +975,20 @@ class EpcMlTransform:
            led_count=epc.led_fixed_lighting_bulbs_count,
            incandescent_count=epc.incandescent_fixed_lighting_bulbs_count,
        )
+        main_fuel_code = heating_aggregates.get("primary_main_fuel_type")
+        water_fuel_code = heating_aggregates.get("water_heating_fuel")
+        pred_cost = predicted_total_fuel_cost_gbp(
+            predicted_space_heating_kwh=pred_space_kwh,
+            predicted_hot_water_kwh=pred_hw_kwh,
+            predicted_lighting_kwh=pred_light_kwh,
+            main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None,
+            water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None,
+        )
+        pred_ecf_v = predicted_ecf(
+            predicted_total_cost_gbp=pred_cost,
+            total_floor_area_m2=epc.total_floor_area_m2,
+        )
+        pred_log10_ecf_v = predicted_log10_ecf(pred_ecf_v)
        return {
            # Features — geometry
            "total_floor_area_m2": epc.total_floor_area_m2,
@ -984,6 +1028,9 @@ class EpcMlTransform:
            "predicted_space_heating_kwh": pred_space_kwh,
            "predicted_hot_water_kwh": pred_hw_kwh,
            "predicted_lighting_kwh": pred_light_kwh,
+            "predicted_total_fuel_cost_gbp": pred_cost,
+            "predicted_ecf": pred_ecf_v,
+            "predicted_log10_ecf": pred_log10_ecf_v,
            # Features — heating system (primary slot + water + secondary)
            **heating_aggregates,
            # Features — PV (capacity source + array aggregates by SAP octant)