slice 16e: predicted_total_fuel_cost / predicted_ecf / predicted_log10_ecf

ECF reconstruction per SAP10 §20.1 (Mid physics, ADR-0008):

  total_cost_gbp = (space_kwh*p_space + dhw_kwh*p_dhw + light_kwh*p_elec) / 100   [pence -> pounds]
  ECF = 0.42 * total_cost_gbp / (TFA + 45)   [0 when TFA is missing or non-positive]
  log10_ecf = log10(ECF)   [0 when ECF is non-positive]

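p_* are Table 32 unit prices (p/kWh) looked up via fuel_unit_price_p_per_kwh;
lighting is always priced at the standard electricity tariff. Standing
charges are deliberately omitted: they are an offset that depends only on
the fuel mix, which the model can learn (ADR-0008).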

predicted_sap_score is NOT emitted as a feature (ADR-0008 Mid not Deep):
the model is left to learn the piecewise log/linear transform from
log10_ecf -> SAP itself, keeping the data layer SAP-version-agnostic.

VERSION 0.3.0 -> 0.4.0 (MINOR).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 12:00:06 +00:00
parent eee5421112
commit cda469dd7d
4 changed files with 212 additions and 2 deletions

View file

@ -0,0 +1,70 @@
"""SAP10 §20.1 cost reconstruction: predicted total fuel cost + ECF.

    ECF = 0.42 * total_cost / (TFA + 45)    (SAP rating relationship)

    total_cost (gbp/yr) = (space_kwh * space_fuel_price
                           + dhw_kwh * dhw_fuel_price
                           + lighting_kwh * elec_price) / 100    [pence -> pounds]

Standing charges are deliberately omitted at this slice -- they add a
fuel-mix-conditional offset the tree-based model can learn (ADR-0008,
"+ Lighting" scope).
"""
from __future__ import annotations
from math import log10
from typing import Optional
from domain.ml.sap_efficiencies import fuel_unit_price_p_per_kwh
# SAP10 deflator applied to total cost before the rating equation (Table 32).
# predicted_ecf multiplies the annual cost by this factor:
#   ECF = _DEFLATOR * total_cost_gbp / (TFA + 45)
_DEFLATOR: float = 0.42
# Electricity standard tariff fuel code (Table 32) — used for lighting.
# predicted_total_fuel_cost_gbp passes this code to fuel_unit_price_p_per_kwh
# to price lighting kWh, independently of the heating / hot-water fuel codes.
_ELECTRICITY_STANDARD_CODE: int = 30
def predicted_total_fuel_cost_gbp(
    predicted_space_heating_kwh: float,
    predicted_hot_water_kwh: float,
    predicted_lighting_kwh: float,
    main_fuel_code: Optional[int],
    water_heating_fuel_code: Optional[int],
) -> float:
    """Return the annual regulated fuel cost in pounds per year.

    Each of the three end uses (space heating, hot water, lighting) is
    costed as delivered kWh times a unit price in pence per kWh; the three
    costs are summed and converted from pence to pounds. Standing charges
    are not included.

    Args:
        predicted_space_heating_kwh: Annual space-heating energy (kWh).
        predicted_hot_water_kwh: Annual hot-water energy (kWh).
        predicted_lighting_kwh: Annual lighting energy (kWh).
        main_fuel_code: Fuel code used to price space heating; passed
            straight to ``fuel_unit_price_p_per_kwh`` (may be ``None``).
        water_heating_fuel_code: Fuel code used to price hot water; passed
            straight to ``fuel_unit_price_p_per_kwh`` (may be ``None``).

    Returns:
        Total cost of the three end uses in pounds per year.
    """
    # Lighting is always priced at the standard electricity tariff,
    # whatever fuels serve heating and hot water.
    heating_price = fuel_unit_price_p_per_kwh(main_fuel_code)
    hot_water_price = fuel_unit_price_p_per_kwh(water_heating_fuel_code)
    lighting_price = fuel_unit_price_p_per_kwh(_ELECTRICITY_STANDARD_CODE)

    heating_pence = predicted_space_heating_kwh * heating_price
    hot_water_pence = predicted_hot_water_kwh * hot_water_price
    lighting_pence = predicted_lighting_kwh * lighting_price

    pence_per_year = heating_pence + hot_water_pence + lighting_pence
    # Unit prices are in pence; report pounds.
    return pence_per_year / 100.0
def predicted_ecf(
    predicted_total_cost_gbp: float,
    total_floor_area_m2: Optional[float],
) -> float:
    """SAP rating Energy Cost Factor: 0.42 * total_cost / (TFA + 45).

    Args:
        predicted_total_cost_gbp: Annual fuel cost in pounds per year.
        total_floor_area_m2: Total floor area (TFA) in square metres, or
            ``None`` when it is not known.

    Returns:
        The ECF, or ``0.0`` when the floor area is missing (``None`` or
        NaN), zero or negative.
    """
    # `not (x > 0)` rather than `x <= 0`: every ordered comparison with NaN
    # is False, so `nan <= 0` would let a NaN floor area (a common float
    # stand-in for a missing value) slip past the guard and produce a NaN ECF.
    if total_floor_area_m2 is None or not total_floor_area_m2 > 0:
        return 0.0
    return _DEFLATOR * predicted_total_cost_gbp / (total_floor_area_m2 + 45.0)
def predicted_log10_ecf(predicted_ecf_value: float) -> float:
    """Return log10(ECF), falling back to 0.0 when the ECF is unusable.

    The fallback covers non-positive and NaN input so the feature stays
    finite for the (rare) all-PV property or a record whose ECF could not
    be computed.

    The SAP rating formula uses log10(ECF) for ECF >= 3.5 (low-SAP region);
    in the high-SAP linear region the model can still use log10_ecf as a
    monotone proxy for SAP.

    Args:
        predicted_ecf_value: Energy Cost Factor.

    Returns:
        ``log10(predicted_ecf_value)`` for positive input, otherwise ``0.0``.
    """
    # `not (x > 0)` rather than `x <= 0`: `nan <= 0` is False, which would
    # send NaN on to log10 and hand a NaN feature back to the caller.
    if not predicted_ecf_value > 0:
        return 0.0
    return log10(predicted_ecf_value)

View file

@ -0,0 +1,93 @@
"""Tests for the predicted_total_cost / predicted_ecf / predicted_log10_ecf
features (slice 16e, ADR-0008)."""
from math import log10
import pytest
from domain.ml.ecf import (
predicted_ecf,
predicted_log10_ecf,
predicted_total_fuel_cost_gbp,
)
def test_predicted_total_fuel_cost_gas_heated_returns_expected_gbp() -> None:
    # Arrange — gas (code 1, 3.48 p/kWh) for heating and DHW; lighting is
    # priced at standard electricity (code 30, 13.19 p/kWh).
    #   space heating: 12000 kWh * 3.48  = 41760 p
    #   DHW:            3000 kWh * 3.48  = 10440 p
    #   lighting:        800 kWh * 13.19 = 10552 p
    #   total: 62752 p = 627.52 gbp
    inputs = {
        "predicted_space_heating_kwh": 12000.0,
        "predicted_hot_water_kwh": 3000.0,
        "predicted_lighting_kwh": 800.0,
        "main_fuel_code": 1,
        "water_heating_fuel_code": 1,
    }
    # Act
    cost_gbp = predicted_total_fuel_cost_gbp(**inputs)
    # Assert
    assert cost_gbp == pytest.approx(627.52, abs=0.05)
def test_predicted_total_fuel_cost_electric_heated_higher_than_gas() -> None:
    # Arrange — identical kWh demand for both dwellings; only the fuel differs.
    demand = {
        "predicted_space_heating_kwh": 10000.0,
        "predicted_hot_water_kwh": 2500.0,
        "predicted_lighting_kwh": 600.0,
    }
    # Act
    gas_cost = predicted_total_fuel_cost_gbp(
        **demand, main_fuel_code=1, water_heating_fuel_code=1
    )
    electric_cost = predicted_total_fuel_cost_gbp(
        **demand, main_fuel_code=30, water_heating_fuel_code=30
    )
    # Assert — electricity must cost more than double the gas case.
    assert electric_cost > gas_cost * 2.0
def test_predicted_ecf_uses_sap_deflator_and_tfa_plus_45() -> None:
    # Arrange — ECF = 0.42 * 627.52 / (100 + 45) = 263.56 / 145 = 1.817
    cost_gbp = 627.52
    floor_area_m2 = 100.0
    # Act
    ecf = predicted_ecf(
        predicted_total_cost_gbp=cost_gbp,
        total_floor_area_m2=floor_area_m2,
    )
    # Assert
    assert ecf == pytest.approx(1.817, abs=0.005)
def test_predicted_ecf_returns_zero_for_unspecified_floor_area() -> None:
    # Arrange / Act — floor area not supplied.
    ecf = predicted_ecf(predicted_total_cost_gbp=627.52, total_floor_area_m2=None)
    # Assert
    assert ecf == 0.0
def test_predicted_log10_ecf_matches_log10_for_positive_input() -> None:
    # Arrange
    expected = log10(1.817)
    # Act
    actual = predicted_log10_ecf(1.817)
    # Assert
    assert actual == pytest.approx(expected, abs=0.0001)
def test_predicted_log10_ecf_returns_zero_for_nonpositive_input() -> None:
    # Arrange / Act / Assert — zero and a negative value both hit the fallback.
    for non_positive_ecf in (0.0, -1.5):
        assert predicted_log10_ecf(non_positive_ecf) == 0.0
def test_predicted_ecf_grows_when_more_expensive_fuel() -> None:
    # Arrange — same kWh demand and floor area; only the fuel code changes.
    demand = {
        "predicted_space_heating_kwh": 10000.0,
        "predicted_hot_water_kwh": 2500.0,
        "predicted_lighting_kwh": 600.0,
    }
    # Act
    cost_on_gas = predicted_total_fuel_cost_gbp(
        **demand, main_fuel_code=1, water_heating_fuel_code=1
    )
    cost_on_electricity = predicted_total_fuel_cost_gbp(
        **demand, main_fuel_code=30, water_heating_fuel_code=30
    )
    ecf_on_gas = predicted_ecf(cost_on_gas, total_floor_area_m2=100.0)
    ecf_on_electricity = predicted_ecf(cost_on_electricity, total_floor_area_m2=100.0)
    # Assert — higher ECF -> worse SAP, matches intuition for resistive-electric heating.
    assert ecf_on_electricity > ecf_on_gas

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "0.3.0"
assert schema.transform_version == "0.4.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():

View file

@ -27,6 +27,11 @@ from domain.ml.demand import (
predicted_lighting_kwh,
predicted_space_heating_kwh,
)
from domain.ml.ecf import (
predicted_ecf,
predicted_log10_ecf,
predicted_total_fuel_cost_gbp,
)
from domain.ml.envelope import envelope_heat_loss_w_per_k
from domain.ml.sap_efficiencies import seasonal_efficiency, water_heating_efficiency
from domain.ml.schema import ColumnSpec, TransformSchema
@ -813,6 +818,31 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
"9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
),
),
"predicted_total_fuel_cost_gbp": ColumnSpec(
dtype=float, nullable=False,
description=(
"Annual regulated fuel cost (gbp/yr): space + DHW + lighting kWh "
"multiplied by Table 32 unit prices. Standing charges omitted "
"(approximately a constant fuel-mix offset the model can learn). "
"ADR-0008 '+ Lighting' scope."
),
),
"predicted_ecf": ColumnSpec(
dtype=float, nullable=False,
description=(
"SAP10 §20.1 Energy Cost Factor: 0.42 * predicted_total_fuel_cost / "
"(TFA + 45). SAP score is a piecewise log/linear function of ECF. "
"ADR-0008."
),
),
"predicted_log10_ecf": ColumnSpec(
dtype=float, nullable=False,
description=(
"log10 of predicted_ecf. Monotone with sap_score so a tree-based "
"model can use this as a near-target feature; the SAP rating's "
"piecewise kink at ECF=3.5 is one further split. ADR-0008."
),
),
}
@ -867,7 +897,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "0.3.0"
VERSION: str = "0.4.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -945,6 +975,20 @@ class EpcMlTransform:
led_count=epc.led_fixed_lighting_bulbs_count,
incandescent_count=epc.incandescent_fixed_lighting_bulbs_count,
)
main_fuel_code = heating_aggregates.get("primary_main_fuel_type")
water_fuel_code = heating_aggregates.get("water_heating_fuel")
pred_cost = predicted_total_fuel_cost_gbp(
predicted_space_heating_kwh=pred_space_kwh,
predicted_hot_water_kwh=pred_hw_kwh,
predicted_lighting_kwh=pred_light_kwh,
main_fuel_code=main_fuel_code if isinstance(main_fuel_code, int) else None,
water_heating_fuel_code=water_fuel_code if isinstance(water_fuel_code, int) else None,
)
pred_ecf_v = predicted_ecf(
predicted_total_cost_gbp=pred_cost,
total_floor_area_m2=epc.total_floor_area_m2,
)
pred_log10_ecf_v = predicted_log10_ecf(pred_ecf_v)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -984,6 +1028,9 @@ class EpcMlTransform:
"predicted_space_heating_kwh": pred_space_kwh,
"predicted_hot_water_kwh": pred_hw_kwh,
"predicted_lighting_kwh": pred_light_kwh,
"predicted_total_fuel_cost_gbp": pred_cost,
"predicted_ecf": pred_ecf_v,
"predicted_log10_ecf": pred_log10_ecf_v,
# Features — heating system (primary slot + water + secondary)
**heating_aggregates,
# Features — PV (capacity source + array aggregates by SAP octant)