slice 17b: SAP Appendix J port for predicted_hot_water_kwh (v2.2.0)

The 17a-baseline residuals showed cylinder_insulation_thickness_mm,
cylinder_size and cylinder_insulation_type at ranks 3/6/9 for hot_water_kwh
because the crude 16d formula didn't use them -- the model had to learn
storage physics from raw features.

Now predicted_hot_water_kwh sums:
  useful_demand   (existing, unchanged)
+ distribution_loss     = useful * 0.15
+ storage_loss          = volume * insulation_factor * 365 * 0.6
                          (volume from cylinder_size, factor from
                           cylinder_insulation_thickness_mm or age-default)
+ primary_circuit_loss  = 245 (age A-J) or 60 (age K-M)
- wwhrs_credit          = useful * 0.12  if number_baths_wwhrs > 0
- solar_hw_credit       = 250            if solar_water_heating
(sum of the above) / seasonal_efficiency_water = delivered kWh

Same inputs we already extract; just plumbed through. Expected:
predicted_hot_water_kwh feature usage jumps from rank 10 to top tier,
hot_water_kwh MAPE drops from 7.17%, and predicted_ecf gets tighter for
gas-heat + electric-DHW mid-band homes -> SAP MAPE marginally better.

5 new AAA tests; VERSION 2.1.0 -> 2.2.0 (MINOR; column semantics enriched).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 15:54:42 +00:00
parent 06ce3205b1
commit 4df1ee78b7
4 changed files with 209 additions and 9 deletions

View file

@ -99,11 +99,25 @@ def _default_occupants_sap_j(total_floor_area_m2: float) -> float:
def predicted_hot_water_kwh(
total_floor_area_m2: Optional[float],
seasonal_efficiency_water: float,
*,
cylinder_size: Optional[int] = None,
cylinder_insulation_thickness_mm: Optional[int] = None,
cylinder_insulation_type: Optional[int] = None,
age_band: Optional[str] = None,
has_wwhrs: bool = False,
has_solar_water_heating: bool = False,
) -> float:
"""Annual delivered hot-water kWh (SAP10.2 Appendix J simplified).
"""Annual delivered hot-water kWh per SAP10.2 Appendix J (slice 17b).
Uses default occupancy from TFA, daily volume 25*N+36 litres, delta-T
55 - 12 = 43 K, no FGHRS / WWHRS adjustment.
Components (all kWh useful, sum then divided by efficiency for delivered):
useful_demand = 4.18 * Vd * 43 * 365 / 3600 (Vd in litres/day)
distribution_loss = useful_demand * 0.15
storage_loss = volume * insulation_factor * 365 * 0.6
primary_loss(age) = 245 (A-J) or 60 (K-M)
wwhrs_credit = useful_demand * 0.12 if has_wwhrs
solar_hw_credit = 250 if has_solar_water_heating
Defaults follow RdSAP10 §11 / Table 29 for missing cylinder fields.
"""
if total_floor_area_m2 is None or total_floor_area_m2 <= 0:
return 0.0
@ -111,11 +125,94 @@ def predicted_hot_water_kwh(
return 0.0
n = _default_occupants_sap_j(total_floor_area_m2)
vd_litres = 25.0 * n + 36.0
# 4.18 kJ/(kg K) * litres/day * delta-K * days = kJ/yr; /3600 -> kWh/yr.
useful_kwh = 4.18 * vd_litres * (55.0 - 12.0) * 365.0 / 3600.0
# Add ~10% distribution + storage losses (SAP10.2 §L Table 3a typical).
useful_with_losses = useful_kwh * 1.10
return useful_with_losses / seasonal_efficiency_water
distribution_loss = useful_kwh * 0.15
storage_loss = _cylinder_storage_loss_kwh(
cylinder_size=cylinder_size,
cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm,
cylinder_insulation_type=cylinder_insulation_type,
age_band=age_band,
)
primary_loss = _primary_circuit_loss_kwh(age_band)
wwhrs_credit = useful_kwh * 0.12 if has_wwhrs else 0.0
solar_credit = 250.0 if has_solar_water_heating else 0.0
total_useful = max(
0.0,
useful_kwh + distribution_loss + storage_loss + primary_loss - wwhrs_credit - solar_credit,
)
return total_useful / seasonal_efficiency_water
# SAP10.2 cylinder volume by RdSAP10 size code (Table 28).
_CYLINDER_VOLUME_L: Final[dict[int, float]] = {1: 110.0, 2: 160.0, 3: 210.0}
# SAP10.2 Table 2 storage loss factor (kWh / litre / day) by insulation
# thickness in mm. Lower number = better insulation.
_STORAGE_LOSS_FACTOR: Final[dict[int, float]] = {
0: 0.0203, # uninsulated -> high loss
12: 0.0152, # 12 mm jacket
25: 0.0078, # 25 mm foam
38: 0.0056,
50: 0.0043,
80: 0.0025,
100: 0.0022,
150: 0.0014,
200: 0.0011,
}
# RdSAP10 Table 29 cylinder-insulation default by age band when unknown:
# A-F -> 12 mm jacket, G-H -> 25 mm foam, I-M -> 38 mm foam.
_AGE_TO_DEFAULT_CYLINDER_INS_MM: Final[dict[str, int]] = {
"A": 12, "B": 12, "C": 12, "D": 12, "E": 12, "F": 12,
"G": 25, "H": 25,
"I": 38, "J": 38, "K": 38, "L": 38, "M": 38,
}
def _cylinder_storage_loss_kwh(
cylinder_size: Optional[int],
cylinder_insulation_thickness_mm: Optional[int],
cylinder_insulation_type: Optional[int],
age_band: Optional[str],
) -> float:
"""Annual cylinder storage loss (kWh useful, before efficiency division).
Returns 0 when no cylinder is described AND age_band is unknown (assume
instantaneous / combi without storage). Heated-space modifier 0.6.
"""
if cylinder_size is None and age_band is None:
return 0.0
volume = _CYLINDER_VOLUME_L.get(cylinder_size or 1, 110.0)
thickness = cylinder_insulation_thickness_mm
if thickness is None and age_band is not None:
thickness = _AGE_TO_DEFAULT_CYLINDER_INS_MM.get(age_band.upper())
if thickness is None:
thickness = 38
factor = _nearest_storage_loss_factor(thickness)
heated_space_modifier = 0.6
return volume * factor * 365.0 * heated_space_modifier
def _nearest_storage_loss_factor(thickness_mm: int) -> float:
"""Pick the SAP10.2 Table 2 row with thickness closest <= the supplied
value. For thicknesses below 12 mm, uses the uninsulated 0-row."""
candidates = sorted(_STORAGE_LOSS_FACTOR.keys())
chosen = candidates[0]
for t in candidates:
if t <= thickness_mm:
chosen = t
return _STORAGE_LOSS_FACTOR[chosen]
def _primary_circuit_loss_kwh(age_band: Optional[str]) -> float:
"""Annual primary-pipework loss (kWh useful) by age band.
RdSAP10 Table 29: pre-2007 (A-J) no primary insulation -> 245 kWh/yr;
K, L, M -> full insulation -> 60 kWh/yr. Unknown -> 245.
"""
if age_band is None:
return 245.0
return 60.0 if age_band.upper() in ("K", "L", "M") else 245.0
def predicted_lighting_kwh(

View file

@ -62,6 +62,99 @@ def test_predicted_hot_water_returns_zero_for_unspecified_floor_area() -> None:
assert predicted_hot_water_kwh(total_floor_area_m2=None, seasonal_efficiency_water=0.84) == 0.0
def test_predicted_hot_water_kwh_adds_storage_loss_when_cylinder_described() -> None:
    # Arrange — one 80 m^2 dwelling evaluated with and without a described
    # cylinder. A size-1 (110 L) cylinder with 38 mm insulation uses the
    # 0.0056 kWh/L/day row, so the heated-space loss is
    # 110 * 0.0056 * 365 * 0.6 ≈ 135 kWh useful, i.e. ≈ 161 kWh delivered at
    # 0.84 efficiency. With no cylinder fields and no age band the storage
    # term is zero.
    floor_area_m2 = 80.0
    water_efficiency = 0.84

    # Act
    without_cylinder = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
    )
    with_cylinder = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        cylinder_size=1,
        cylinder_insulation_thickness_mm=38,
        cylinder_insulation_type=2,  # foam
    )

    # Assert
    extra_delivered_kwh = with_cylinder - without_cylinder
    assert with_cylinder > without_cylinder
    assert extra_delivered_kwh == pytest.approx(161.0, abs=15.0)
def test_predicted_hot_water_kwh_lower_storage_loss_for_thicker_insulation() -> None:
    # Arrange — identical dwelling and cylinder size; only the insulation
    # differs (12 mm jacket versus 100 mm foam).
    floor_area_m2 = 80.0
    water_efficiency = 0.84

    # Act
    foam_100mm = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        cylinder_size=1,
        cylinder_insulation_thickness_mm=100,
        cylinder_insulation_type=2,
    )
    jacket = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        cylinder_size=1,
        cylinder_insulation_thickness_mm=12,
        cylinder_insulation_type=1,
    )

    # Assert — the thinner jacket must lose more energy.
    assert jacket > foam_100mm
def test_predicted_hot_water_kwh_drops_with_wwhrs() -> None:
    # Arrange — same dwelling with and without waste-water heat recovery;
    # the WWHRS credit is 12% of useful demand.
    floor_area_m2 = 80.0
    water_efficiency = 0.84

    # Act
    with_wwhrs = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        has_wwhrs=True,
    )
    no_wwhrs = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        has_wwhrs=False,
    )

    # Assert
    assert with_wwhrs < no_wwhrs
def test_predicted_hot_water_kwh_drops_with_solar_water_heating() -> None:
    # Arrange — same dwelling with and without solar water heating; the
    # solar credit is a flat 250 kWh/yr of useful energy.
    floor_area_m2 = 80.0
    water_efficiency = 0.84

    # Act
    with_solar = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        has_solar_water_heating=True,
    )
    no_solar = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        has_solar_water_heating=False,
    )

    # Assert
    assert with_solar < no_solar
def test_predicted_hot_water_kwh_uses_age_band_default_when_insulation_unspecified() -> None:
    # Arrange — a size-1 cylinder with no recorded insulation thickness takes
    # its default from the age band: A-F -> 12 mm jacket, G-H -> 25 mm foam,
    # I-M -> 38 mm foam. Band G therefore gets the better-insulated default
    # and should predict less hot-water energy than band A.
    floor_area_m2 = 80.0
    water_efficiency = 0.84

    # Act
    age_g = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        cylinder_size=1,
        age_band="G",
    )
    age_a = predicted_hot_water_kwh(
        total_floor_area_m2=floor_area_m2,
        seasonal_efficiency_water=water_efficiency,
        cylinder_size=1,
        age_band="A",
    )

    # Assert
    assert age_g < age_a
def test_predicted_hot_water_typical_uk_home_falls_in_sensible_range() -> None:
# Arrange — 80 m^2 home, gas-combi efficiency.

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "2.1.0"
assert schema.transform_version == "2.2.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():

View file

@ -901,7 +901,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "2.1.0"
VERSION: str = "2.2.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -969,9 +969,19 @@ class EpcMlTransform:
region_code=epc.region_code,
seasonal_efficiency_main=space_eff,
)
cylinder_size_val = heating_aggregates.get("cylinder_size")
cylinder_ins_thk = heating_aggregates.get("cylinder_insulation_thickness_mm")
cylinder_ins_type = heating_aggregates.get("cylinder_insulation_type")
main_age = building_part_aggregates.get("main_dwelling_construction_age_band")
pred_hw_kwh = predicted_hot_water_kwh(
total_floor_area_m2=epc.total_floor_area_m2,
seasonal_efficiency_water=water_eff,
cylinder_size=cylinder_size_val if isinstance(cylinder_size_val, int) else None,
cylinder_insulation_thickness_mm=cylinder_ins_thk if isinstance(cylinder_ins_thk, int) else None,
cylinder_insulation_type=cylinder_ins_type if isinstance(cylinder_ins_type, int) else None,
age_band=main_age if isinstance(main_age, str) else None,
has_wwhrs=bool(epc.sap_heating.number_baths_wwhrs and epc.sap_heating.number_baths_wwhrs > 0),
has_solar_water_heating=epc.solar_water_heating,
)
pred_light_kwh = predicted_lighting_kwh(
total_floor_area_m2=epc.total_floor_area_m2,