slice 16d: predicted_space/hot_water/lighting_kwh + seasonal-efficiency features

New module domain.ml.demand emits crude annual demand approximations
(ADR-0008 "crude annual"):

  predicted_space_heating_kwh = HLC * HDH_region * 1e-3 / efficiency_main
  predicted_hot_water_kwh     = SAP10.2 J simplified (Vd, dT, +10% losses)
  predicted_lighting_kwh      = 9.3 * TFA reduced by LED/CFL share

HDH lookup covers SAP10.2's 22 regions; fallback UK avg = 53,000 K*h/yr.

Plus two seasonal-efficiency features straight off the Table 4a/4b lookup
from slice 16b (seasonal_efficiency_main_heating /
seasonal_efficiency_water_heating).

Wired into to_row; VERSION 0.2.0 -> 0.3.0 (MINOR).
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 11:57:29 +00:00
parent fca8815991
commit eee5421112
4 changed files with 311 additions and 3 deletions

View file

@ -0,0 +1,138 @@
"""Crude annual heat/hot-water/lighting demand approximations.
Used by `transform.py` to populate the `predicted_*_kwh` features (slice 16d).
These are deliberately coarse: they give the model a physics-shaped starting
point that it can adjust against the cert's real kWh labels. See ADR-0008
for the "crude annual" rationale.
Formulas:
- predicted_space_heating_kwh
~= envelope_heat_loss_w_per_k * HDH_region * 1e-3 / efficiency_main_heating
where HDH_region is heating degree hours per year by SAP region.
- predicted_hot_water_kwh (SAP10.2 Appendix J simplified)
V_d ~= 25 * N_occupants + 36 (litres / day)
Q_HW_useful ~= 4.18 * V_d * (55 - 12) * 365 / 3600 (kWh / yr)
delivered ~= Q_HW_useful * 1.10 / efficiency_water_heating (+10% losses)
N_occupants defaulted from total_floor_area_m2 per SAP J Table 1b.
- predicted_lighting_kwh (SAP10.2 Section L simplified)
base ~= 9.3 * TFA * (1 - 0.5 * led_share - 0.4 * cfl_share)
"""
from __future__ import annotations
from math import exp
from typing import Final, Optional
# SAP10.2 Table 6 / U6 heating degree hours per year by SAP region (K * h).
# Coarse grouping: most regions cluster ~50-60k; using region_code if known.
# Keys are the integer form of the SAP region code: `_hdh_for_region` parses
# the incoming string code with int() and looks the result up here.
# NOTE(review): the figures look like rounded approximations rather than
# verbatim table rows -- confirm against SAP10.2 before using them for
# anything finer than the "crude annual" estimates.
# NOTE(review): values are written as int literals although the mapping is
# annotated as float; harmless for the arithmetic here, but worth confirming
# no caller depends on the value type.
_HDH_BY_REGION: Final[dict[int, float]] = {
    1: 51000, # Thames
    2: 52000, # SE England
    3: 50000, # Southern
    4: 51000, # SW England
    5: 53000, # Severn (5E + 5W treated together)
    6: 54000, # Midlands
    7: 55000, # W Pennines / Lancashire
    8: 55000, # NW England / SW Scotland
    9: 56000, # Borders / North Tyne
    10: 56000, # NE England
    11: 56000, # E Pennines / Yorkshire
    12: 54000, # E Anglia
    13: 56000, # Wales (Mid)
    14: 58000, # W Scotland
    15: 59000, # E Scotland
    16: 60000, # NE Scotland
    17: 62000, # Highland
    18: 60000, # Western Isles
    19: 60000, # Orkney
    20: 62000, # Shetland
    21: 54000, # Northern Ireland
    22: 53000, # Isle of Man
}
# Fallback degree hours used when the region code is missing, cannot be
# parsed as an integer, or is not a key of the table above.
_HDH_UK_AVG: Final[float] = 53000.0
def _hdh_for_region(region_code: Optional[str]) -> float:
    """Return the heating degree hours (K*h per year) for a SAP region code.

    The code is parsed with ``int()`` and looked up in ``_HDH_BY_REGION``.
    ``_HDH_UK_AVG`` is returned instead when the code is ``None``, cannot be
    parsed as an integer, or is not present in the table.
    """
    if region_code is not None:
        try:
            region_key = int(region_code)
        except (TypeError, ValueError):
            # Unparseable code: fall through to the UK-average fallback.
            pass
        else:
            return _HDH_BY_REGION.get(region_key, _HDH_UK_AVG)
    return _HDH_UK_AVG
def predicted_space_heating_kwh(
    envelope_heat_loss_w_per_k: float,
    region_code: Optional[str],
    seasonal_efficiency_main: float,
) -> float:
    """Estimate annual delivered space-heating energy in kWh.

    Computes ``HLC * HDH_region * 1e-3 / efficiency``: the heat-loss
    coefficient (W/K) times the region's heating degree hours (K*h per year)
    gives Wh, the ``1e-3`` factor converts to kWh of useful heat, and dividing
    by the seasonal efficiency converts useful heat to delivered fuel.

    Returns ``0.0`` when either the heat-loss coefficient or the efficiency
    is zero or negative.
    """
    # Guard clauses: a non-positive input makes the estimate meaningless (and
    # a zero efficiency would divide by zero).
    if envelope_heat_loss_w_per_k <= 0:
        return 0.0
    if seasonal_efficiency_main <= 0:
        return 0.0
    annual_wh = envelope_heat_loss_w_per_k * _hdh_for_region(region_code)
    return annual_wh * 1e-3 / seasonal_efficiency_main
def _default_occupants_sap_j(total_floor_area_m2: float) -> float:
    """Return the assumed number of occupants for a given total floor area.

    Implements the SAP10.2 Appendix J default occupancy curve:

        N = 1 + 1.76 * (1 - exp(-0.000349 * (TFA - 13.9)^2)) + 0.0013 * (TFA - 13.9)

    for TFA above 13.9 m^2; at or below that threshold N is 1.
    """
    if total_floor_area_m2 <= 13.9:
        return 1.0
    excess_area = total_floor_area_m2 - 13.9
    # Saturating term: approaches 1.76 as the floor area grows.
    saturating_term = 1.76 * (1.0 - exp(-0.000349 * excess_area * excess_area))
    # Linear term: keeps occupancy creeping up for very large dwellings.
    linear_term = 0.0013 * excess_area
    return 1.0 + saturating_term + linear_term
def predicted_hot_water_kwh(
    total_floor_area_m2: Optional[float],
    seasonal_efficiency_water: float,
) -> float:
    """Estimate annual delivered hot-water energy in kWh.

    Simplified from SAP10.2 Appendix J: occupancy is defaulted from the floor
    area, daily hot-water volume is ``25 * N + 36`` litres, and the water is
    heated through 55 - 12 = 43 K. A flat 10% is added for distribution and
    storage losses, and the total is divided by the water-heating efficiency.
    No FGHRS / WWHRS adjustment is applied.

    Returns ``0.0`` when the floor area is missing or non-positive, or when
    the efficiency is non-positive.
    """
    if (
        total_floor_area_m2 is None
        or total_floor_area_m2 <= 0
        or seasonal_efficiency_water <= 0
    ):
        return 0.0
    occupants = _default_occupants_sap_j(total_floor_area_m2)
    daily_litres = 25.0 * occupants + 36.0
    temperature_rise_k = 55.0 - 12.0
    # 4.18 kJ/(kg K) * litres/day * K * days/yr gives kJ/yr (1 litre ~ 1 kg).
    annual_kj = 4.18 * daily_litres * temperature_rise_k * 365.0
    # 3600 kJ per kWh.
    useful_kwh = annual_kj / 3600.0
    # Uplift by ~10% for distribution + storage losses, then convert useful
    # heat to delivered energy.
    return useful_kwh * 1.10 / seasonal_efficiency_water
def predicted_lighting_kwh(
    total_floor_area_m2: Optional[float],
    cfl_count: Optional[int],
    led_count: Optional[int],
    incandescent_count: Optional[int],
) -> float:
    """Estimate annual lighting energy in kWh (SAP10.2 Section L simplified).

    Base demand is ``9.3 * TFA`` kWh/yr, reduced according to the share of
    low-energy bulbs: LED bulbs cut consumption by ~50%, CFL by ~40%, and
    incandescent bulbs by 0%.

    Args:
        total_floor_area_m2: Total floor area; ``None`` or a non-positive
            value yields ``0.0``.
        cfl_count: Number of CFL bulbs. ``None`` is treated as 0 and negative
            values are clamped to 0.
        led_count: Number of LED bulbs, sanitised the same way.
        incandescent_count: Number of incandescent bulbs, sanitised the same
            way.

    Returns:
        The estimated annual lighting kWh. When no bulbs are recorded at all
        the unreduced base demand is returned.
    """
    if total_floor_area_m2 is None or total_floor_area_m2 <= 0:
        return 0.0
    # Missing counts become 0 and negative counts are clamped, so bad data can
    # neither raise TypeError nor yield a share outside [0, 1] (which would
    # push the result above the base demand).
    cfl, led, incandescent = (
        max(0, count or 0)
        for count in (cfl_count, led_count, incandescent_count)
    )
    # At least 1 so that "no bulbs recorded" gives zero shares, not a
    # division by zero.
    total_bulbs = max(1, cfl + led + incandescent)
    led_share = led / total_bulbs
    cfl_share = cfl / total_bulbs
    reduction = 0.5 * led_share + 0.4 * cfl_share
    return 9.3 * total_floor_area_m2 * (1.0 - reduction)

View file

@ -0,0 +1,98 @@
"""Tests for crude annual demand approximations (slice 16d)."""
import pytest
from domain.ml.demand import (
predicted_hot_water_kwh,
predicted_lighting_kwh,
predicted_space_heating_kwh,
)
def test_predicted_space_heating_scales_with_envelope_w_per_k() -> None:
    """Doubling the heat-loss coefficient doubles the predicted kWh."""
    # Arrange — hold region and efficiency fixed so only the HLC varies.
    fixed_inputs = {"region_code": "1", "seasonal_efficiency_main": 0.84}
    # Act
    baseline_kwh = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=100.0, **fixed_inputs)
    doubled_kwh = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, **fixed_inputs)
    # Assert
    assert doubled_kwh == pytest.approx(2.0 * baseline_kwh, abs=0.01)
def test_predicted_space_heating_returns_zero_when_efficiency_zero() -> None:
    """A zero seasonal efficiency yields exactly 0.0 kWh."""
    # Arrange / Act
    result_kwh = predicted_space_heating_kwh(
        envelope_heat_loss_w_per_k=200.0,
        region_code="1",
        seasonal_efficiency_main=0.0,
    )
    # Assert
    assert result_kwh == 0.0
def test_predicted_space_heating_falls_back_to_uk_average_when_region_unknown() -> None:
    """Missing, unparseable and out-of-table region codes use the UK average."""
    # Arrange — expected value from the 53,000 K*h/yr UK-average fallback:
    # HLC * HDH * 1e-3 / efficiency.
    expected_kwh = 200.0 * 53000.0 * 1e-3 / 0.84
    unknown_region_codes = (None, "not-a-region", "99")
    # Act
    results = [
        predicted_space_heating_kwh(
            envelope_heat_loss_w_per_k=200.0,
            region_code=region_code,
            seasonal_efficiency_main=0.84,
        )
        for region_code in unknown_region_codes
    ]
    # Assert — pin the fallback value itself, not merely positivity, so a
    # change to the fallback path is actually detected.
    for result in results:
        assert result == pytest.approx(expected_kwh)
def test_predicted_space_heating_scotland_higher_than_thames() -> None:
    """Region 14 (W Scotland) predicts more kWh than region 1 (Thames)."""
    # Arrange — identical HLC and efficiency; only the region code differs.
    shared_inputs = {"envelope_heat_loss_w_per_k": 200.0, "seasonal_efficiency_main": 0.84}
    # Act
    thames_kwh = predicted_space_heating_kwh(region_code="1", **shared_inputs)
    scotland_kwh = predicted_space_heating_kwh(region_code="14", **shared_inputs)
    # Assert
    assert scotland_kwh > thames_kwh
def test_predicted_hot_water_scales_with_floor_area() -> None:
    """A larger floor area predicts more hot-water kWh at equal efficiency."""
    # Arrange — efficiency held constant; floor area drives default occupancy.
    efficiency = 0.84
    # Act
    small_home_kwh = predicted_hot_water_kwh(total_floor_area_m2=50.0, seasonal_efficiency_water=efficiency)
    large_home_kwh = predicted_hot_water_kwh(total_floor_area_m2=150.0, seasonal_efficiency_water=efficiency)
    # Assert
    assert large_home_kwh > small_home_kwh
def test_predicted_hot_water_returns_zero_for_unspecified_floor_area() -> None:
    """A missing floor area yields exactly 0.0 kWh."""
    # Arrange / Act
    result_kwh = predicted_hot_water_kwh(
        total_floor_area_m2=None,
        seasonal_efficiency_water=0.84,
    )
    # Assert
    assert result_kwh == 0.0
def test_predicted_hot_water_typical_uk_home_falls_in_sensible_range() -> None:
    """An 80 m^2 home at 84% efficiency lands in a plausible kWh band."""
    # Arrange — 80 m^2 home, gas-combi efficiency.
    lower_bound_kwh, upper_bound_kwh = 1500.0, 4500.0
    # Act
    result_kwh = predicted_hot_water_kwh(total_floor_area_m2=80.0, seasonal_efficiency_water=0.84)
    # Assert — loose sanity band around the 2000-3500 kWh/yr typical of UK
    # home DHW.
    assert lower_bound_kwh < result_kwh < upper_bound_kwh
def test_predicted_lighting_drops_with_led_bulbs() -> None:
    """An all-LED home predicts less lighting kWh than an all-incandescent one."""
    # Arrange — same floor area and bulb total; only the bulb type differs.
    floor_area_m2 = 100.0
    # Act
    incandescent_kwh = predicted_lighting_kwh(
        total_floor_area_m2=floor_area_m2, cfl_count=0, led_count=0, incandescent_count=10
    )
    led_kwh = predicted_lighting_kwh(
        total_floor_area_m2=floor_area_m2, cfl_count=0, led_count=10, incandescent_count=0
    )
    # Assert
    assert led_kwh < incandescent_kwh
def test_predicted_lighting_returns_zero_for_unspecified_floor_area() -> None:
    """A missing floor area yields exactly 0.0 kWh regardless of bulb counts."""
    # Arrange / Act
    result_kwh = predicted_lighting_kwh(
        total_floor_area_m2=None,
        cfl_count=0,
        led_count=0,
        incandescent_count=10,
    )
    # Assert
    assert result_kwh == 0.0
def test_predicted_lighting_with_no_bulb_data_uses_base_demand() -> None:
    """With every bulb count at zero the unreduced base demand is returned."""
    # Arrange — floor area known, no bulbs recorded (treated as no reduction).
    no_bulbs = {"cfl_count": 0, "led_count": 0, "incandescent_count": 0}
    # Act
    result_kwh = predicted_lighting_kwh(total_floor_area_m2=100.0, **no_bulbs)
    # Assert — base demand 9.3 * 100 = 930 kWh.
    assert result_kwh == pytest.approx(930.0, abs=10.0)

View file

@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None:
# Assert
assert isinstance(schema, TransformSchema)
assert schema.transform_version == "0.2.0"
assert schema.transform_version == "0.3.0"
assert schema.transform_version == EpcMlTransform.VERSION
assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys())
for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items():

View file

@ -22,7 +22,13 @@ from datatypes.epc.domain.epc_property_data import (
SapHeating,
SapWindow,
)
from domain.ml.demand import (
predicted_hot_water_kwh,
predicted_lighting_kwh,
predicted_space_heating_kwh,
)
from domain.ml.envelope import envelope_heat_loss_w_per_k
from domain.ml.sap_efficiencies import seasonal_efficiency, water_heating_efficiency
from domain.ml.schema import ColumnSpec, TransformSchema
from domain.ml.ucl import apply_ucl_correction
@ -768,6 +774,45 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
"conduction loss in W/K."
),
),
"seasonal_efficiency_main_heating": ColumnSpec(
dtype=float, nullable=False,
description=(
"Space-heating seasonal efficiency as a decimal (e.g. 0.84 = 84%), "
"from SAP10.2 Table 4a/4b keyed on primary_sap_main_heating_code. "
"Unknown codes fall back to 0.80 (gas-boiler typical). ADR-0008."
),
),
"seasonal_efficiency_water_heating": ColumnSpec(
dtype=float, nullable=False,
description=(
"Water-heating seasonal efficiency as a decimal. Code 901 ('from main') "
"inherits the main code's efficiency; unknown -> 0.78 (gas-combi). "
"ADR-0008."
),
),
"predicted_space_heating_kwh": ColumnSpec(
dtype=float, nullable=False,
description=(
"Crude annual delivered space-heating kWh: envelope_heat_loss_w_per_k * "
"HDH_region * 1e-3 / seasonal_efficiency_main_heating. HDH from a 22-row "
"SAP-region lookup; UK average ~53,000 K*h/yr. ADR-0008."
),
),
"predicted_hot_water_kwh": ColumnSpec(
dtype=float, nullable=False,
description=(
"Crude annual delivered hot-water kWh from SAP10.2 Appendix J simplified: "
"occupancy from TFA, daily volume 25*N+36 L, delta-T 43 K, +10% losses, "
"divided by water-heating efficiency. ADR-0008."
),
),
"predicted_lighting_kwh": ColumnSpec(
dtype=float, nullable=False,
description=(
"Crude annual lighting kWh from SAP10.2 Section L simplified: "
"9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008."
),
),
}
@ -822,7 +867,7 @@ class EpcMlTransform:
Version 0.1.0 schema contract only; feature columns added in subsequent slices.
"""
VERSION: str = "0.2.0"
VERSION: str = "0.3.0"
def schema(self) -> TransformSchema:
"""The cross-repo ML data contract.
@ -878,6 +923,28 @@ class EpcMlTransform:
insulated_door_count=epc.insulated_door_count,
insulated_door_u_value=epc.insulated_door_u_value,
)
main_heating_code = heating_aggregates.get("primary_sap_main_heating_code")
water_code = heating_aggregates.get("water_heating_code")
space_eff = seasonal_efficiency(main_heating_code if isinstance(main_heating_code, int) else None)
water_eff = water_heating_efficiency(
water_heating_code=water_code if isinstance(water_code, int) else None,
main_heating_code=main_heating_code if isinstance(main_heating_code, int) else None,
)
pred_space_kwh = predicted_space_heating_kwh(
envelope_heat_loss_w_per_k=envelope_w_per_k,
region_code=epc.region_code,
seasonal_efficiency_main=space_eff,
)
pred_hw_kwh = predicted_hot_water_kwh(
total_floor_area_m2=epc.total_floor_area_m2,
seasonal_efficiency_water=water_eff,
)
pred_light_kwh = predicted_lighting_kwh(
total_floor_area_m2=epc.total_floor_area_m2,
cfl_count=epc.cfl_fixed_lighting_bulbs_count,
led_count=epc.led_fixed_lighting_bulbs_count,
incandescent_count=epc.incandescent_fixed_lighting_bulbs_count,
)
return {
# Features — geometry
"total_floor_area_m2": epc.total_floor_area_m2,
@ -910,8 +977,13 @@ class EpcMlTransform:
**window_aggregates,
# Features — building parts aggregates + Main Dwelling carve-out
**building_part_aggregates,
# Features — engineered physics: envelope heat-loss W/K (ADR-0008)
# Features — engineered physics (ADR-0008)
"envelope_heat_loss_w_per_k": envelope_w_per_k,
"seasonal_efficiency_main_heating": space_eff,
"seasonal_efficiency_water_heating": water_eff,
"predicted_space_heating_kwh": pred_space_kwh,
"predicted_hot_water_kwh": pred_hw_kwh,
"predicted_lighting_kwh": pred_light_kwh,
# Features — heating system (primary slot + water + secondary)
**heating_aggregates,
# Features — PV (capacity source + array aggregates by SAP octant)