From eee5421112c29198c1190320b1e76aa1e4ea72d8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 May 2026 11:57:29 +0000 Subject: [PATCH] slice 16d: predicted_space/hot_water/lighting_kwh + seasonal-efficiency features New module domain.ml.demand emits crude annual demand approximations (ADR-0008 "crude annual"): predicted_space_heating_kwh = HLC * HDH_region * 1e-3 / efficiency_main predicted_hot_water_kwh = SAP10.2 J simplified (Vd, dT, +10% losses) predicted_lighting_kwh = 9.3 * TFA reduced by LED/CFL share HDH lookup covers SAP10.2's 22 regions; fallback UK avg = 53,000 K*h/yr. Plus two seasonal-efficiency features straight off the Table 4a/4b lookup from slice 16b (seasonal_efficiency_main_heating / seasonal_efficiency_water_heating). Wired into to_row; VERSION 0.2.0 -> 0.3.0 (MINOR). --- packages/domain/src/domain/ml/demand.py | 138 ++++++++++++++++++ .../domain/src/domain/ml/tests/test_demand.py | 98 +++++++++++++ .../src/domain/ml/tests/test_transform.py | 2 +- packages/domain/src/domain/ml/transform.py | 76 +++++++++- 4 files changed, 311 insertions(+), 3 deletions(-) create mode 100644 packages/domain/src/domain/ml/demand.py create mode 100644 packages/domain/src/domain/ml/tests/test_demand.py diff --git a/packages/domain/src/domain/ml/demand.py b/packages/domain/src/domain/ml/demand.py new file mode 100644 index 00000000..7a4810ba --- /dev/null +++ b/packages/domain/src/domain/ml/demand.py @@ -0,0 +1,138 @@ +"""Crude annual heat/hot-water/lighting demand approximations. + +Used by `transform.py` to populate the `predicted_*_kwh` features (slice 16d). +These are deliberately coarse: they give the model a physics-shaped starting +point that it can adjust against the cert's real kWh labels. See ADR-0008 +for the "crude annual" rationale. + +Formulas: + +- predicted_space_heating_kwh + ~= envelope_heat_loss_w_per_k * HDH_region * 1e-3 / efficiency_main_heating + where HDH_region is heating degree hours per year by SAP region. 
+ +- predicted_hot_water_kwh (SAP10.2 Appendix J simplified) + V_d ~= 25 * N_occupants + 36 (litres / day) + Q_HW_useful ~= 4.18 * V_d * (55 - 12) * 365 / 3600 (kWh / yr) + N_occupants defaulted from total_floor_area_m2 per SAP J Table 1b. + +- predicted_lighting_kwh (SAP10.2 Section L simplified) + base ~= 9.3 * TFA * (1 - 0.5 * led_share - 0.4 * cfl_share) +""" + +from __future__ import annotations + +from math import exp +from typing import Final, Optional + + +# SAP10.2 Table 6 / U6 heating degree hours per year by SAP region (K * h). +# Coarse grouping: most regions cluster ~50-60k; using region_code if known. +_HDH_BY_REGION: Final[dict[int, float]] = { + 1: 51000, # Thames + 2: 52000, # SE England + 3: 50000, # Southern + 4: 51000, # SW England + 5: 53000, # Severn (5E + 5W treated together) + 6: 54000, # Midlands + 7: 55000, # W Pennines / Lancashire + 8: 55000, # NW England / SW Scotland + 9: 56000, # Borders / North Tyne + 10: 56000, # NE England + 11: 56000, # E Pennines / Yorkshire + 12: 54000, # E Anglia + 13: 56000, # Wales (Mid) + 14: 58000, # W Scotland + 15: 59000, # E Scotland + 16: 60000, # NE Scotland + 17: 62000, # Highland + 18: 60000, # Western Isles + 19: 60000, # Orkney + 20: 62000, # Shetland + 21: 54000, # Northern Ireland + 22: 53000, # Isle of Man +} + +_HDH_UK_AVG: Final[float] = 53000.0 + + +def _hdh_for_region(region_code: Optional[str]) -> float: + if region_code is None: + return _HDH_UK_AVG + try: + code = int(region_code) + except (TypeError, ValueError): + return _HDH_UK_AVG + return _HDH_BY_REGION.get(code, _HDH_UK_AVG) + + +def predicted_space_heating_kwh( + envelope_heat_loss_w_per_k: float, + region_code: Optional[str], + seasonal_efficiency_main: float, +) -> float: + """Annual delivered space-heating kWh. + + delivered_kWh = HLC * HDH_region * 1e-3 / efficiency + where HLC is W/K, HDH is K*hours/year (so the product is Wh, /1000 = kWh, + /efficiency converts useful demand to delivered fuel). 
+ """ + if envelope_heat_loss_w_per_k <= 0 or seasonal_efficiency_main <= 0: + return 0.0 + hdh = _hdh_for_region(region_code) + useful_kwh = envelope_heat_loss_w_per_k * hdh * 1e-3 + return useful_kwh / seasonal_efficiency_main + + +def _default_occupants_sap_j(total_floor_area_m2: float) -> float: + """SAP10.2 Appendix J Table 1b default occupancy. + + N = 1 + 1.76 * (1 - exp(-0.000349 * (TFA - 13.9)^2)) + 0.0013 * (TFA - 13.9) + for TFA > 13.9 m^2; otherwise N = 1. + """ + if total_floor_area_m2 <= 13.9: + return 1.0 + x = total_floor_area_m2 - 13.9 + return 1.0 + 1.76 * (1.0 - exp(-0.000349 * x * x)) + 0.0013 * x + + +def predicted_hot_water_kwh( + total_floor_area_m2: Optional[float], + seasonal_efficiency_water: float, +) -> float: + """Annual delivered hot-water kWh (SAP10.2 Appendix J simplified). + + Uses default occupancy from TFA, daily volume 25*N+36 litres, delta-T + 55 - 12 = 43 K, no FGHRS / WWHRS adjustment. + """ + if total_floor_area_m2 is None or total_floor_area_m2 <= 0: + return 0.0 + if seasonal_efficiency_water <= 0: + return 0.0 + n = _default_occupants_sap_j(total_floor_area_m2) + vd_litres = 25.0 * n + 36.0 + # 4.18 kJ/(kg K) * litres/day * delta-K * days = kJ/yr; /3600 -> kWh/yr. + useful_kwh = 4.18 * vd_litres * (55.0 - 12.0) * 365.0 / 3600.0 + # Add ~10% distribution + storage losses (SAP10.2 §L Table 3a typical). + useful_with_losses = useful_kwh * 1.10 + return useful_with_losses / seasonal_efficiency_water + + +def predicted_lighting_kwh( + total_floor_area_m2: Optional[float], + cfl_count: int, + led_count: int, + incandescent_count: int, +) -> float: + """Annual lighting kWh (SAP10.2 Section L simplified). + + Base demand ~ 9.3 * TFA kWh/yr; reduced by low-energy bulb share. LED + bulbs cut consumption by ~50%, CFL by ~40%, incandescent by 0%. 
+ """ + if total_floor_area_m2 is None or total_floor_area_m2 <= 0: + return 0.0 + total_bulbs = max(1, cfl_count + led_count + incandescent_count) + led_share = led_count / total_bulbs + cfl_share = cfl_count / total_bulbs + reduction = 0.5 * led_share + 0.4 * cfl_share + return 9.3 * total_floor_area_m2 * (1.0 - reduction) diff --git a/packages/domain/src/domain/ml/tests/test_demand.py b/packages/domain/src/domain/ml/tests/test_demand.py new file mode 100644 index 00000000..10df0804 --- /dev/null +++ b/packages/domain/src/domain/ml/tests/test_demand.py @@ -0,0 +1,98 @@ +"""Tests for crude annual demand approximations (slice 16d).""" + +import pytest + +from domain.ml.demand import ( + predicted_hot_water_kwh, + predicted_lighting_kwh, + predicted_space_heating_kwh, +) + + +def test_predicted_space_heating_scales_with_envelope_w_per_k() -> None: + # Arrange — same region, same efficiency, double the HLC -> double the kWh. + + # Act + low = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=100.0, region_code="1", seasonal_efficiency_main=0.84) + high = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, region_code="1", seasonal_efficiency_main=0.84) + + # Assert + assert high == pytest.approx(2.0 * low, abs=0.01) + + +def test_predicted_space_heating_returns_zero_when_efficiency_zero() -> None: + # Arrange / Act / Assert + assert predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, region_code="1", seasonal_efficiency_main=0.0) == 0.0 + + +def test_predicted_space_heating_falls_back_to_uk_average_when_region_unknown() -> None: + # Arrange — region None should still produce a finite positive kWh. + + # Act + result = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, region_code=None, seasonal_efficiency_main=0.84) + + # Assert + assert result > 0.0 + + +def test_predicted_space_heating_scotland_higher_than_thames() -> None: + # Arrange — same HLC, same efficiency; Scotland's HDH > Thames's. 
+ + # Act + thames = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, region_code="1", seasonal_efficiency_main=0.84) + scotland = predicted_space_heating_kwh(envelope_heat_loss_w_per_k=200.0, region_code="14", seasonal_efficiency_main=0.84) + + # Assert + assert scotland > thames + + +def test_predicted_hot_water_scales_with_floor_area() -> None: + # Arrange — same efficiency, larger TFA -> more occupants -> more kWh. + + # Act + small = predicted_hot_water_kwh(total_floor_area_m2=50.0, seasonal_efficiency_water=0.84) + large = predicted_hot_water_kwh(total_floor_area_m2=150.0, seasonal_efficiency_water=0.84) + + # Assert + assert large > small + + +def test_predicted_hot_water_returns_zero_for_unspecified_floor_area() -> None: + # Arrange / Act / Assert + assert predicted_hot_water_kwh(total_floor_area_m2=None, seasonal_efficiency_water=0.84) == 0.0 + + +def test_predicted_hot_water_typical_uk_home_falls_in_sensible_range() -> None: + # Arrange — 80 m^2 home, gas-combi efficiency. + + # Act + result = predicted_hot_water_kwh(total_floor_area_m2=80.0, seasonal_efficiency_water=0.84) + + # Assert — typical UK home DHW is 2000-3500 kWh/yr. + assert 1500.0 < result < 4500.0 + + +def test_predicted_lighting_drops_with_led_bulbs() -> None: + # Arrange — same TFA, all-incandescent vs all-LED. + + # Act + incandescent = predicted_lighting_kwh(total_floor_area_m2=100.0, cfl_count=0, led_count=0, incandescent_count=10) + all_led = predicted_lighting_kwh(total_floor_area_m2=100.0, cfl_count=0, led_count=10, incandescent_count=0) + + # Assert + assert all_led < incandescent + + +def test_predicted_lighting_returns_zero_for_unspecified_floor_area() -> None: + # Arrange / Act / Assert + assert predicted_lighting_kwh(total_floor_area_m2=None, cfl_count=0, led_count=0, incandescent_count=10) == 0.0 + + +def test_predicted_lighting_with_no_bulb_data_uses_base_demand() -> None: + # Arrange — TFA known but no bulb counts (all zero -> treat as full incandescent). 
+ + # Act + result = predicted_lighting_kwh(total_floor_area_m2=100.0, cfl_count=0, led_count=0, incandescent_count=0) + + # Assert — base demand 9.3 * 100 = 930 kWh. + assert result == pytest.approx(930.0, abs=10.0) diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index 5fad2da7..fdd71655 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -36,7 +36,7 @@ def test_transform_advertises_version_and_target_columns() -> None: # Assert assert isinstance(schema, TransformSchema) - assert schema.transform_version == "0.2.0" + assert schema.transform_version == "0.3.0" assert schema.transform_version == EpcMlTransform.VERSION assert set(schema.target_columns.keys()) == set(_EXPECTED_TARGET_DTYPES.keys()) for target_name, expected_dtype in _EXPECTED_TARGET_DTYPES.items(): diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 85c46227..6249adc1 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -22,7 +22,13 @@ from datatypes.epc.domain.epc_property_data import ( SapHeating, SapWindow, ) +from domain.ml.demand import ( + predicted_hot_water_kwh, + predicted_lighting_kwh, + predicted_space_heating_kwh, +) from domain.ml.envelope import envelope_heat_loss_w_per_k +from domain.ml.sap_efficiencies import seasonal_efficiency, water_heating_efficiency from domain.ml.schema import ColumnSpec, TransformSchema from domain.ml.ucl import apply_ucl_correction @@ -768,6 +774,45 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { "conduction loss in W/K." ), ), + "seasonal_efficiency_main_heating": ColumnSpec( + dtype=float, nullable=False, + description=( + "Space-heating seasonal efficiency as a decimal (e.g. 0.84 = 84%), " + "from SAP10.2 Table 4a/4b keyed on primary_sap_main_heating_code. 
" + "Unknown codes fall back to 0.80 (gas-boiler typical). ADR-0008." + ), + ), + "seasonal_efficiency_water_heating": ColumnSpec( + dtype=float, nullable=False, + description=( + "Water-heating seasonal efficiency as a decimal. Code 901 ('from main') " + "inherits the main code's efficiency; unknown -> 0.78 (gas-combi). " + "ADR-0008." + ), + ), + "predicted_space_heating_kwh": ColumnSpec( + dtype=float, nullable=False, + description=( + "Crude annual delivered space-heating kWh: envelope_heat_loss_w_per_k * " + "HDH_region * 1e-3 / seasonal_efficiency_main_heating. HDH from a 22-row " + "SAP-region lookup; UK average ~53,000 K*h/yr. ADR-0008." + ), + ), + "predicted_hot_water_kwh": ColumnSpec( + dtype=float, nullable=False, + description=( + "Crude annual delivered hot-water kWh from SAP10.2 Appendix J simplified: " + "occupancy from TFA, daily volume 25*N+36 L, delta-T 43 K, +10% losses, " + "divided by water-heating efficiency. ADR-0008." + ), + ), + "predicted_lighting_kwh": ColumnSpec( + dtype=float, nullable=False, + description=( + "Crude annual lighting kWh from SAP10.2 Section L simplified: " + "9.3 * TFA reduced by 50% LED share + 40% CFL share. ADR-0008." + ), + ), } @@ -822,7 +867,7 @@ class EpcMlTransform: Version 0.1.0 — schema contract only; feature columns added in subsequent slices. """ - VERSION: str = "0.2.0" + VERSION: str = "0.3.0" def schema(self) -> TransformSchema: """The cross-repo ML data contract. 
@@ -878,6 +923,28 @@ class EpcMlTransform: insulated_door_count=epc.insulated_door_count, insulated_door_u_value=epc.insulated_door_u_value, ) + main_heating_code = heating_aggregates.get("primary_sap_main_heating_code") + water_code = heating_aggregates.get("water_heating_code") + space_eff = seasonal_efficiency(main_heating_code if isinstance(main_heating_code, int) else None) + water_eff = water_heating_efficiency( + water_heating_code=water_code if isinstance(water_code, int) else None, + main_heating_code=main_heating_code if isinstance(main_heating_code, int) else None, + ) + pred_space_kwh = predicted_space_heating_kwh( + envelope_heat_loss_w_per_k=envelope_w_per_k, + region_code=epc.region_code, + seasonal_efficiency_main=space_eff, + ) + pred_hw_kwh = predicted_hot_water_kwh( + total_floor_area_m2=epc.total_floor_area_m2, + seasonal_efficiency_water=water_eff, + ) + pred_light_kwh = predicted_lighting_kwh( + total_floor_area_m2=epc.total_floor_area_m2, + cfl_count=epc.cfl_fixed_lighting_bulbs_count, + led_count=epc.led_fixed_lighting_bulbs_count, + incandescent_count=epc.incandescent_fixed_lighting_bulbs_count, + ) return { # Features — geometry "total_floor_area_m2": epc.total_floor_area_m2, @@ -910,8 +977,13 @@ class EpcMlTransform: **window_aggregates, # Features — building parts aggregates + Main Dwelling carve-out **building_part_aggregates, - # Features — engineered physics: envelope heat-loss W/K (ADR-0008) + # Features — engineered physics (ADR-0008) "envelope_heat_loss_w_per_k": envelope_w_per_k, + "seasonal_efficiency_main_heating": space_eff, + "seasonal_efficiency_water_heating": water_eff, + "predicted_space_heating_kwh": pred_space_kwh, + "predicted_hot_water_kwh": pred_hw_kwh, + "predicted_lighting_kwh": pred_light_kwh, # Features — heating system (primary slot + water + secondary) **heating_aggregates, # Features — PV (capacity source + array aggregates by SAP octant)