slice 15b: +18 features — heating type code, hot water, windows, flat, supply

Heating: primary_sap_main_heating_code (the SAP10 heating-system enum was the
single biggest missing input), primary_emitter_temperature,
primary_main_heating_fraction.

Hot water: immersion_heating_type, shower_outlet_count.

Windows: window_pct_living, window_pct_external, window_pct_permanent_shutters
(area-weighted shares parallel to existing window aggregates).

Dwelling: conservatory_type, has_heated_separate_conservatory.

Flat-only block (sap_flat_details): flat_level, flat_top_storey,
flat_storey_count, flat_location, flat_heat_loss_corridor (non-int sentinel
values like '20+' coerce to None for the integer-typed features).

Energy supply: meter_type, pv_connection, wind_turbines_terrain_type.

Also plumbs `air_tightness` EnergyElement, `sap_flat_details` and
`has_heated_separate_conservatory` through the 21.0.1 mapper path (they were
silently None before).

Results at N=25,000 2026 RdSAP certs:
  sap_score          MAPE=0.044  sMAPE=0.038  R^2=0.884  (+0.045 R^2 vs 15a)
  co2_emissions      sMAPE=0.108  R^2=0.925
  peui_raw           MAPE=0.092  sMAPE=0.088  R^2=0.849
  peui_ucl           MAPE=0.081  sMAPE=0.078  R^2=0.860
  space_heating_kwh  MAPE=0.111  sMAPE=0.108  R^2=0.945
  hot_water_kwh      MAPE=0.081  sMAPE=0.079  R^2=0.772

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-17 00:08:11 +00:00
parent 0ffda529ec
commit 9f6f7608b9
4 changed files with 183 additions and 0 deletions

View file

@ -360,6 +360,10 @@ class EpcPropertyData:
main_heating_controls: Optional[EnergyElement] = (
None # site notes has heating_and_hot_water.main_heating.controls: str - doesn't map to EnergyElement
)
# Air-tightness EnergyElement (description + ratings) — kept as input even though
# ratings are derived, because the `.description` text categorizes the building's
# permeability class when no pressure test was carried out.
air_tightness: Optional[EnergyElement] = None
current_energy_efficiency_band: Optional[Epc] = None # not available in site notes?
environmental_impact_current: Optional[int] = None
heating_cost_current: Optional[float] = None

View file

@ -16,6 +16,7 @@ from datatypes.epc.domain.epc_property_data import (
SapAlternativeWall,
SapBuildingPart,
SapEnergySource,
SapFlatDetails,
SapFloorDimension,
SapHeating,
SapRoomInRoof,
@ -1313,6 +1314,11 @@ class EpcPropertyDataMapper:
has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true",
conservatory_type=schema.conservatory_type,
has_conservatory=schema.conservatory_type != 1,
has_heated_separate_conservatory=(
schema.has_heated_separate_conservatory == "true"
if schema.has_heated_separate_conservatory is not None
else None
),
# Counts
door_count=schema.door_count,
habitable_rooms_count=schema.habitable_room_count,
@ -1529,6 +1535,35 @@ class EpcPropertyDataMapper:
# Dwelling-level inputs used as ML features.
multiple_glazed_proportion=schema.multiple_glazed_proportion,
extract_fans_count=schema.extract_fans_count,
# Air-tightness EnergyElement (description carries permeability class).
air_tightness=(
EpcPropertyDataMapper._map_energy_element(schema.air_tightness)
if schema.air_tightness is not None
else None
),
# Main-heating-controls EnergyElement (first control system if multiple).
main_heating_controls=(
EpcPropertyDataMapper._map_energy_element(schema.main_heating_controls[0])
if schema.main_heating_controls
else None
),
# Flat-only nested block: present in ~33% of certs.
sap_flat_details=(
SapFlatDetails(
level=schema.sap_flat_details.level,
top_storey=schema.sap_flat_details.top_storey,
flat_location=schema.sap_flat_details.flat_location,
heat_loss_corridor=schema.sap_flat_details.heat_loss_corridor,
storey_count=schema.sap_flat_details.storey_count,
unheated_corridor_length_m=(
int(_measurement_value(schema.sap_flat_details.unheated_corridor_length))
if schema.sap_flat_details.unheated_corridor_length is not None
else None
),
)
if schema.sap_flat_details is not None
else None
),
)
@staticmethod

View file

@ -343,6 +343,7 @@ class RdSapSchema21_0_1:
incandescent_fixed_lighting_bulbs_count: int
# Fields below are present in some certs but absent in many real-world responses;
# see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert.
air_tightness: Optional[EnergyElement] = None
extract_fans_count: Optional[int] = None
wet_rooms_count: Optional[int] = None
open_chimneys_count: Optional[int] = None

View file

@ -498,6 +498,83 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
dtype=int, nullable=True,
description="Number of extract fans (ventilation/heat-loss proxy).",
),
# Heating — heating-system identity + flow temp + multi-system fraction.
"primary_sap_main_heating_code": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="SAP10 main heating type code (canonical heating-system enum).",
),
"primary_emitter_temperature": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Primary heating emitter temperature class (0=standard, 1=low-temp).",
),
"primary_main_heating_fraction": ColumnSpec(
dtype=float, nullable=True,
description="Fraction of space heating delivered by the primary main heating system.",
),
# Hot water — immersion type + presence of shower outlet block.
"immersion_heating_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Electric immersion heater type SAP10 code.",
),
"shower_outlet_count": ColumnSpec(
dtype=int, nullable=False,
description="1 if any shower_outlet block is declared on sap_heating, else 0.",
),
# Windows — per-window-type share aggregates.
"window_pct_living": ColumnSpec(
dtype=float, nullable=True,
description="Area share of windows with window_type == 1 (living room).",
),
"window_pct_external": ColumnSpec(
dtype=float, nullable=True,
description="Area share of windows with window_location == 0 (external).",
),
"window_pct_permanent_shutters": ColumnSpec(
dtype=float, nullable=True,
description="Area share of windows with permanent_shutters_present truthy.",
),
# Dwelling — conservatory + flat-only block.
"conservatory_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Conservatory SAP10 code (1=none, 2=heated, 3=unheated, ...).",
),
"has_heated_separate_conservatory": ColumnSpec(
dtype=bool, nullable=True,
description="Whether the dwelling has a heated separate conservatory.",
),
"flat_level": ColumnSpec(
dtype=int, nullable=True,
description="Flat-only: floor number on which the flat sits.",
),
"flat_top_storey": ColumnSpec(
dtype=str, nullable=True, categorical=True,
description="Flat-only: Y/N flag indicating whether this is the top storey.",
),
"flat_storey_count": ColumnSpec(
dtype=int, nullable=True,
description="Flat-only: storey count of the building containing the flat.",
),
"flat_location": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Flat-only: location SAP10 code (corner/middle/...).",
),
"flat_heat_loss_corridor": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Flat-only: heat-loss-corridor SAP10 code.",
),
# Energy supply categoricals.
"meter_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Electricity meter type SAP10 code (1=Standard, 2=Off-peak, ...).",
),
"pv_connection": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="PV connection topology SAP10 code.",
),
"wind_turbines_terrain_type": ColumnSpec(
dtype=int, nullable=True, categorical=True,
description="Wind-turbine terrain type SAP10 code.",
),
}
@ -649,6 +726,28 @@ class EpcMlTransform:
"number_baths": epc.sap_heating.number_baths,
"number_baths_wwhrs": epc.sap_heating.number_baths_wwhrs,
"extract_fans_count": epc.extract_fans_count,
# Features — conservatory + flat-only block
"conservatory_type": epc.conservatory_type,
"has_heated_separate_conservatory": epc.has_heated_separate_conservatory,
"flat_level": (
_int_or_none(epc.sap_flat_details.level) if epc.sap_flat_details else None
),
"flat_top_storey": (
epc.sap_flat_details.top_storey if epc.sap_flat_details else None
),
"flat_storey_count": (
_int_or_none(epc.sap_flat_details.storey_count) if epc.sap_flat_details else None
),
"flat_location": (
_int_or_none(epc.sap_flat_details.flat_location) if epc.sap_flat_details else None
),
"flat_heat_loss_corridor": (
_int_or_none(epc.sap_flat_details.heat_loss_corridor) if epc.sap_flat_details else None
),
# Features — energy supply categoricals
"meter_type": _meter_type_int(epc.sap_energy_source.meter_type),
"pv_connection": epc.sap_energy_source.pv_connection,
"wind_turbines_terrain_type": _wind_terrain_int(epc.sap_energy_source.wind_turbines_terrain_type),
# Targets
"sap_score": epc.energy_rating_current,
"co2_emissions": epc.co2_emissions_current,
@ -762,6 +861,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
domain object Union-int values pass through as int categoricals; str
values (from site notes) coerce to None.
"""
shower_outlets = sap_heating.shower_outlets
shower_outlet_count = 1 if shower_outlets is not None else 0
aggregates: dict[str, Any] = {
"main_heating_count": len(sap_heating.main_heating_details),
"primary_main_fuel_type": None,
@ -772,6 +873,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
"primary_fan_flue_present": None,
"primary_boiler_flue_type": None,
"primary_central_heating_pump_age": None,
"primary_sap_main_heating_code": None,
"primary_emitter_temperature": None,
"primary_main_heating_fraction": None,
"water_heating_code": sap_heating.water_heating_code,
"water_heating_fuel": sap_heating.water_heating_fuel,
"cylinder_size": (
@ -782,6 +886,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
"cylinder_insulation_thickness_mm": sap_heating.cylinder_insulation_thickness_mm,
"has_secondary_heating": sap_heating.secondary_fuel_type is not None,
"secondary_fuel_type": sap_heating.secondary_fuel_type,
"immersion_heating_type": _int_or_none(sap_heating.immersion_heating_type),
"shower_outlet_count": shower_outlet_count,
}
if sap_heating.main_heating_details:
@ -806,6 +912,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
aggregates["primary_central_heating_pump_age"] = (
primary.central_heating_pump_age
)
aggregates["primary_sap_main_heating_code"] = primary.sap_main_heating_code
aggregates["primary_emitter_temperature"] = _int_or_none(primary.emitter_temperature)
aggregates["primary_main_heating_fraction"] = primary.main_heating_fraction
return aggregates
@ -854,6 +963,25 @@ def _int_or_none(value: Any) -> Optional[int]:
return value if isinstance(value, int) else None
def _meter_type_int(value: Any) -> Optional[int]:
"""Domain mapper coerces sap_energy_source.meter_type to str(int) for site-notes
compatibility ("1", "2", ...). Parse back to int for the categorical feature."""
if isinstance(value, int):
return value
if isinstance(value, str) and value.isdigit():
return int(value)
return None
def _wind_terrain_int(value: Any) -> Optional[int]:
"""Same shape as meter_type — int coerced to str by the 21.0.x mapper."""
if isinstance(value, int):
return value
if isinstance(value, str) and value.isdigit():
return int(value)
return None
def _ground_floor(part: SapBuildingPart) -> Optional[Any]:
"""Pick the ground-floor `SapFloorDimension` (floor==0) for a building part.
@ -987,6 +1115,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
"window_avg_solar_transmittance": None,
**glazed_type_areas,
"window_pct_pvc_frame": None,
"window_pct_living": None,
"window_pct_external": None,
"window_pct_permanent_shutters": None,
}
if not windows:
return aggregates
@ -994,6 +1125,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
total_area = 0.0
draught_proofed_area = 0.0
pvc_frame_area = 0.0
living_area = 0.0
external_area = 0.0
shutters_area = 0.0
transmission_area = 0.0
weighted_u_value = 0.0
weighted_solar_transmittance = 0.0
@ -1004,6 +1138,12 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
draught_proofed_area += area
if w.frame_material == "PVC":
pvc_frame_area += area
if w.window_type == 1: # living room
living_area += area
if w.window_location == 0: # external (not conservatory)
external_area += area
if w.permanent_shutters_present is True or w.permanent_shutters_present == "Y":
shutters_area += area
if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES:
octant_areas[_OCTANT_NAMES[w.orientation]] += area
if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES:
@ -1025,6 +1165,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
draught_proofed_area / total_area * 100.0
)
aggregates["window_pct_pvc_frame"] = pvc_frame_area / total_area
aggregates["window_pct_living"] = living_area / total_area
aggregates["window_pct_external"] = external_area / total_area
aggregates["window_pct_permanent_shutters"] = shutters_area / total_area
for column, area in glazed_type_areas.items():
aggregates[column] = area / total_area
if transmission_area > 0: