mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
slice 15b: +18 features — heating type code, hot water, windows, flat, supply
Heating: primary_sap_main_heating_code (the SAP10 heating-system enum was the single biggest missing input), primary_emitter_temperature, primary_main_heating_fraction. Hot water: immersion_heating_type, shower_outlet_count (despite the name, a 0/1 flag: 1 if any shower_outlet block is declared on sap_heating, else 0). Windows: window_pct_living, window_pct_external, window_pct_permanent_shutters (area-weighted shares parallel to existing window aggregates). Dwelling: conservatory_type, has_heated_separate_conservatory. Flat-only block (sap_flat_details): flat_level, flat_top_storey, flat_storey_count, flat_location, flat_heat_loss_corridor (non-integer sentinels like '20+' coerce to None for the int-typed flat features; flat_top_storey passes through as a string). Energy supply: meter_type, pv_connection, wind_turbines_terrain_type. Also plumbs `air_tightness` EnergyElement, `sap_flat_details` and `has_heated_separate_conservatory` through the 21.0.1 mapper path (they were silently None before). Results at N=25,000 (2026 RdSAP certs): sap_score MAPE=0.044 sMAPE=0.038 R^2=0.884 (+0.045 R^2 vs 15a); co2_emissions sMAPE=0.108 R^2=0.925; peui_raw MAPE=0.092 sMAPE=0.088 R^2=0.849; peui_ucl MAPE=0.081 sMAPE=0.078 R^2=0.860; space_heating_kwh MAPE=0.111 sMAPE=0.108 R^2=0.945; hot_water_kwh MAPE=0.081 sMAPE=0.079 R^2=0.772. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
0ffda529ec
commit
9f6f7608b9
4 changed files with 183 additions and 0 deletions
|
|
@ -360,6 +360,10 @@ class EpcPropertyData:
|
|||
main_heating_controls: Optional[EnergyElement] = (
|
||||
None # site notes has heating_and_hot_water.main_heating.controls: str - doesn't map to EnergyElement
|
||||
)
|
||||
# Air-tightness EnergyElement (description + ratings) — kept as input even though
|
||||
# ratings are derived, because the `.description` text categorizes the building's
|
||||
# permeability class when no pressure test was carried out.
|
||||
air_tightness: Optional[EnergyElement] = None
|
||||
current_energy_efficiency_band: Optional[Epc] = None # not available in site notes?
|
||||
environmental_impact_current: Optional[int] = None
|
||||
heating_cost_current: Optional[float] = None
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from datatypes.epc.domain.epc_property_data import (
|
|||
SapAlternativeWall,
|
||||
SapBuildingPart,
|
||||
SapEnergySource,
|
||||
SapFlatDetails,
|
||||
SapFloorDimension,
|
||||
SapHeating,
|
||||
SapRoomInRoof,
|
||||
|
|
@ -1313,6 +1314,11 @@ class EpcPropertyDataMapper:
|
|||
has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true",
|
||||
conservatory_type=schema.conservatory_type,
|
||||
has_conservatory=schema.conservatory_type != 1,
|
||||
has_heated_separate_conservatory=(
|
||||
schema.has_heated_separate_conservatory == "true"
|
||||
if schema.has_heated_separate_conservatory is not None
|
||||
else None
|
||||
),
|
||||
# Counts
|
||||
door_count=schema.door_count,
|
||||
habitable_rooms_count=schema.habitable_room_count,
|
||||
|
|
@ -1529,6 +1535,35 @@ class EpcPropertyDataMapper:
|
|||
# Dwelling-level inputs used as ML features.
|
||||
multiple_glazed_proportion=schema.multiple_glazed_proportion,
|
||||
extract_fans_count=schema.extract_fans_count,
|
||||
# Air-tightness EnergyElement (description carries permeability class).
|
||||
air_tightness=(
|
||||
EpcPropertyDataMapper._map_energy_element(schema.air_tightness)
|
||||
if schema.air_tightness is not None
|
||||
else None
|
||||
),
|
||||
# Main-heating-controls EnergyElement (first control system if multiple).
|
||||
main_heating_controls=(
|
||||
EpcPropertyDataMapper._map_energy_element(schema.main_heating_controls[0])
|
||||
if schema.main_heating_controls
|
||||
else None
|
||||
),
|
||||
# Flat-only nested block: present in ~33% of certs.
|
||||
sap_flat_details=(
|
||||
SapFlatDetails(
|
||||
level=schema.sap_flat_details.level,
|
||||
top_storey=schema.sap_flat_details.top_storey,
|
||||
flat_location=schema.sap_flat_details.flat_location,
|
||||
heat_loss_corridor=schema.sap_flat_details.heat_loss_corridor,
|
||||
storey_count=schema.sap_flat_details.storey_count,
|
||||
unheated_corridor_length_m=(
|
||||
int(_measurement_value(schema.sap_flat_details.unheated_corridor_length))
|
||||
if schema.sap_flat_details.unheated_corridor_length is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
if schema.sap_flat_details is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -343,6 +343,7 @@ class RdSapSchema21_0_1:
|
|||
incandescent_fixed_lighting_bulbs_count: int
|
||||
# Fields below are present in some certs but absent in many real-world responses;
|
||||
# see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert.
|
||||
air_tightness: Optional[EnergyElement] = None
|
||||
extract_fans_count: Optional[int] = None
|
||||
wet_rooms_count: Optional[int] = None
|
||||
open_chimneys_count: Optional[int] = None
|
||||
|
|
|
|||
|
|
@ -498,6 +498,83 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = {
|
|||
dtype=int, nullable=True,
|
||||
description="Number of extract fans (ventilation/heat-loss proxy).",
|
||||
),
|
||||
# Heating — heating-system identity + flow temp + multi-system fraction.
|
||||
"primary_sap_main_heating_code": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="SAP10 main heating type code (canonical heating-system enum).",
|
||||
),
|
||||
"primary_emitter_temperature": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Primary heating emitter temperature class (0=standard, 1=low-temp).",
|
||||
),
|
||||
"primary_main_heating_fraction": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Fraction of space heating delivered by the primary main heating system.",
|
||||
),
|
||||
# Hot water — immersion type + presence of shower outlet block.
|
||||
"immersion_heating_type": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Electric immersion heater type SAP10 code.",
|
||||
),
|
||||
"shower_outlet_count": ColumnSpec(
|
||||
dtype=int, nullable=False,
|
||||
description="1 if any shower_outlet block is declared on sap_heating, else 0.",
|
||||
),
|
||||
# Windows — per-window-type share aggregates.
|
||||
"window_pct_living": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Area share of windows with window_type == 1 (living room).",
|
||||
),
|
||||
"window_pct_external": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Area share of windows with window_location == 0 (external).",
|
||||
),
|
||||
"window_pct_permanent_shutters": ColumnSpec(
|
||||
dtype=float, nullable=True,
|
||||
description="Area share of windows with permanent_shutters_present truthy.",
|
||||
),
|
||||
# Dwelling — conservatory + flat-only block.
|
||||
"conservatory_type": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Conservatory SAP10 code (1=none, 2=heated, 3=unheated, ...).",
|
||||
),
|
||||
"has_heated_separate_conservatory": ColumnSpec(
|
||||
dtype=bool, nullable=True,
|
||||
description="Whether the dwelling has a heated separate conservatory.",
|
||||
),
|
||||
"flat_level": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Flat-only: floor number on which the flat sits.",
|
||||
),
|
||||
"flat_top_storey": ColumnSpec(
|
||||
dtype=str, nullable=True, categorical=True,
|
||||
description="Flat-only: Y/N flag indicating whether this is the top storey.",
|
||||
),
|
||||
"flat_storey_count": ColumnSpec(
|
||||
dtype=int, nullable=True,
|
||||
description="Flat-only: storey count of the building containing the flat.",
|
||||
),
|
||||
"flat_location": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Flat-only: location SAP10 code (corner/middle/...).",
|
||||
),
|
||||
"flat_heat_loss_corridor": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Flat-only: heat-loss-corridor SAP10 code.",
|
||||
),
|
||||
# Energy supply categoricals.
|
||||
"meter_type": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Electricity meter type SAP10 code (1=Standard, 2=Off-peak, ...).",
|
||||
),
|
||||
"pv_connection": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="PV connection topology SAP10 code.",
|
||||
),
|
||||
"wind_turbines_terrain_type": ColumnSpec(
|
||||
dtype=int, nullable=True, categorical=True,
|
||||
description="Wind-turbine terrain type SAP10 code.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -649,6 +726,28 @@ class EpcMlTransform:
|
|||
"number_baths": epc.sap_heating.number_baths,
|
||||
"number_baths_wwhrs": epc.sap_heating.number_baths_wwhrs,
|
||||
"extract_fans_count": epc.extract_fans_count,
|
||||
# Features — conservatory + flat-only block
|
||||
"conservatory_type": epc.conservatory_type,
|
||||
"has_heated_separate_conservatory": epc.has_heated_separate_conservatory,
|
||||
"flat_level": (
|
||||
_int_or_none(epc.sap_flat_details.level) if epc.sap_flat_details else None
|
||||
),
|
||||
"flat_top_storey": (
|
||||
epc.sap_flat_details.top_storey if epc.sap_flat_details else None
|
||||
),
|
||||
"flat_storey_count": (
|
||||
_int_or_none(epc.sap_flat_details.storey_count) if epc.sap_flat_details else None
|
||||
),
|
||||
"flat_location": (
|
||||
_int_or_none(epc.sap_flat_details.flat_location) if epc.sap_flat_details else None
|
||||
),
|
||||
"flat_heat_loss_corridor": (
|
||||
_int_or_none(epc.sap_flat_details.heat_loss_corridor) if epc.sap_flat_details else None
|
||||
),
|
||||
# Features — energy supply categoricals
|
||||
"meter_type": _meter_type_int(epc.sap_energy_source.meter_type),
|
||||
"pv_connection": epc.sap_energy_source.pv_connection,
|
||||
"wind_turbines_terrain_type": _wind_terrain_int(epc.sap_energy_source.wind_turbines_terrain_type),
|
||||
# Targets
|
||||
"sap_score": epc.energy_rating_current,
|
||||
"co2_emissions": epc.co2_emissions_current,
|
||||
|
|
@ -762,6 +861,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
|
|||
domain object — Union-int values pass through as int categoricals; str
|
||||
values (from site notes) coerce to None.
|
||||
"""
|
||||
shower_outlets = sap_heating.shower_outlets
|
||||
shower_outlet_count = 1 if shower_outlets is not None else 0
|
||||
aggregates: dict[str, Any] = {
|
||||
"main_heating_count": len(sap_heating.main_heating_details),
|
||||
"primary_main_fuel_type": None,
|
||||
|
|
@ -772,6 +873,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
|
|||
"primary_fan_flue_present": None,
|
||||
"primary_boiler_flue_type": None,
|
||||
"primary_central_heating_pump_age": None,
|
||||
"primary_sap_main_heating_code": None,
|
||||
"primary_emitter_temperature": None,
|
||||
"primary_main_heating_fraction": None,
|
||||
"water_heating_code": sap_heating.water_heating_code,
|
||||
"water_heating_fuel": sap_heating.water_heating_fuel,
|
||||
"cylinder_size": (
|
||||
|
|
@ -782,6 +886,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
|
|||
"cylinder_insulation_thickness_mm": sap_heating.cylinder_insulation_thickness_mm,
|
||||
"has_secondary_heating": sap_heating.secondary_fuel_type is not None,
|
||||
"secondary_fuel_type": sap_heating.secondary_fuel_type,
|
||||
"immersion_heating_type": _int_or_none(sap_heating.immersion_heating_type),
|
||||
"shower_outlet_count": shower_outlet_count,
|
||||
}
|
||||
|
||||
if sap_heating.main_heating_details:
|
||||
|
|
@ -806,6 +912,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]:
|
|||
aggregates["primary_central_heating_pump_age"] = (
|
||||
primary.central_heating_pump_age
|
||||
)
|
||||
aggregates["primary_sap_main_heating_code"] = primary.sap_main_heating_code
|
||||
aggregates["primary_emitter_temperature"] = _int_or_none(primary.emitter_temperature)
|
||||
aggregates["primary_main_heating_fraction"] = primary.main_heating_fraction
|
||||
|
||||
return aggregates
|
||||
|
||||
|
|
@ -854,6 +963,25 @@ def _int_or_none(value: Any) -> Optional[int]:
|
|||
return value if isinstance(value, int) else None
|
||||
|
||||
|
||||
def _meter_type_int(value: Any) -> Optional[int]:
|
||||
"""Domain mapper coerces sap_energy_source.meter_type to str(int) for site-notes
|
||||
compatibility ("1", "2", ...). Parse back to int for the categorical feature."""
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, str) and value.isdigit():
|
||||
return int(value)
|
||||
return None
|
||||
|
||||
|
||||
def _wind_terrain_int(value: Any) -> Optional[int]:
|
||||
"""Same shape as meter_type — int coerced to str by the 21.0.x mapper."""
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, str) and value.isdigit():
|
||||
return int(value)
|
||||
return None
|
||||
|
||||
|
||||
def _ground_floor(part: SapBuildingPart) -> Optional[Any]:
|
||||
"""Pick the ground-floor `SapFloorDimension` (floor==0) for a building part.
|
||||
|
||||
|
|
@ -987,6 +1115,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
"window_avg_solar_transmittance": None,
|
||||
**glazed_type_areas,
|
||||
"window_pct_pvc_frame": None,
|
||||
"window_pct_living": None,
|
||||
"window_pct_external": None,
|
||||
"window_pct_permanent_shutters": None,
|
||||
}
|
||||
if not windows:
|
||||
return aggregates
|
||||
|
|
@ -994,6 +1125,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
total_area = 0.0
|
||||
draught_proofed_area = 0.0
|
||||
pvc_frame_area = 0.0
|
||||
living_area = 0.0
|
||||
external_area = 0.0
|
||||
shutters_area = 0.0
|
||||
transmission_area = 0.0
|
||||
weighted_u_value = 0.0
|
||||
weighted_solar_transmittance = 0.0
|
||||
|
|
@ -1004,6 +1138,12 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
draught_proofed_area += area
|
||||
if w.frame_material == "PVC":
|
||||
pvc_frame_area += area
|
||||
if w.window_type == 1: # living room
|
||||
living_area += area
|
||||
if w.window_location == 0: # external (not conservatory)
|
||||
external_area += area
|
||||
if w.permanent_shutters_present is True or w.permanent_shutters_present == "Y":
|
||||
shutters_area += area
|
||||
if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES:
|
||||
octant_areas[_OCTANT_NAMES[w.orientation]] += area
|
||||
if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES:
|
||||
|
|
@ -1025,6 +1165,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]:
|
|||
draught_proofed_area / total_area * 100.0
|
||||
)
|
||||
aggregates["window_pct_pvc_frame"] = pvc_frame_area / total_area
|
||||
aggregates["window_pct_living"] = living_area / total_area
|
||||
aggregates["window_pct_external"] = external_area / total_area
|
||||
aggregates["window_pct_permanent_shutters"] = shutters_area / total_area
|
||||
for column, area in glazed_type_areas.items():
|
||||
aggregates[column] = area / total_area
|
||||
if transmission_area > 0:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue