From 195336b7e15b88c5537bbc6a90dc6e37da1d6405 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Sun, 17 May 2026 10:13:03 +0000
Subject: [PATCH] slice 15d: +45 features (gap fill + secondary building part); drop 2 derived

Removes:
- environmental_impact_current (SAP-derived rating, leaks into co2 target)
- energy_rating_average (average of sap_score + potential, direct leak)

Adds:
  Doors           draughtproofed_door_count, insulated_door_u_value
  Hot water       cylinder_insulation_type, cylinder_thermostat,
                  secondary_heating_type
  Ventilation     mechanical_vent_duct_placement, _duct_insulation,
                  _duct_insulation_level, _measured_installation
  Lighting        low_energy_fixed_lighting_bulbs_count,
                  fixed_lighting_outlets_count,
                  low_energy_fixed_lighting_outlets_count
  Windows         window_avg_glazing_gap_mm, window_avg_frame_factor,
                  window_pct_permanent_shutters_insulated
  Main dwelling   room_in_roof_floor_area_m2, alternative_wall_count,
                  alternative_wall_area_m2,
                  flat_roof_insulation_thickness_mm,
                  wall_thickness_measured
  Element counts  wall_count, roof_count, floor_count,
                  main_heating_count_elements,
                  main_heating_controls_present
  Wind            wind_turbine_hub_height_m, wind_turbine_rotor_diameter_m
  Flat            flat_unheated_corridor_length_m
  Addendum        addendum_stone_walls, addendum_system_build,
                  addendum_numbers_count
  LZC             lzc_energy_sources_count
  Secondary part  secondary_dwelling_present + 11 fabric features
                  (wall/roof/floor construction + insulation + thickness
                  + area + heat-loss perimeter)
                  + other_building_parts_count

Wires through schema -> domain -> mapper: adds Addendum dataclass,
lzc_energy_sources, mechanical_vent_duct_insulation_level. Also fixes
_measurement_value to accept raw dicts (from_dict left some Measurement
fields as dict when they weren't typed as a dataclass).
Results at N=25,000 2026 RdSAP certs: sap_score MAPE=0.043 sMAPE=0.036 R^2=0.891 co2_emissions sMAPE=0.106 R^2=0.929 peui_raw MAPE=0.087 sMAPE=0.084 R^2=0.860 peui_ucl MAPE=0.079 sMAPE=0.076 R^2=0.866 space_heating_kwh MAPE=0.112 sMAPE=0.108 R^2=0.947 hot_water_kwh MAPE=0.071 sMAPE=0.069 R^2=0.854 (+0.082 R^2 vs 15b) Co-Authored-By: Claude Opus 4.7 --- datatypes/epc/domain/epc_property_data.py | 16 + datatypes/epc/domain/mapper.py | 34 +- datatypes/epc/schema/rdsap_schema_21_0_1.py | 2 + .../src/domain/ml/tests/test_transform.py | 6 - packages/domain/src/domain/ml/transform.py | 372 +++++++++++++++++- 5 files changed, 407 insertions(+), 23 deletions(-) diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index a84d20db..3cd7473e 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -12,6 +12,18 @@ class EnergyElement: environmental_efficiency_rating: int +@dataclass +class Addendum: + """Optional cert-level addendum carrying construction-detail flags. + + Present on ~43% of real RdSAP certs (stone-walls / system-build / a list of + numeric improvement codes the assessor wanted to call out). + """ + stone_walls: Optional[bool] = None + system_build: Optional[bool] = None + addendum_numbers: Optional[List[int]] = None + + @dataclass class InstantaneousWwhrs: wwhrs_index_number1: Optional[int] = None @@ -395,12 +407,16 @@ class EpcPropertyData: windows_transmission_details: Optional[WindowsTransmissionDetails] = None multiple_glazed_proportion: Optional[int] = None extract_fans_count: Optional[int] = None + # Optional cert-level addendum + LZC source codes. + addendum: Optional[Addendum] = None + lzc_energy_sources: Optional[List[int]] = None calculation_software_version: Optional[str] = None # Do we care about this? 
mechanical_vent_duct_placement: Optional[int] = None mechanical_vent_duct_insulation: Optional[int] = None pressure_test_certificate_number: Optional[int] = None mechanical_ventilation_index_number: Optional[int] = None mechanical_vent_measured_installation: Optional[str] = None + mechanical_vent_duct_insulation_level: Optional[int] = None co2_emissions_current_per_floor_area: Optional[int] = None low_energy_fixed_lighting_bulbs_count: Optional[int] = None sap_flat_details: Optional[SapFlatDetails] = None diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index a3e85b0e..8a3d6454 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3,6 +3,7 @@ from typing import List, Optional, Sequence, Union, Dict, Any from datatypes.epc.schema.helpers import from_dict from datatypes.epc.domain.epc_property_data import ( + Addendum, EnergyElement, EpcPropertyData, InstantaneousWwhrs, @@ -1040,7 +1041,7 @@ class EpcPropertyDataMapper: roof_insulation_thickness=bp.roof_insulation_thickness, sap_room_in_roof=( SapRoomInRoof( - floor_area=bp.sap_room_in_roof.floor_area, + floor_area=_measurement_value(bp.sap_room_in_roof.floor_area), construction_age_band=bp.sap_room_in_roof.construction_age_band, ) if bp.sap_room_in_roof @@ -1229,7 +1230,7 @@ class EpcPropertyDataMapper: roof_insulation_thickness=bp.roof_insulation_thickness, sap_room_in_roof=( SapRoomInRoof( - floor_area=bp.sap_room_in_roof.floor_area, + floor_area=_measurement_value(bp.sap_room_in_roof.floor_area), construction_age_band=bp.sap_room_in_roof.construction_age_band, ) if bp.sap_room_in_roof @@ -1474,7 +1475,7 @@ class EpcPropertyDataMapper: roof_insulation_thickness=bp.roof_insulation_thickness, sap_room_in_roof=( SapRoomInRoof( - floor_area=bp.sap_room_in_roof.floor_area, + floor_area=_measurement_value(bp.sap_room_in_roof.floor_area), construction_age_band=bp.sap_room_in_roof.construction_age_band, ) if bp.sap_room_in_roof @@ -1535,6 +1536,26 @@ class 
EpcPropertyDataMapper: # Dwelling-level inputs used as ML features. multiple_glazed_proportion=schema.multiple_glazed_proportion, extract_fans_count=schema.extract_fans_count, + insulated_door_u_value=schema.insulated_door_u_value, + mechanical_vent_duct_placement=schema.mechanical_vent_duct_placement, + mechanical_vent_duct_insulation=schema.mechanical_vent_duct_insulation, + mechanical_vent_duct_insulation_level=schema.mechanical_vent_duct_insulation_level, + mechanical_vent_measured_installation=schema.mechanical_vent_measured_installation, + low_energy_fixed_lighting_bulbs_count=schema.low_energy_fixed_lighting_bulbs_count, + fixed_lighting_outlets_count=schema.fixed_lighting_outlets_count, + low_energy_fixed_lighting_outlets_count=schema.low_energy_fixed_lighting_outlets_count, + addendum=( + Addendum( + stone_walls=schema.addendum.stone_walls == "true" + if schema.addendum.stone_walls is not None else None, + system_build=schema.addendum.system_build == "true" + if schema.addendum.system_build is not None else None, + addendum_numbers=schema.addendum.addendum_numbers, + ) + if schema.addendum is not None + else None + ), + lzc_energy_sources=schema.lzc_energy_sources, # Air-tightness EnergyElement (description carries permeability class). air_tightness=( EpcPropertyDataMapper._map_energy_element(schema.air_tightness) @@ -1635,10 +1656,13 @@ class EpcPropertyDataMapper: def _measurement_value(field: Any) -> float: - """SAP floor-dim measurements arrive as either a `Measurement` (with `.value`) - or a plain int/float (real-API certs). Coerce to float either way.""" + """SAP measurements arrive as a `Measurement` (with `.value`), a raw dict + {'value': N, 'quantity': '...'} when `from_dict` didn't coerce, or a plain + int/float (real-API certs). 
Coerce to float across all three shapes.""" if hasattr(field, "value"): return float(field.value) + if isinstance(field, dict) and "value" in field: + return float(field["value"]) return float(field) diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 8563180c..f9c6125b 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -367,3 +367,5 @@ class RdSapSchema21_0_1: has_heated_separate_conservatory: Optional[str] = None fixed_lighting_outlets_count: Optional[int] = None low_energy_fixed_lighting_outlets_count: Optional[int] = None + # LZC (low-carbon) energy-source codes flagged on the cert. + lzc_energy_sources: Optional[List[int]] = None diff --git a/packages/domain/src/domain/ml/tests/test_transform.py b/packages/domain/src/domain/ml/tests/test_transform.py index a0b67a2a..cfc00894 100644 --- a/packages/domain/src/domain/ml/tests/test_transform.py +++ b/packages/domain/src/domain/ml/tests/test_transform.py @@ -203,8 +203,6 @@ _EXPECTED_FLAT_BOOLEAN_FEATURES: tuple[str, ...] = ( _EXPECTED_OPTIONAL_INT_FEATURES: tuple[str, ...] = ( "percent_draughtproofed", - "energy_rating_average", - "environmental_impact_current", ) @@ -236,8 +234,6 @@ def test_to_row_extracts_boolean_and_optional_int_features() -> None: has_hot_water_cylinder=True, has_fixed_air_conditioning=False, percent_draughtproofed=100, - energy_rating_average=60, - environmental_impact_current=72, ) transform = EpcMlTransform() @@ -249,8 +245,6 @@ def test_to_row_extracts_boolean_and_optional_int_features() -> None: assert row["has_hot_water_cylinder"] is True assert row["has_fixed_air_conditioning"] is False assert row["percent_draughtproofed"] == 100 - assert row["energy_rating_average"] == 60 - assert row["environmental_impact_current"] == 72 _NULLABLE_CATEGORICAL_FEATURES: tuple[str, ...] 
= ( diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index aa9aaa40..bc23ff1b 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -114,16 +114,6 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { nullable=True, description="Percentage of windows / doors with draught proofing.", ), - "energy_rating_average": ColumnSpec( - dtype=int, - nullable=True, - description="Average SAP score for comparable properties (neighbourhood comparator).", - ), - "environmental_impact_current": ColumnSpec( - dtype=int, - nullable=True, - description="Environmental impact rating; separate from energy efficiency SAP score.", - ), # Categoricals — emitted as raw strings; downstream casts to pd.Categorical "dwelling_type": ColumnSpec( dtype=str, @@ -575,6 +565,198 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, categorical=True, description="Wind-turbine terrain type SAP10 code.", ), + # Doors. + "draughtproofed_door_count": ColumnSpec( + dtype=int, nullable=True, + description="Number of draught-proofed doors.", + ), + "insulated_door_u_value": ColumnSpec( + dtype=float, nullable=True, + description="U-value of insulated doors (W/m^2K).", + ), + # Hot water extras. + "cylinder_insulation_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Cylinder insulation type SAP10 code (string sentinels -> None).", + ), + "cylinder_thermostat": ColumnSpec( + dtype=str, nullable=True, categorical=True, + description="Cylinder-thermostat flag (Y/N/missing).", + ), + "secondary_heating_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary heating type SAP10 code (distinct from secondary_fuel_type).", + ), + # Mechanical ventilation extras. 
+ "mechanical_vent_duct_placement": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Mechanical-vent duct placement SAP10 code.", + ), + "mechanical_vent_duct_insulation": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Mechanical-vent duct insulation SAP10 code.", + ), + "mechanical_vent_duct_insulation_level": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Mechanical-vent duct insulation level SAP10 code.", + ), + "mechanical_vent_measured_installation": ColumnSpec( + dtype=bool, nullable=True, + description="Whether mechanical ventilation was measured at installation.", + ), + # Lighting extras. + "low_energy_fixed_lighting_bulbs_count": ColumnSpec( + dtype=int, nullable=True, + description="Number of low-energy fixed-lighting bulbs (separate from CFL/LED).", + ), + "fixed_lighting_outlets_count": ColumnSpec( + dtype=int, nullable=True, + description="Total number of fixed-lighting outlets.", + ), + "low_energy_fixed_lighting_outlets_count": ColumnSpec( + dtype=int, nullable=True, + description="Number of low-energy fixed-lighting outlets.", + ), + # Window extras (per-window scalars area-weighted across windows). + "window_avg_glazing_gap_mm": ColumnSpec( + dtype=float, nullable=True, + description="Area-weighted average glazing gap in mm (non-numeric sentinels excluded).", + ), + "window_avg_frame_factor": ColumnSpec( + dtype=float, nullable=True, + description="Area-weighted average frame factor across windows.", + ), + "window_pct_permanent_shutters_insulated": ColumnSpec( + dtype=float, nullable=True, + description="Area share of windows with permanent_shutters_insulated == 'Y'.", + ), + # Main-dwelling extras: room-in-roof + alternative walls + flat-roof + measured flag. 
+ "main_dwelling_room_in_roof_floor_area_m2": ColumnSpec( + dtype=float, nullable=True, + description="Floor area of main dwelling room-in-roof block (when present).", + ), + "main_dwelling_alternative_wall_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of sap_alternative_wall_* blocks on the main dwelling (0-2).", + ), + "main_dwelling_alternative_wall_area_m2": ColumnSpec( + dtype=float, nullable=False, + description="Sum of sap_alternative_wall_*.wall_area for the main dwelling.", + ), + "main_dwelling_flat_roof_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Main dwelling flat-roof insulation thickness in mm (rare).", + ), + "main_dwelling_wall_thickness_measured": ColumnSpec( + dtype=bool, nullable=True, + description="Main dwelling wall_thickness_measured flag.", + ), + # Element list counts (split-fabric discriminator). + "wall_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of entries in the top-level walls EnergyElement list.", + ), + "roof_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of entries in the top-level roofs EnergyElement list.", + ), + "floor_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of entries in the top-level floors EnergyElement list.", + ), + "main_heating_count_elements": ColumnSpec( + dtype=int, nullable=False, + description="Number of entries in the top-level main_heating EnergyElement list.", + ), + "main_heating_controls_present": ColumnSpec( + dtype=bool, nullable=False, + description="Whether the cert carries a main_heating_controls EnergyElement.", + ), + # Wind turbine geometry. + "wind_turbine_hub_height_m": ColumnSpec( + dtype=float, nullable=True, + description="Hub height of the (first) wind turbine, metres.", + ), + "wind_turbine_rotor_diameter_m": ColumnSpec( + dtype=float, nullable=True, + description="Rotor diameter of the (first) wind turbine, metres.", + ), + # Flat extras. 
+ "flat_unheated_corridor_length_m": ColumnSpec( + dtype=int, nullable=True, + description="Flat-only: length (m) of any unheated corridor adjacent to the dwelling.", + ), + # Addendum (~43% present). + "addendum_stone_walls": ColumnSpec( + dtype=bool, nullable=True, + description="Addendum: stone-wall construction flagged by assessor.", + ), + "addendum_system_build": ColumnSpec( + dtype=bool, nullable=True, + description="Addendum: system-build construction flagged by assessor.", + ), + "addendum_numbers_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of addendum codes flagged.", + ), + # Low-carbon energy sources. + "lzc_energy_sources_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of LZC energy-source codes declared (0 if none).", + ), + # Secondary building part (first non-main building part; ~36% of certs). + "secondary_dwelling_present": ColumnSpec( + dtype=bool, nullable=False, + description="True if there is a building part beyond the Main Dwelling.", + ), + "secondary_dwelling_wall_construction": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary building part wall construction SAP10 code.", + ), + "secondary_dwelling_wall_insulation_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary building part wall insulation type SAP10 code.", + ), + "secondary_dwelling_wall_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Secondary building part wall insulation thickness in mm.", + ), + "secondary_dwelling_wall_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Secondary building part external wall thickness in mm.", + ), + "secondary_dwelling_roof_construction": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary building part roof construction SAP10 code.", + ), + "secondary_dwelling_roof_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + 
description="Secondary building part roof insulation thickness in mm.", + ), + "secondary_dwelling_floor_construction": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary building part ground-floor construction SAP10 code.", + ), + "secondary_dwelling_floor_insulation": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Secondary building part ground-floor insulation SAP10 code.", + ), + "secondary_dwelling_floor_insulation_thickness_mm": ColumnSpec( + dtype=int, nullable=True, + description="Secondary building part floor insulation thickness in mm.", + ), + "secondary_dwelling_total_floor_area_m2": ColumnSpec( + dtype=float, nullable=True, + description="Secondary building part total floor area (sum of its sap_floor_dimensions).", + ), + "secondary_dwelling_heat_loss_perimeter_m": ColumnSpec( + dtype=float, nullable=True, + description="Secondary building part heat-loss perimeter (sum of its sap_floor_dimensions).", + ), + "other_building_parts_count": ColumnSpec( + dtype=int, nullable=False, + description="Number of building parts beyond Main Dwelling and the secondary part.", + ), } @@ -696,8 +878,6 @@ class EpcMlTransform: "has_fixed_air_conditioning": epc.has_fixed_air_conditioning, # Features — optional integer indicators "percent_draughtproofed": epc.percent_draughtproofed, - "energy_rating_average": epc.energy_rating_average, - "environmental_impact_current": epc.environmental_impact_current, # Features — categoricals (raw strings; cast at parquet write time) "dwelling_type": epc.dwelling_type, "tenure": epc.tenure, @@ -748,6 +928,57 @@ class EpcMlTransform: "meter_type": _meter_type_int(epc.sap_energy_source.meter_type), "pv_connection": epc.sap_energy_source.pv_connection, "wind_turbines_terrain_type": _wind_terrain_int(epc.sap_energy_source.wind_turbines_terrain_type), + # Features — doors + "draughtproofed_door_count": epc.draughtproofed_door_count, + "insulated_door_u_value": 
epc.insulated_door_u_value, + # Features — hot water extras + "cylinder_insulation_type": _int_or_none(epc.sap_heating.cylinder_insulation_type), + "cylinder_thermostat": epc.sap_heating.cylinder_thermostat, + "secondary_heating_type": _int_or_none(epc.sap_heating.secondary_heating_type), + # Features — mechanical ventilation extras + "mechanical_vent_duct_placement": epc.mechanical_vent_duct_placement, + "mechanical_vent_duct_insulation": epc.mechanical_vent_duct_insulation, + "mechanical_vent_duct_insulation_level": epc.mechanical_vent_duct_insulation_level, + "mechanical_vent_measured_installation": _truthy_yn(epc.mechanical_vent_measured_installation), + # Features — lighting extras + "low_energy_fixed_lighting_bulbs_count": epc.low_energy_fixed_lighting_bulbs_count, + "fixed_lighting_outlets_count": epc.fixed_lighting_outlets_count, + "low_energy_fixed_lighting_outlets_count": epc.low_energy_fixed_lighting_outlets_count, + # Features — element list counts (split-fabric discriminators) + "wall_count": len(epc.walls), + "roof_count": len(epc.roofs), + "floor_count": len(epc.floors), + "main_heating_count_elements": len(epc.main_heating), + "main_heating_controls_present": epc.main_heating_controls is not None, + # Features — wind turbine geometry + "wind_turbine_hub_height_m": ( + epc.sap_energy_source.wind_turbine_details.hub_height + if epc.sap_energy_source.wind_turbine_details is not None else None + ), + "wind_turbine_rotor_diameter_m": ( + epc.sap_energy_source.wind_turbine_details.rotor_diameter + if epc.sap_energy_source.wind_turbine_details is not None else None + ), + # Features — flat unheated corridor length + "flat_unheated_corridor_length_m": ( + epc.sap_flat_details.unheated_corridor_length_m + if epc.sap_flat_details is not None else None + ), + # Features — addendum + LZC + "addendum_stone_walls": ( + epc.addendum.stone_walls if epc.addendum is not None else None + ), + "addendum_system_build": ( + epc.addendum.system_build if epc.addendum is 
not None else None + ), + "addendum_numbers_count": ( + len(epc.addendum.addendum_numbers) + if epc.addendum is not None and epc.addendum.addendum_numbers is not None + else 0 + ), + "lzc_energy_sources_count": ( + len(epc.lzc_energy_sources) if epc.lzc_energy_sources is not None else 0 + ), # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current, @@ -931,6 +1162,23 @@ _MAIN_DWELLING_FABRIC_COLUMNS = ( "main_dwelling_floor_insulation", "main_dwelling_floor_insulation_thickness_mm", "main_dwelling_floor_heat_loss", + "main_dwelling_room_in_roof_floor_area_m2", + "main_dwelling_flat_roof_insulation_thickness_mm", + "main_dwelling_wall_thickness_measured", +) + +_SECONDARY_DWELLING_FABRIC_COLUMNS = ( + "secondary_dwelling_wall_construction", + "secondary_dwelling_wall_insulation_type", + "secondary_dwelling_wall_insulation_thickness_mm", + "secondary_dwelling_wall_thickness_mm", + "secondary_dwelling_roof_construction", + "secondary_dwelling_roof_insulation_thickness_mm", + "secondary_dwelling_floor_construction", + "secondary_dwelling_floor_insulation", + "secondary_dwelling_floor_insulation_thickness_mm", + "secondary_dwelling_total_floor_area_m2", + "secondary_dwelling_heat_loss_perimeter_m", ) @@ -982,6 +1230,21 @@ def _wind_terrain_int(value: Any) -> Optional[int]: return None +def _truthy_yn(value: Any) -> Optional[bool]: + """Map 'Y'/'true'/True to True, 'N'/'false'/False to False, anything else to None.""" + if value is None: + return None + if isinstance(value, bool): + return value + if isinstance(value, str): + v = value.strip().lower() + if v in ("y", "true", "yes", "1"): + return True + if v in ("n", "false", "no", "0"): + return False + return None + + def _ground_floor(part: SapBuildingPart) -> Optional[Any]: """Pick the ground-floor `SapFloorDimension` (floor==0) for a building part. 
@@ -1020,9 +1283,15 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: "main_dwelling_construction_age_band": None, "main_dwelling_wall_construction": None, "main_dwelling_roof_construction": None, + "main_dwelling_alternative_wall_count": 0, + "main_dwelling_alternative_wall_area_m2": 0.0, + "secondary_dwelling_present": False, + "other_building_parts_count": 0, } for col in _MAIN_DWELLING_FABRIC_COLUMNS: aggregates[col] = None + for col in _SECONDARY_DWELLING_FABRIC_COLUMNS: + aggregates[col] = None if not parts: return aggregates @@ -1089,6 +1358,62 @@ def _building_part_aggregates(parts: list[SapBuildingPart]) -> dict[str, Any]: if ground_floor is not None: aggregates["main_dwelling_floor_construction"] = ground_floor.floor_construction aggregates["main_dwelling_floor_insulation"] = ground_floor.floor_insulation + # Main dwelling extras: room-in-roof, alternative walls, flat-roof, measured flag. + if main.sap_room_in_roof is not None: + aggregates["main_dwelling_room_in_roof_floor_area_m2"] = float( + main.sap_room_in_roof.floor_area + ) + alt_count = 0 + alt_area = 0.0 + for alt in (main.sap_alternative_wall_1, main.sap_alternative_wall_2): + if alt is not None: + alt_count += 1 + alt_area += float(alt.wall_area) + aggregates["main_dwelling_alternative_wall_count"] = alt_count + aggregates["main_dwelling_alternative_wall_area_m2"] = alt_area + aggregates["main_dwelling_flat_roof_insulation_thickness_mm"] = _parse_thickness_mm( + main.flat_roof_insulation_thickness + ) + aggregates["main_dwelling_wall_thickness_measured"] = main.wall_thickness_measured + + # Secondary building part — first non-main entry in the list. 
+ secondary = next( + (p for p in parts if p.identifier != _MAIN_DWELLING_IDENTIFIER), None + ) + if secondary is not None: + aggregates["secondary_dwelling_present"] = True + aggregates["secondary_dwelling_wall_construction"] = _int_or_none( + secondary.wall_construction + ) + aggregates["secondary_dwelling_wall_insulation_type"] = _int_or_none( + secondary.wall_insulation_type + ) + aggregates["secondary_dwelling_wall_insulation_thickness_mm"] = _parse_thickness_mm( + secondary.wall_insulation_thickness + ) + aggregates["secondary_dwelling_wall_thickness_mm"] = secondary.wall_thickness_mm + aggregates["secondary_dwelling_roof_construction"] = secondary.roof_construction + aggregates["secondary_dwelling_roof_insulation_thickness_mm"] = _parse_thickness_mm( + secondary.roof_insulation_thickness + ) + sec_ground = _ground_floor(secondary) + if sec_ground is not None: + aggregates["secondary_dwelling_floor_construction"] = sec_ground.floor_construction + aggregates["secondary_dwelling_floor_insulation"] = sec_ground.floor_insulation + aggregates["secondary_dwelling_floor_insulation_thickness_mm"] = _parse_thickness_mm( + secondary.floor_insulation_thickness + ) + sec_floor_area = 0.0 + sec_hlp = 0.0 + if secondary.sap_floor_dimensions: + for fd in secondary.sap_floor_dimensions: + sec_floor_area += fd.total_floor_area_m2 + sec_hlp += fd.heat_loss_perimeter_m + aggregates["secondary_dwelling_total_floor_area_m2"] = sec_floor_area + aggregates["secondary_dwelling_heat_loss_perimeter_m"] = sec_hlp + + # Anything beyond main + secondary just gets counted (extension chains, etc.). 
+ aggregates["other_building_parts_count"] = max(0, len(parts) - (1 if main else 0) - (1 if secondary else 0)) return aggregates @@ -1118,6 +1443,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: "window_pct_living": None, "window_pct_external": None, "window_pct_permanent_shutters": None, + "window_avg_glazing_gap_mm": None, + "window_avg_frame_factor": None, + "window_pct_permanent_shutters_insulated": None, } if not windows: return aggregates @@ -1128,9 +1456,14 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: living_area = 0.0 external_area = 0.0 shutters_area = 0.0 + shutters_insulated_area = 0.0 transmission_area = 0.0 weighted_u_value = 0.0 weighted_solar_transmittance = 0.0 + glazing_gap_area = 0.0 + weighted_glazing_gap = 0.0 + frame_factor_area = 0.0 + weighted_frame_factor = 0.0 for w in windows: area = w.window_width * w.window_height total_area += area @@ -1144,6 +1477,14 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: external_area += area if w.permanent_shutters_present is True or w.permanent_shutters_present == "Y": shutters_area += area + if w.permanent_shutters_insulated == "Y": + shutters_insulated_area += area + if isinstance(w.glazing_gap, int): + glazing_gap_area += area + weighted_glazing_gap += float(w.glazing_gap) * area + if w.frame_factor is not None: + frame_factor_area += area + weighted_frame_factor += float(w.frame_factor) * area if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES: octant_areas[_OCTANT_NAMES[w.orientation]] += area if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES: @@ -1168,6 +1509,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: aggregates["window_pct_living"] = living_area / total_area aggregates["window_pct_external"] = external_area / total_area aggregates["window_pct_permanent_shutters"] = shutters_area / total_area + aggregates["window_pct_permanent_shutters_insulated"] = ( + 
shutters_insulated_area / total_area + ) for column, area in glazed_type_areas.items(): aggregates[column] = area / total_area if transmission_area > 0: @@ -1175,4 +1519,8 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: aggregates["window_avg_solar_transmittance"] = ( weighted_solar_transmittance / transmission_area ) + if glazing_gap_area > 0: + aggregates["window_avg_glazing_gap_mm"] = weighted_glazing_gap / glazing_gap_area + if frame_factor_area > 0: + aggregates["window_avg_frame_factor"] = weighted_frame_factor / frame_factor_area return aggregates