diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index c7cc4f0f..a84d20db 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -360,6 +360,10 @@ class EpcPropertyData: main_heating_controls: Optional[EnergyElement] = ( None # site notes has heating_and_hot_water.main_heating.controls: str - doesn't map to EnergyElement ) + # Air-tightness EnergyElement (description + ratings) — kept as input even though + # ratings are derived, because the `.description` text categorizes the building's + # permeability class when no pressure test was carried out. + air_tightness: Optional[EnergyElement] = None current_energy_efficiency_band: Optional[Epc] = None # not available in site notes? environmental_impact_current: Optional[int] = None heating_cost_current: Optional[float] = None diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index a2bc1a02..a3e85b0e 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -16,6 +16,7 @@ from datatypes.epc.domain.epc_property_data import ( SapAlternativeWall, SapBuildingPart, SapEnergySource, + SapFlatDetails, SapFloorDimension, SapHeating, SapRoomInRoof, @@ -1313,6 +1314,11 @@ class EpcPropertyDataMapper: has_fixed_air_conditioning=schema.has_fixed_air_conditioning == "true", conservatory_type=schema.conservatory_type, has_conservatory=schema.conservatory_type != 1, + has_heated_separate_conservatory=( + schema.has_heated_separate_conservatory == "true" + if schema.has_heated_separate_conservatory is not None + else None + ), # Counts door_count=schema.door_count, habitable_rooms_count=schema.habitable_room_count, @@ -1529,6 +1535,35 @@ class EpcPropertyDataMapper: # Dwelling-level inputs used as ML features. multiple_glazed_proportion=schema.multiple_glazed_proportion, extract_fans_count=schema.extract_fans_count, + # Air-tightness EnergyElement (description carries permeability class). + air_tightness=( + EpcPropertyDataMapper._map_energy_element(schema.air_tightness) + if schema.air_tightness is not None + else None + ), + # Main-heating-controls EnergyElement (first control system if multiple). + main_heating_controls=( + EpcPropertyDataMapper._map_energy_element(schema.main_heating_controls[0]) + if schema.main_heating_controls + else None + ), + # Flat-only nested block: present in ~33% of certs. + sap_flat_details=( + SapFlatDetails( + level=schema.sap_flat_details.level, + top_storey=schema.sap_flat_details.top_storey, + flat_location=schema.sap_flat_details.flat_location, + heat_loss_corridor=schema.sap_flat_details.heat_loss_corridor, + storey_count=schema.sap_flat_details.storey_count, + unheated_corridor_length_m=( + int(_measurement_value(schema.sap_flat_details.unheated_corridor_length)) + if schema.sap_flat_details.unheated_corridor_length is not None + else None + ), + ) + if schema.sap_flat_details is not None + else None + ), ) @staticmethod diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index f4531c52..8563180c 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -343,6 +343,7 @@ class RdSapSchema21_0_1: incandescent_fixed_lighting_bulbs_count: int # Fields below are present in some certs but absent in many real-world responses; # see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert. + air_tightness: Optional[EnergyElement] = None extract_fans_count: Optional[int] = None wet_rooms_count: Optional[int] = None open_chimneys_count: Optional[int] = None diff --git a/packages/domain/src/domain/ml/transform.py b/packages/domain/src/domain/ml/transform.py index 9f9fd79f..aa9aaa40 100644 --- a/packages/domain/src/domain/ml/transform.py +++ b/packages/domain/src/domain/ml/transform.py @@ -498,6 +498,83 @@ _FEATURE_COLUMNS: dict[str, ColumnSpec] = { dtype=int, nullable=True, description="Number of extract fans (ventilation/heat-loss proxy).", ), + # Heating — heating-system identity + flow temp + multi-system fraction. + "primary_sap_main_heating_code": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="SAP10 main heating type code (canonical heating-system enum).", + ), + "primary_emitter_temperature": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Primary heating emitter temperature class (0=standard, 1=low-temp).", + ), + "primary_main_heating_fraction": ColumnSpec( + dtype=float, nullable=True, + description="Fraction of space heating delivered by the primary main heating system.", + ), + # Hot water — immersion type + presence of shower outlet block. + "immersion_heating_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Electric immersion heater type SAP10 code.", + ), + "shower_outlet_count": ColumnSpec( + dtype=int, nullable=False, + description="1 if any shower_outlet block is declared on sap_heating, else 0.", + ), + # Windows — per-window-type share aggregates. + "window_pct_living": ColumnSpec( + dtype=float, nullable=True, + description="Area share of windows with window_type == 1 (living room).", + ), + "window_pct_external": ColumnSpec( + dtype=float, nullable=True, + description="Area share of windows with window_location == 0 (external).", + ), + "window_pct_permanent_shutters": ColumnSpec( + dtype=float, nullable=True, + description="Area share of windows with permanent_shutters_present truthy.", + ), + # Dwelling — conservatory + flat-only block. + "conservatory_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Conservatory SAP10 code (1=none, 2=heated, 3=unheated, ...).", + ), + "has_heated_separate_conservatory": ColumnSpec( + dtype=bool, nullable=True, + description="Whether the dwelling has a heated separate conservatory.", + ), + "flat_level": ColumnSpec( + dtype=int, nullable=True, + description="Flat-only: floor number on which the flat sits.", + ), + "flat_top_storey": ColumnSpec( + dtype=str, nullable=True, categorical=True, + description="Flat-only: Y/N flag indicating whether this is the top storey.", + ), + "flat_storey_count": ColumnSpec( + dtype=int, nullable=True, + description="Flat-only: storey count of the building containing the flat.", + ), + "flat_location": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Flat-only: location SAP10 code (corner/middle/...).", + ), + "flat_heat_loss_corridor": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Flat-only: heat-loss-corridor SAP10 code.", + ), + # Energy supply categoricals. + "meter_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Electricity meter type SAP10 code (1=Standard, 2=Off-peak, ...).", + ), + "pv_connection": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="PV connection topology SAP10 code.", + ), + "wind_turbines_terrain_type": ColumnSpec( + dtype=int, nullable=True, categorical=True, + description="Wind-turbine terrain type SAP10 code.", + ), } @@ -649,6 +726,28 @@ class EpcMlTransform: "number_baths": epc.sap_heating.number_baths, "number_baths_wwhrs": epc.sap_heating.number_baths_wwhrs, "extract_fans_count": epc.extract_fans_count, + # Features — conservatory + flat-only block + "conservatory_type": epc.conservatory_type, + "has_heated_separate_conservatory": epc.has_heated_separate_conservatory, + "flat_level": ( + _int_or_none(epc.sap_flat_details.level) if epc.sap_flat_details else None + ), + "flat_top_storey": ( + epc.sap_flat_details.top_storey if epc.sap_flat_details else None + ), + "flat_storey_count": ( + _int_or_none(epc.sap_flat_details.storey_count) if epc.sap_flat_details else None + ), + "flat_location": ( + _int_or_none(epc.sap_flat_details.flat_location) if epc.sap_flat_details else None + ), + "flat_heat_loss_corridor": ( + _int_or_none(epc.sap_flat_details.heat_loss_corridor) if epc.sap_flat_details else None + ), + # Features — energy supply categoricals + "meter_type": _meter_type_int(epc.sap_energy_source.meter_type), + "pv_connection": epc.sap_energy_source.pv_connection, + "wind_turbines_terrain_type": _wind_terrain_int(epc.sap_energy_source.wind_turbines_terrain_type), # Targets "sap_score": epc.energy_rating_current, "co2_emissions": epc.co2_emissions_current, @@ -762,6 +861,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: domain object — Union-int values pass through as int categoricals; str values (from site notes) coerce to None. """ + shower_outlets = sap_heating.shower_outlets + shower_outlet_count = 1 if shower_outlets is not None else 0 aggregates: dict[str, Any] = { "main_heating_count": len(sap_heating.main_heating_details), "primary_main_fuel_type": None, @@ -772,6 +873,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: "primary_fan_flue_present": None, "primary_boiler_flue_type": None, "primary_central_heating_pump_age": None, + "primary_sap_main_heating_code": None, + "primary_emitter_temperature": None, + "primary_main_heating_fraction": None, "water_heating_code": sap_heating.water_heating_code, "water_heating_fuel": sap_heating.water_heating_fuel, "cylinder_size": ( @@ -782,6 +886,8 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: "cylinder_insulation_thickness_mm": sap_heating.cylinder_insulation_thickness_mm, "has_secondary_heating": sap_heating.secondary_fuel_type is not None, "secondary_fuel_type": sap_heating.secondary_fuel_type, + "immersion_heating_type": _int_or_none(sap_heating.immersion_heating_type), + "shower_outlet_count": shower_outlet_count, } if sap_heating.main_heating_details: @@ -806,6 +912,9 @@ def _heating_aggregates(sap_heating: SapHeating) -> dict[str, Any]: aggregates["primary_central_heating_pump_age"] = ( primary.central_heating_pump_age ) + aggregates["primary_sap_main_heating_code"] = primary.sap_main_heating_code + aggregates["primary_emitter_temperature"] = _int_or_none(primary.emitter_temperature) + aggregates["primary_main_heating_fraction"] = primary.main_heating_fraction return aggregates @@ -854,6 +963,25 @@ def _int_or_none(value: Any) -> Optional[int]: return value if isinstance(value, int) else None +def _meter_type_int(value: Any) -> Optional[int]: + """Domain mapper coerces sap_energy_source.meter_type to str(int) for site-notes + compatibility ("1", "2", ...). Parse back to int for the categorical feature.""" + if isinstance(value, int): + return value + if isinstance(value, str) and value.isdigit(): + return int(value) + return None + + +def _wind_terrain_int(value: Any) -> Optional[int]: + """Same shape as meter_type — int coerced to str by the 21.0.x mapper.""" + if isinstance(value, int): + return value + if isinstance(value, str) and value.isdigit(): + return int(value) + return None + + def _ground_floor(part: SapBuildingPart) -> Optional[Any]: """Pick the ground-floor `SapFloorDimension` (floor==0) for a building part. @@ -987,6 +1115,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: "window_avg_solar_transmittance": None, **glazed_type_areas, "window_pct_pvc_frame": None, + "window_pct_living": None, + "window_pct_external": None, + "window_pct_permanent_shutters": None, } if not windows: return aggregates @@ -994,6 +1125,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: total_area = 0.0 draught_proofed_area = 0.0 pvc_frame_area = 0.0 + living_area = 0.0 + external_area = 0.0 + shutters_area = 0.0 transmission_area = 0.0 weighted_u_value = 0.0 weighted_solar_transmittance = 0.0 @@ -1004,6 +1138,12 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: draught_proofed_area += area if w.frame_material == "PVC": pvc_frame_area += area + if w.window_type == 1: # living room + living_area += area + if w.window_location == 0: # external (not conservatory) + external_area += area + if w.permanent_shutters_present is True or w.permanent_shutters_present == "Y": + shutters_area += area if isinstance(w.orientation, int) and w.orientation in _OCTANT_NAMES: octant_areas[_OCTANT_NAMES[w.orientation]] += area if isinstance(w.glazing_type, int) and w.glazing_type in _GLAZED_TYPE_CODES: @@ -1025,6 +1165,9 @@ def _window_aggregates(windows: list[SapWindow]) -> dict[str, Any]: draught_proofed_area / total_area * 100.0 ) aggregates["window_pct_pvc_frame"] = pvc_frame_area / total_area + aggregates["window_pct_living"] = living_area / total_area + aggregates["window_pct_external"] = external_area / total_area + aggregates["window_pct_permanent_shutters"] = shutters_area / total_area for column, area in glazed_type_areas.items(): aggregates[column] = area / total_area if transmission_area > 0: