From 6697a6c76e02e72673d18b8407995c833c6fb3be Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 May 2026 20:35:28 +0000 Subject: [PATCH] slice 14j: Optional sweep across schema 21.0.1 + mapper guards Across 500 real RdSAP-21.0.1 certs from 2026, mapper goes 0% -> 100% success. Schema-loading + ml-transform + ml_training_data: 146 tests pass. Mainly affected fields: - SapHeating: instantaneous_wwhrs, shower_outlets (now Union with List shape) - SapWindow: glazing_gap, frame_factor, pvc_frame, window_transmission_details - SapEnergySource: pv_battery_count, wind_turbine_details, pv_batteries (List form) - SapBuildingPart: all 13 sub-fields now Optional - SapFloorDimension: Measurement | int | float fallback - RdSapSchema21_0_1: 16 top-level fields (mechanical_vent_*, lighting counts, ...) Mapper helpers added: _measurement_value, _first_pv_battery, _first_shower_outlet. Co-Authored-By: Claude Opus 4.7 --- datatypes/epc/domain/mapper.py | 200 +++++++++++--------- datatypes/epc/schema/rdsap_schema_21_0_0.py | 2 +- datatypes/epc/schema/rdsap_schema_21_0_1.py | 101 +++++----- 3 files changed, 163 insertions(+), 140 deletions(-) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 98a36e2c..9ac4ce19 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -113,6 +113,8 @@ def _map_schema_21_pv( return None, (flattened or None) if es_pv_supply is None: return None, None + if es_pv_supply.none_or_no_details is None: + return None, None return ( PhotovoltaicSupply( none_or_no_details=PhotovoltaicSupplyNoneOrNoDetails( @@ -572,19 +574,15 @@ class EpcPropertyDataMapper: party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - 
heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -713,19 +711,15 @@ class EpcPropertyDataMapper: party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -862,19 +856,15 @@ class EpcPropertyDataMapper: party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + 
total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -1028,19 +1018,15 @@ class EpcPropertyDataMapper: party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -1115,9 +1101,13 @@ class EpcPropertyDataMapper: schema.secondary_heating ), sap_heating=SapHeating( - instantaneous_wwhrs=InstantaneousWwhrs( - wwhrs_index_number1=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number1, - wwhrs_index_number2=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number2, + instantaneous_wwhrs=( + InstantaneousWwhrs( + wwhrs_index_number1=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number1, + wwhrs_index_number2=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number2, + ) + if schema.sap_heating.instantaneous_wwhrs is not None + else 
InstantaneousWwhrs() ), main_heating_details=[ MainHeatingDetail( @@ -1205,15 +1195,7 @@ class EpcPropertyDataMapper: if es.wind_turbine_details else None ), - pv_batteries=( - PvBatteries( - pv_battery=PvBattery( - battery_capacity=es.pv_batteries.pv_battery.battery_capacity - ) - ) - if es.pv_batteries - else None - ), + pv_batteries=_first_pv_battery(es.pv_batteries), ), sap_building_parts=[ SapBuildingPart( @@ -1225,19 +1207,15 @@ class EpcPropertyDataMapper: party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -1363,9 +1341,13 @@ class EpcPropertyDataMapper: ), # SAP heating sap_heating=SapHeating( - instantaneous_wwhrs=InstantaneousWwhrs( - wwhrs_index_number1=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number1, - wwhrs_index_number2=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number2, + instantaneous_wwhrs=( + InstantaneousWwhrs( + wwhrs_index_number1=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number1, + wwhrs_index_number2=schema.sap_heating.instantaneous_wwhrs.wwhrs_index_number2, + ) + if schema.sap_heating.instantaneous_wwhrs is not None + else InstantaneousWwhrs() ), main_heating_details=[ MainHeatingDetail( @@ 
-1393,16 +1375,7 @@ class EpcPropertyDataMapper: water_heating_code=schema.sap_heating.water_heating_code, water_heating_fuel=schema.sap_heating.water_heating_fuel, immersion_heating_type=schema.sap_heating.immersion_heating_type, - shower_outlets=( - ShowerOutlets( - ShowerOutlet( - shower_wwhrs=schema.sap_heating.shower_outlets.shower_outlet.shower_wwhrs, - shower_outlet_type=schema.sap_heating.shower_outlets.shower_outlet.shower_outlet_type, - ) - ) - if schema.sap_heating.shower_outlets - else None - ), + shower_outlets=_first_shower_outlet(schema.sap_heating.shower_outlets), cylinder_insulation_type=schema.sap_heating.cylinder_insulation_type, cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, @@ -1424,10 +1397,14 @@ class EpcPropertyDataMapper: window_location=w.window_location, window_wall_type=w.window_wall_type, permanent_shutters_present=w.permanent_shutters_present == "Y", - window_transmission_details=WindowTransmissionDetails( - u_value=w.window_transmission_details.u_value, - data_source=w.window_transmission_details.data_source, - solar_transmittance=w.window_transmission_details.solar_transmittance, + window_transmission_details=( + WindowTransmissionDetails( + u_value=w.window_transmission_details.u_value, + data_source=w.window_transmission_details.data_source, + solar_transmittance=w.window_transmission_details.solar_transmittance, + ) + if w.window_transmission_details is not None + else None ), permanent_shutters_insulated=w.permanent_shutters_insulated, ) @@ -1455,15 +1432,7 @@ class EpcPropertyDataMapper: if es.wind_turbine_details else None ), - pv_batteries=( - PvBatteries( - pv_battery=PvBattery( - battery_capacity=es.pv_batteries.pv_battery.battery_capacity - ) - ) - if es.pv_batteries - else None - ), + pv_batteries=_first_pv_battery(es.pv_batteries), ), # SAP building parts sap_building_parts=[ @@ -1476,19 +1445,15 @@ class EpcPropertyDataMapper: 
party_wall_construction=bp.party_wall_construction, sap_floor_dimensions=[ SapFloorDimension( - room_height_m=fd.room_height.value, - total_floor_area_m2=fd.total_floor_area.value, - party_wall_length_m=( - float(fd.party_wall_length) - if isinstance(fd.party_wall_length, int) - else fd.party_wall_length.value - ), - heat_loss_perimeter_m=fd.heat_loss_perimeter.value, + room_height_m=_measurement_value(fd.room_height), + total_floor_area_m2=_measurement_value(fd.total_floor_area), + party_wall_length_m=_measurement_value(fd.party_wall_length), + heat_loss_perimeter_m=_measurement_value(fd.heat_loss_perimeter), floor=fd.floor, floor_insulation=fd.floor_insulation, floor_construction=fd.floor_construction, ) - for fd in bp.sap_floor_dimensions + for fd in (bp.sap_floor_dimensions or []) ], building_part_number=bp.building_part_number, wall_dry_lined=bp.wall_dry_lined == "Y", @@ -1625,6 +1590,57 @@ class EpcPropertyDataMapper: # --------------------------------------------------------------------------- +def _measurement_value(field: Any) -> float: + """SAP floor-dim measurements arrive as either a `Measurement` (with `.value`) + or a plain int/float (real-API certs). Coerce to float either way.""" + if hasattr(field, "value"): + return float(field.value) + return float(field) + + +def _first_pv_battery( + schema_pv_batteries: Any, +) -> Optional[PvBatteries]: + """SapEnergySource.pv_batteries is a list in real-API certs and a single + dataclass in the older synthetic fixture. 
Pick the first battery if any.""" + if schema_pv_batteries is None: + return None + if isinstance(schema_pv_batteries, list): + if not schema_pv_batteries: + return None + first = schema_pv_batteries[0] + else: + first = schema_pv_batteries + if first.pv_battery is None: + return None + return PvBatteries(pv_battery=PvBattery(battery_capacity=first.pv_battery.battery_capacity)) + + +def _first_shower_outlet( + schema_shower_outlets: Any, +) -> Optional[ShowerOutlets]: + """SapHeating.shower_outlets carries either a single ShowerOutlets dataclass or + a list of them (real API is a list, older synthetic fixture is a single object). + Only the first entry is mapped (None when it has no shower_outlet) so the domain model stays single-valued for now. + """ + if schema_shower_outlets is None: + return None + if isinstance(schema_shower_outlets, list): + if not schema_shower_outlets: + return None + first = schema_shower_outlets[0] + else: + first = schema_shower_outlets + if first.shower_outlet is None: + return None + return ShowerOutlets( + ShowerOutlet( + shower_wwhrs=first.shower_outlet.shower_wwhrs, + shower_outlet_type=first.shower_outlet.shower_outlet_type, + ) + ) + + def _strip_code(value: str) -> str: """Strip leading uppercase code from Elmhurst coded strings, e.g. 
'CA Cavity' → 'Cavity'.""" parts = value.split(" ", 1) diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py index bbebb33a..54077b20 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py @@ -61,10 +61,10 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: InstantaneousWwhrs main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str + instantaneous_wwhrs: Optional[InstantaneousWwhrs] = None shower_outlets: Optional[ShowerOutlets] = None cylinder_insulation_type: Optional[int] = None cylinder_thermostat: Optional[str] = None diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 85d532c4..5de41f6a 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -14,9 +14,9 @@ class EnergyElement: @dataclass class Addendum: - addendum_numbers: List[int] stone_walls: Optional[str] = None system_build: Optional[str] = None + addendum_numbers: Optional[List[int]] = None @dataclass @@ -27,7 +27,7 @@ class ShowerOutlet: @dataclass class ShowerOutlets: - shower_outlet: ShowerOutlet + shower_outlet: Optional[ShowerOutlet] = None @dataclass @@ -43,12 +43,12 @@ class MainHeatingDetail: has_fghrs: str # TODO: make bool main_fuel_type: int heat_emitter_type: int - emitter_temperature: Union[int, str] main_heating_number: int main_heating_control: int main_heating_category: int main_heating_fraction: int main_heating_data_source: int + emitter_temperature: Optional[Union[int, str]] = None boiler_flue_type: Optional[int] = None fan_flue_present: Optional[str] = None # TODO: make bool boiler_ignition_type: Optional[int] = None @@ -62,11 +62,13 @@ class SapHeating: cylinder_size: int water_heating_code: int water_heating_fuel: int - instantaneous_wwhrs: 
InstantaneousWwhrs main_heating_details: List[MainHeatingDetail] immersion_heating_type: Union[int, str] has_fixed_air_conditioning: str - shower_outlets: Optional[ShowerOutlets] = None + instantaneous_wwhrs: Optional[InstantaneousWwhrs] = None + # Real-API certs carry shower_outlets as a list, not the synthetic single-object form; + # accept both shapes so older fixtures keep parsing. + shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None cylinder_insulation_type: Optional[int] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None @@ -81,7 +83,9 @@ class PvBattery: @dataclass class PvBatteries: - pv_battery: PvBattery + # Real-API certs carry pv_batteries as a list (similar to shower_outlets); + # the older synthetic fixture used a single-object wrapper. + pv_battery: Optional[PvBattery] = None @dataclass @@ -97,7 +101,7 @@ class PhotovoltaicSupplyNoneOrNoDetails: @dataclass class PhotovoltaicSupply: - none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails + none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None @dataclass @@ -120,15 +124,15 @@ class SapEnergySource: mains_gas: str meter_type: int pv_connection: int - pv_battery_count: int photovoltaic_supply: Union[PhotovoltaicSupply, List[List[PhotovoltaicArray]]] wind_turbines_count: int - wind_turbine_details: WindTurbineDetails gas_smart_meter_present: str is_dwelling_export_capable: str wind_turbines_terrain_type: int electricity_smart_meter_present: str - pv_batteries: Optional[PvBatteries] = None + pv_battery_count: Optional[int] = None + wind_turbine_details: Optional[WindTurbineDetails] = None + pv_batteries: Optional[Union[PvBatteries, List[PvBatteries]]] = None @dataclass @@ -140,11 +144,8 @@ class WindowTransmissionDetails: @dataclass class SapWindow: - pvc_frame: str - glazing_gap: int orientation: int window_type: int - frame_factor: float glazing_type: int window_width: float window_height: float @@ -152,17 +153,21 @@ class 
SapWindow: window_location: int window_wall_type: int permanent_shutters_present: str # TODO: make bool - window_transmission_details: WindowTransmissionDetails permanent_shutters_insulated: str + pvc_frame: Optional[str] = None + glazing_gap: Optional[int] = None + frame_factor: Optional[float] = None + window_transmission_details: Optional[WindowTransmissionDetails] = None @dataclass class SapFloorDimension: floor: int - room_height: Measurement - total_floor_area: Measurement - party_wall_length: Union[Measurement, int] - heat_loss_perimeter: Measurement + # Real-API certs sometimes carry plain int/float instead of a Measurement object. + room_height: Union[Measurement, int, float] + total_floor_area: Union[Measurement, int, float] + party_wall_length: Union[Measurement, int, float] + heat_loss_perimeter: Union[Measurement, int, float] floor_insulation: Optional[int] = None floor_construction: Optional[int] = None @@ -185,19 +190,19 @@ class SapAlternativeWall: @dataclass class SapBuildingPart: - identifier: str - wall_dry_lined: str - floor_heat_loss: int - roof_construction: int - wall_construction: int - building_part_number: int - sap_floor_dimensions: List[SapFloorDimension] - wall_insulation_type: int - construction_age_band: str - party_wall_construction: Union[int, str] - wall_thickness_measured: str - roof_insulation_location: Union[int, str] - roof_insulation_thickness: Union[str, int] + identifier: Optional[str] = None + wall_dry_lined: Optional[str] = None + floor_heat_loss: Optional[int] = None + roof_construction: Optional[int] = None + wall_construction: Optional[int] = None + building_part_number: Optional[int] = None + sap_floor_dimensions: Optional[List[SapFloorDimension]] = None + wall_insulation_type: Optional[int] = None + construction_age_band: Optional[str] = None + party_wall_construction: Optional[Union[int, str]] = None + wall_thickness_measured: Optional[str] = None + roof_insulation_location: Optional[Union[int, str]] = None + 
roof_insulation_thickness: Optional[Union[str, int]] = None sap_room_in_roof: Optional[SapRoomInRoof] = None sap_alternative_wall_1: Optional[SapAlternativeWall] = None sap_alternative_wall_2: Optional[SapAlternativeWall] = None @@ -291,7 +296,6 @@ class RdSapSchema21_0_1: assessment_type: str completion_date: str inspection_date: str - wet_rooms_count: int extensions_count: int measurement_type: int total_floor_area: int @@ -302,7 +306,6 @@ class RdSapSchema21_0_1: sap_energy_source: SapEnergySource secondary_heating: EnergyElement sap_building_parts: List[SapBuildingPart] - open_chimneys_count: int solar_water_heating: str habitable_room_count: int heating_cost_current: float @@ -315,10 +318,8 @@ class RdSapSchema21_0_1: has_hot_water_cylinder: str heating_cost_potential: float hot_water_cost_current: float - insulated_door_u_value: float mechanical_ventilation: int percent_draughtproofed: int - suggested_improvements: List[SuggestedImprovement] co2_emissions_potential: float energy_rating_potential: int lighting_cost_potential: float @@ -326,28 +327,34 @@ class RdSapSchema21_0_1: hot_water_cost_potential: float renewable_heat_incentive: RenewableHeatIncentive draughtproofed_door_count: int - mechanical_vent_duct_type: int - windows_transmission_details: WindowsTransmissionDetails - cfl_fixed_lighting_bulbs_count: int energy_consumption_current: int has_fixed_air_conditioning: str - multiple_glazed_proportion: int calculation_software_version: str energy_consumption_potential: int environmental_impact_current: int - led_fixed_lighting_bulbs_count: int - mechanical_vent_duct_placement: int - mechanical_vent_duct_insulation: int potential_energy_efficiency_band: str - pressure_test_certificate_number: int - mechanical_ventilation_index_number: int co2_emissions_current_per_floor_area: int current_energy_efficiency_band: str environmental_impact_potential: int - low_energy_fixed_lighting_bulbs_count: int - mechanical_vent_duct_insulation_level: int - 
mechanical_vent_measured_installation: str incandescent_fixed_lighting_bulbs_count: int + # Fields below are present in some certs but absent in many real-world responses; + # see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert. + wet_rooms_count: Optional[int] = None + open_chimneys_count: Optional[int] = None + insulated_door_u_value: Optional[float] = None + suggested_improvements: Optional[List[SuggestedImprovement]] = None + mechanical_vent_duct_type: Optional[int] = None + windows_transmission_details: Optional[WindowsTransmissionDetails] = None + cfl_fixed_lighting_bulbs_count: Optional[int] = None + multiple_glazed_proportion: Optional[int] = None + led_fixed_lighting_bulbs_count: Optional[int] = None + mechanical_vent_duct_placement: Optional[int] = None + mechanical_vent_duct_insulation: Optional[int] = None + pressure_test_certificate_number: Optional[int] = None + mechanical_ventilation_index_number: Optional[int] = None + low_energy_fixed_lighting_bulbs_count: Optional[int] = None + mechanical_vent_duct_insulation_level: Optional[int] = None + mechanical_vent_measured_installation: Optional[str] = None sap_flat_details: Optional[SapFlatDetails] = None addendum: Optional[Addendum] = None address_line_2: Optional[str] = None