From 792f76f2faa85de657a03d3432bd5f1dbe4a79ca Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 12 Jun 2026 12:29:36 +0000 Subject: [PATCH] =?UTF-8?q?Map=20RdSAP-Schema-19.0=20certs=20to=20EpcPrope?= =?UTF-8?q?rtyData=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dispatch RdSAP-Schema-19.0 through from_api_response, parse-fix the schema (data-driven required->optional, validated against the 1000-cert 19.0 corpus per ADR-0028), and port 18.0's defensive mapper reads (dwelling_type str/dict/ number, photovoltaic_supply guard, sap_room_in_roof Measurement coercion). All 1000 corpus certs now parse and map. Co-Authored-By: Claude Opus 4.8 (1M context) --- datatypes/epc/domain/mapper.py | 29 +++++- datatypes/epc/schema/rdsap_schema_19_0.py | 108 +++++++++++++++------- 2 files changed, 100 insertions(+), 37 deletions(-) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index d8754292..4b854ae9 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1023,7 +1023,15 @@ class EpcPropertyDataMapper: uprn=schema.uprn, assessment_type=schema.assessment_type, sap_version=schema.sap_version, - dwelling_type=schema.dwelling_type.value, + # ADR-0028: 19.0 lodges dwelling_type as str (503/1000), localised + # dict, or a plain number — coerce all three to str. + dwelling_type=( + schema.dwelling_type + if isinstance(schema.dwelling_type, str) + else schema.dwelling_type.value + if hasattr(schema.dwelling_type, "value") + else str(schema.dwelling_type) + ), property_type=str(schema.property_type), built_form=str(schema.built_form), address_line_1=schema.address_line_1, @@ -1118,7 +1126,9 @@ class EpcPropertyDataMapper: percent_roof_area=es.photovoltaic_supply.none_or_no_details.percent_roof_area, ) ) - if es.photovoltaic_supply + # ADR-0028: photovoltaic_supply can be absent, None, or a + # sparse shape without none_or_no_details — guard the read. + if getattr(es.photovoltaic_supply, "none_or_no_details", None) else None ), ), @@ -1172,8 +1182,11 @@ class EpcPropertyDataMapper: ), sap_room_in_roof=( SapRoomInRoof( - # floor_area is a Measurement in 19.0 - floor_area=bp.sap_room_in_roof.floor_area.value, + # ADR-0028: floor_area is usually a Measurement but + # some certs lodge a plain number — coerce both. + floor_area=_measurement_value( + bp.sap_room_in_roof.floor_area + ), construction_age_band=bp.sap_room_in_roof.construction_age_band, ) if bp.sap_room_in_roof @@ -2173,6 +2186,14 @@ class EpcPropertyDataMapper: from_dict(RdSapSchema20_0_0, data) ) ) + if schema == "RdSAP-Schema-19.0": + from datatypes.epc.schema.rdsap_schema_19_0 import RdSapSchema19_0 + + return _clear_basement_flag_when_system_built( + EpcPropertyDataMapper.from_rdsap_schema_19_0( + from_dict(RdSapSchema19_0, data) + ) + ) if schema == "RdSAP-Schema-18.0": from datatypes.epc.schema.rdsap_schema_18_0 import RdSapSchema18_0 diff --git a/datatypes/epc/schema/rdsap_schema_19_0.py b/datatypes/epc/schema/rdsap_schema_19_0.py index b3c77ec4..b91dff35 100644 --- a/datatypes/epc/schema/rdsap_schema_19_0.py +++ b/datatypes/epc/schema/rdsap_schema_19_0.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Union from .common import CostAmount, DescriptionV1, Measurement @@ -60,7 +60,9 @@ class PhotovoltaicSupplyNoneOrNoDetails: @dataclass class PhotovoltaicSupply: - none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails + # ADR-0028 data-driven required→optional: the photovoltaic_supply block can + # arrive without its none_or_no_details child (matches 18.0). + none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None @dataclass @@ -85,28 +87,37 @@ class SapFloorDimension: @dataclass class SapRoomInRoof: - floor_area: Measurement + """Room-in-roof details. floor_area is usually a Measurement object but some + certs lodge it as a plain number (ADR-0028, as in 18.0) — read via + `_measurement_value`, which coerces both shapes.""" + + floor_area: Union[Measurement, int, float] insulation: str roof_room_connected: str construction_age_band: str -@dataclass +@dataclass(kw_only=True) class SapBuildingPart: - identifier: str - wall_dry_lined: str - wall_thickness: int - floor_heat_loss: int - roof_construction: int - wall_construction: int - building_part_number: int - sap_floor_dimensions: List[SapFloorDimension] - wall_insulation_type: int - construction_age_band: str - party_wall_construction: Union[int, str] - wall_thickness_measured: str - roof_insulation_location: Union[int, str] - roof_insulation_thickness: Union[str, int] + # Data-driven required→optional (ADR-0028): a conservatory-shaped part can + # carry only a subset of fields (none of the construction fields). Every + # field is Optional (the 21.0.1/20.0.0/18.0 precedent); the sparse part flows + # through harmlessly. 19.0 corpus: 6/1000 omit roof_insulation_thickness, + # 2/1000 omit identifier. + identifier: Optional[str] = None + wall_dry_lined: Optional[str] = None + wall_thickness: Optional[int] = None + floor_heat_loss: Optional[int] = None + roof_construction: Optional[int] = None + wall_construction: Optional[int] = None + building_part_number: Optional[int] = None + sap_floor_dimensions: Optional[List[SapFloorDimension]] = None + wall_insulation_type: Optional[int] = None + construction_age_band: Optional[str] = None + party_wall_construction: Optional[Union[int, str]] = None + wall_thickness_measured: Optional[str] = None + roof_insulation_location: Optional[Union[int, str]] = None + roof_insulation_thickness: Optional[Union[str, int]] = None sap_room_in_roof: Optional[SapRoomInRoof] = None wall_insulation_thickness: Optional[str] = None floor_insulation_thickness: Optional[str] = None @@ -145,15 +156,18 @@ class SuggestedImprovement: environmental_impact_rating: int -@dataclass +@dataclass(kw_only=True) class AlternativeImprovement: - sequence: int - typical_saving: CostAmount - improvement_type: str - improvement_details: ImprovementDetails - improvement_category: int - energy_performance_rating: int - environmental_impact_rating: int + # ADR-0028: some certs lodge a reduced alternative-improvement shape (only + # improvement_details/-type). Parse-only — the mapper does not read + # alternative_improvements — so every field is Optional. + sequence: Optional[int] = None + typical_saving: Optional[CostAmount] = None + improvement_type: Optional[str] = None + improvement_details: Optional[ImprovementDetails] = None + improvement_category: Optional[int] = None + energy_performance_rating: Optional[int] = None + environmental_impact_rating: Optional[int] = None @dataclass @@ -165,6 +179,20 @@ class RenewableHeatIncentive: @dataclass +class SapWindow: + """Per-window geometry. ADR-0028: only 6/1000 19.0 certs lodge this array; + window_area arrives as a Measurement and is read via `_measurement_value`. + Mirrors the 20.0.0/18.0 SapWindow shape. This is the per-spec Validation + Cohort — its lodged geometry is used directly, never synthesised over.""" + + orientation: int + window_area: float + window_type: int + glazing_type: int + window_location: int + + +@dataclass(kw_only=True) class RdSapSchema19_0: uprn: int roofs: List[EnergyElement] @@ -180,6 +208,9 @@ class RdSapSchema19_0: built_form: int door_count: int glazed_area: int + # ADR-0028: glazing_gap is lodged as int (162/1000), str (357/1000), or + # omitted (481/1000) — widen + default, not int-required. + glazing_gap: Optional[Union[int, str]] = None region_code: int report_type: int sap_heating: SapHeating @@ -188,7 +219,9 @@ class RdSapSchema19_0: uprn_source: str country_code: str main_heating: List[EnergyElement] - dwelling_type: DescriptionV1 + # ADR-0028: 503/1000 lodge dwelling_type as a plain str, not a localised + # DescriptionV1 object (matches 20.0.0/18.0). Widen so both shapes parse. + dwelling_type: Union[str, DescriptionV1] language_code: int property_type: int address_line_1: str @@ -201,11 +234,13 @@ class RdSapSchema19_0: transaction_type: int conservatory_type: int heated_room_count: int - pvc_window_frames: str + # ADR-0028: missing in 314/1000 — widen + default. + pvc_window_frames: Optional[str] = None registration_date: str sap_energy_source: SapEnergySource secondary_heating: EnergyElement - lzc_energy_sources: List[int] + # ADR-0028: present in only 35/1000 — default to empty. + lzc_energy_sources: List[int] = field(default_factory=list) sap_building_parts: List[SapBuildingPart] low_energy_lighting: int solar_water_heating: str @@ -217,21 +252,24 @@ class RdSapSchema19_0: energy_rating_current: int lighting_cost_current: CostAmount main_heating_controls: List[EnergyElement] - multiple_glazing_type: int + # ADR-0028: lodged as an int code (1-7) or the string "ND" (Not Defined, + # 50/1000) — widen so both parse; the synthesis maps "ND" to a default. + multiple_glazing_type: Union[int, str] open_fireplaces_count: int has_hot_water_cylinder: str heating_cost_potential: CostAmount hot_water_cost_current: CostAmount mechanical_ventilation: int percent_draughtproofed: int - suggested_improvements: List[SuggestedImprovement] + suggested_improvements: List[SuggestedImprovement] = field(default_factory=list) co2_emissions_potential: float energy_rating_potential: int lighting_cost_potential: CostAmount schema_version_original: str hot_water_cost_potential: CostAmount renewable_heat_incentive: RenewableHeatIncentive - windows_transmission_details: WindowsTransmissionDetails + # 19.0-specific block, absent in 713/1000 — Optional + default. + windows_transmission_details: Optional[WindowsTransmissionDetails] = None energy_consumption_current: int has_fixed_air_conditioning: str multiple_glazed_proportion: int @@ -247,5 +285,9 @@ class RdSapSchema19_0: low_energy_fixed_lighting_outlets_count: int sap_flat_details: Optional[SapFlatDetails] = None address_line_2: Optional[str] = None - glazing_gap: Optional[Union[str, int]] = None alternative_improvements: Optional[List[AlternativeImprovement]] = None + # ADR-0028: additive — the placeholder schema omitted sap_windows entirely, + # silently dropping the 6 rich certs' lodged per-window geometry. Capture it + # so the mapper can use lodged window_area directly (default [] = windowless, + # synthesised from the glazed_area band). + sap_windows: List[SapWindow] = field(default_factory=list)