Map RdSAP-Schema-19.0 certs to EpcPropertyData 🟩

Dispatch RdSAP-Schema-19.0 through from_api_response, parse-fix the schema
(data-driven required->optional, validated against the 1000-cert 19.0 corpus
per ADR-0028), and port 18.0's defensive mapper reads (dwelling_type str/dict/
number, photovoltaic_supply guard, sap_room_in_roof Measurement coercion).
All 1000 corpus certs now parse and map.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jun-te Kim 2026-06-12 12:29:36 +00:00
parent 5178197dc2
commit 792f76f2fa
2 changed files with 100 additions and 37 deletions

View file

@ -1023,7 +1023,15 @@ class EpcPropertyDataMapper:
uprn=schema.uprn,
assessment_type=schema.assessment_type,
sap_version=schema.sap_version,
dwelling_type=schema.dwelling_type.value,
# ADR-0028: 19.0 lodges dwelling_type as str (503/1000), localised
# dict, or a plain number — coerce all three to str.
dwelling_type=(
schema.dwelling_type
if isinstance(schema.dwelling_type, str)
else schema.dwelling_type.value
if hasattr(schema.dwelling_type, "value")
else str(schema.dwelling_type)
),
property_type=str(schema.property_type),
built_form=str(schema.built_form),
address_line_1=schema.address_line_1,
@ -1118,7 +1126,9 @@ class EpcPropertyDataMapper:
percent_roof_area=es.photovoltaic_supply.none_or_no_details.percent_roof_area,
)
)
if es.photovoltaic_supply
# ADR-0028: photovoltaic_supply can be absent, None, or a
# sparse shape without none_or_no_details — guard the read.
if getattr(es.photovoltaic_supply, "none_or_no_details", None)
else None
),
),
@ -1172,8 +1182,11 @@ class EpcPropertyDataMapper:
),
sap_room_in_roof=(
SapRoomInRoof(
# floor_area is a Measurement in 19.0
floor_area=bp.sap_room_in_roof.floor_area.value,
# ADR-0028: floor_area is usually a Measurement but
# some certs lodge a plain number — coerce both.
floor_area=_measurement_value(
bp.sap_room_in_roof.floor_area
),
construction_age_band=bp.sap_room_in_roof.construction_age_band,
)
if bp.sap_room_in_roof
@ -2173,6 +2186,14 @@ class EpcPropertyDataMapper:
from_dict(RdSapSchema20_0_0, data)
)
)
if schema == "RdSAP-Schema-19.0":
from datatypes.epc.schema.rdsap_schema_19_0 import RdSapSchema19_0
return _clear_basement_flag_when_system_built(
EpcPropertyDataMapper.from_rdsap_schema_19_0(
from_dict(RdSapSchema19_0, data)
)
)
if schema == "RdSAP-Schema-18.0":
from datatypes.epc.schema.rdsap_schema_18_0 import RdSapSchema18_0

View file

@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import List, Optional, Union
from .common import CostAmount, DescriptionV1, Measurement
@ -60,7 +60,9 @@ class PhotovoltaicSupplyNoneOrNoDetails:
@dataclass
class PhotovoltaicSupply:
none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
# ADR-0028 data-driven required→optional: the photovoltaic_supply block can
# arrive without its none_or_no_details child (matches 18.0).
none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
@dataclass
@ -85,28 +87,37 @@ class SapFloorDimension:
@dataclass
class SapRoomInRoof:
floor_area: Measurement
"""Room-in-roof details. floor_area is usually a Measurement object but some
certs lodge it as a plain number (ADR-0028, as in 18.0). It is read via
`_measurement_value`, which coerces both shapes."""
floor_area: Union[Measurement, int, float]
insulation: str
roof_room_connected: str
construction_age_band: str
@dataclass
@dataclass(kw_only=True)
class SapBuildingPart:
identifier: str
wall_dry_lined: str
wall_thickness: int
floor_heat_loss: int
roof_construction: int
wall_construction: int
building_part_number: int
sap_floor_dimensions: List[SapFloorDimension]
wall_insulation_type: int
construction_age_band: str
party_wall_construction: Union[int, str]
wall_thickness_measured: str
roof_insulation_location: Union[int, str]
roof_insulation_thickness: Union[str, int]
# Data-driven required→optional (ADR-0028): a conservatory-shaped part can
# carry only a subset of fields (none of the construction fields). Every
# field is Optional (the 21.0.1/20.0.0/18.0 precedent); the sparse part flows
# through harmlessly. 19.0 corpus: 6/1000 omit roof_insulation_thickness,
# 2/1000 omit identifier.
identifier: Optional[str] = None
wall_dry_lined: Optional[str] = None
wall_thickness: Optional[int] = None
floor_heat_loss: Optional[int] = None
roof_construction: Optional[int] = None
wall_construction: Optional[int] = None
building_part_number: Optional[int] = None
sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
wall_insulation_type: Optional[int] = None
construction_age_band: Optional[str] = None
party_wall_construction: Optional[Union[int, str]] = None
wall_thickness_measured: Optional[str] = None
roof_insulation_location: Optional[Union[int, str]] = None
roof_insulation_thickness: Optional[Union[str, int]] = None
sap_room_in_roof: Optional[SapRoomInRoof] = None
wall_insulation_thickness: Optional[str] = None
floor_insulation_thickness: Optional[str] = None
@ -145,15 +156,18 @@ class SuggestedImprovement:
environmental_impact_rating: int
@dataclass
@dataclass(kw_only=True)
class AlternativeImprovement:
sequence: int
typical_saving: CostAmount
improvement_type: str
improvement_details: ImprovementDetails
improvement_category: int
energy_performance_rating: int
environmental_impact_rating: int
# ADR-0028: some certs lodge a reduced alternative-improvement shape (only
# improvement_details/-type). Parse-only — the mapper does not read
# alternative_improvements — so every field is Optional.
sequence: Optional[int] = None
typical_saving: Optional[CostAmount] = None
improvement_type: Optional[str] = None
improvement_details: Optional[ImprovementDetails] = None
improvement_category: Optional[int] = None
energy_performance_rating: Optional[int] = None
environmental_impact_rating: Optional[int] = None
@dataclass
@ -165,6 +179,20 @@ class RenewableHeatIncentive:
@dataclass
class SapWindow:
"""Per-window geometry. ADR-0028: only 6/1000 19.0 certs lodge this array;
window_area arrives as a Measurement and is read via `_measurement_value`
(NOTE(review): the field is annotated `float` — confirm the annotation admits
a Measurement). Mirrors the 20.0.0/18.0 SapWindow shape. This is the per-spec
Validation Cohort: its lodged geometry is used directly, never synthesised over."""
orientation: int
window_area: float
window_type: int
glazing_type: int
window_location: int
@dataclass(kw_only=True)
class RdSapSchema19_0:
uprn: int
roofs: List[EnergyElement]
@ -180,6 +208,9 @@ class RdSapSchema19_0:
built_form: int
door_count: int
glazed_area: int
# ADR-0028: glazing_gap is lodged as int (162/1000), str (357/1000), or
# omitted (481/1000) — widen + default, not int-required.
glazing_gap: Optional[Union[int, str]] = None
region_code: int
report_type: int
sap_heating: SapHeating
@ -188,7 +219,9 @@ class RdSapSchema19_0:
uprn_source: str
country_code: str
main_heating: List[EnergyElement]
dwelling_type: DescriptionV1
# ADR-0028: 503/1000 lodge dwelling_type as a plain str, not a localised
# DescriptionV1 object (matches 20.0.0/18.0). Widen so both shapes parse.
dwelling_type: Union[str, DescriptionV1]
language_code: int
property_type: int
address_line_1: str
@ -201,11 +234,13 @@ class RdSapSchema19_0:
transaction_type: int
conservatory_type: int
heated_room_count: int
pvc_window_frames: str
# ADR-0028: missing in 314/1000 — widen + default.
pvc_window_frames: Optional[str] = None
registration_date: str
sap_energy_source: SapEnergySource
secondary_heating: EnergyElement
lzc_energy_sources: List[int]
# ADR-0028: present in only 35/1000 — default to empty.
lzc_energy_sources: List[int] = field(default_factory=list)
sap_building_parts: List[SapBuildingPart]
low_energy_lighting: int
solar_water_heating: str
@ -217,21 +252,24 @@ class RdSapSchema19_0:
energy_rating_current: int
lighting_cost_current: CostAmount
main_heating_controls: List[EnergyElement]
multiple_glazing_type: int
# ADR-0028: lodged as an int code (1-7) or the string "ND" (Not Defined,
# 50/1000) — widen so both parse; the synthesis maps "ND" to a default.
multiple_glazing_type: Union[int, str]
open_fireplaces_count: int
has_hot_water_cylinder: str
heating_cost_potential: CostAmount
hot_water_cost_current: CostAmount
mechanical_ventilation: int
percent_draughtproofed: int
suggested_improvements: List[SuggestedImprovement]
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
co2_emissions_potential: float
energy_rating_potential: int
lighting_cost_potential: CostAmount
schema_version_original: str
hot_water_cost_potential: CostAmount
renewable_heat_incentive: RenewableHeatIncentive
windows_transmission_details: WindowsTransmissionDetails
# 19.0-specific block, absent in 713/1000 — Optional + default.
windows_transmission_details: Optional[WindowsTransmissionDetails] = None
energy_consumption_current: int
has_fixed_air_conditioning: str
multiple_glazed_proportion: int
@ -247,5 +285,9 @@ class RdSapSchema19_0:
low_energy_fixed_lighting_outlets_count: int
sap_flat_details: Optional[SapFlatDetails] = None
address_line_2: Optional[str] = None
glazing_gap: Optional[Union[str, int]] = None
alternative_improvements: Optional[List[AlternativeImprovement]] = None
# ADR-0028: additive — the placeholder schema omitted sap_windows entirely,
# silently dropping the 6 rich certs' lodged per-window geometry. Capture it
# so the mapper can use lodged window_area directly (default [] = windowless,
# synthesised from the glazed_area band).
sap_windows: List[SapWindow] = field(default_factory=list)