mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Map RdSAP 20.0.0 certs that omit reduced fields or lodge localised text 🟩
Required fields become optional with defaults (kw_only dataclass; the choice is data-driven from field presence in the corpus), so the 993/1000 certs that omit sap_windows now parse. description and dwelling_type are honestly typed as Union[str, DescriptionV1], because the corpus lodges them as localised dicts in about half the certs. The previously never-run 20.0.0 mapper path now produces EpcPropertyData; 974/1000 corpus certs map (xpass), up from 7. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5589a66e7c
commit
8074f4152c
3 changed files with 79 additions and 16 deletions
|
|
@ -1081,7 +1081,12 @@ class EpcPropertyDataMapper:
|
|||
uprn=schema.uprn,
|
||||
assessment_type=schema.assessment_type,
|
||||
sap_version=schema.sap_version,
|
||||
dwelling_type=schema.dwelling_type,
|
||||
# ADR-0027: 20.0.0 lodges dwelling_type as str OR localised dict.
|
||||
dwelling_type=(
|
||||
schema.dwelling_type
|
||||
if isinstance(schema.dwelling_type, str)
|
||||
else schema.dwelling_type.value
|
||||
),
|
||||
property_type=str(schema.property_type),
|
||||
built_form=str(schema.built_form),
|
||||
address_line_1=schema.address_line_1,
|
||||
|
|
|
|||
|
|
@ -1131,3 +1131,49 @@ class TestApiRoofConstructionCode:
|
|||
|
||||
# Assert
|
||||
assert result == "Pitched, sloping ceiling"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema 20.0.0 — Reduced-Field Synthesis (ADR-0027)
|
||||
#
|
||||
# RdSAP 20.0.0 is a pre-SAP10 reduced-data schema: it records as categories or
|
||||
# aggregates the measured fields the calculator needs (a glazed_area *band*, not
|
||||
# window m²; bath/shower *room counts*, not bath counts). The mapper synthesises
|
||||
# the measured form from the cert alone (no neighbour data). Each test name
|
||||
# encodes the synthesis ASSUMPTION it pins, because a pre-SAP10 cert has no
|
||||
# same-spec lodged figure to validate against (Validation-Cohort rule).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Harvested RdSAP-Schema-20.0.0 corpus in JSON Lines form (one certificate per
# line), located relative to this file. The file is optional: the loader below
# returns an empty list when it is absent.
_CORPUS_20_0_0 = os.path.join(
    os.path.dirname(__file__),
    "../../../../backend/epc_api/json_samples/RdSAP-Schema-20.0.0/corpus.jsonl",
)


def _load_20_0_0_corpus() -> list[Dict[str, Any]]:
    """Load every certificate from the RdSAP 20.0.0 corpus file.

    Returns:
        One parsed JSON value per non-blank line of the corpus, in file
        order, or an empty list when the corpus file does not exist.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    if not os.path.exists(_CORPUS_20_0_0):
        return []
    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # open() falls back to the locale codec, which can mis-decode or reject
    # non-ASCII (localised) text on platforms whose default is not UTF-8.
    with open(_CORPUS_20_0_0, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
|
||||
|
||||
|
||||
class TestRdSap20_0_0ReducedFieldSynthesis:
    """Corpus-driven checks that RdSAP 20.0.0 certs map to EpcPropertyData."""

    def test_cert_omitting_sap_windows_maps_without_missing_required_field(
        self,
    ) -> None:
        """Map the first corpus cert that has no `sap_windows` key.

        Skips when the corpus is absent/empty or when no cert omits the key.
        """
        # Arrange — 993/1000 corpus certs omit `sap_windows` entirely; the
        # placeholder schema declared it required, so every one failed to parse.
        # Required→optional (default []) must let them through.
        certs = _load_20_0_0_corpus()
        if not certs:
            pytest.skip("no RdSAP-Schema-20.0.0 corpus harvested")

        # Take the first cert lacking the key; the loop's `else` branch runs
        # only when no such cert was found.
        for candidate in certs:
            if "sap_windows" not in candidate:
                break
        else:
            pytest.skip("no corpus cert omits sap_windows")

        # Act
        mapped = EpcPropertyDataMapper.from_api_response(candidate)

        # Assert
        assert isinstance(mapped, EpcPropertyData)
|
||||
|
|
|
|||
|
|
@ -1,20 +1,24 @@
|
|||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .common import Measurement
|
||||
from .common import DescriptionV1, Measurement
|
||||
|
||||
|
||||
@dataclass
|
||||
class EnergyElement:
|
||||
# description is a plain string in schema 20.0.0 onwards (no longer a localised object)
|
||||
description: str
|
||||
# ADR-0027: the corpus lodges description as EITHER a plain str OR a
|
||||
# localised {value,language} dict (DescriptionV1) — not str-only as a
|
||||
# one-example placeholder assumed. Union so _coerce builds the right one.
|
||||
description: Union[str, DescriptionV1]
|
||||
energy_efficiency_rating: int
|
||||
environmental_efficiency_rating: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class Addendum:
|
||||
addendum_numbers: List[int]
|
||||
# ADR-0027: an addendum block can lodge only stone_walls/system_build flags
|
||||
# with no numbers list → optional.
|
||||
addendum_numbers: List[int] = field(default_factory=list)
|
||||
stone_walls: Optional[str] = None
|
||||
system_build: Optional[str] = None
|
||||
|
||||
|
|
@ -134,7 +138,8 @@ class SapBuildingPart:
|
|||
party_wall_construction: Union[int, str]
|
||||
wall_thickness_measured: str
|
||||
roof_insulation_location: Union[int, str]
|
||||
roof_insulation_thickness: Union[str, int]
|
||||
# ADR-0027: absent on 254/1506 building parts (flat-roof / no-loft) → optional.
|
||||
roof_insulation_thickness: Optional[Union[str, int]] = None
|
||||
sap_room_in_roof: Optional[SapRoomInRoof] = None
|
||||
wall_thickness: Optional[int] = None
|
||||
wall_insulation_thickness: Optional[str] = None
|
||||
|
|
@ -194,7 +199,12 @@ class RenewableHeatIncentive:
|
|||
impact_of_solid_wall_insulation: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
# ADR-0027: 20.0.0 is a reduced-data schema generated from a single example, so
|
||||
# it over-constrains — fields the corpus routinely omits were declared required,
|
||||
# failing 993/1000 certs at parse. Required→optional is data-driven (any field
|
||||
# present in <100% of the corpus gets a default); `kw_only=True` lifts the
|
||||
# dataclass non-default-after-default ordering rule so defaults can sit inline.
|
||||
@dataclass(kw_only=True)
|
||||
class RdSapSchema20_0_0:
|
||||
uprn: int
|
||||
roofs: List[EnergyElement]
|
||||
|
|
@ -214,13 +224,14 @@ class RdSapSchema20_0_0:
|
|||
report_type: int
|
||||
sap_heating: SapHeating
|
||||
sap_version: float
|
||||
sap_windows: List[SapWindow]
|
||||
# ADR-0027: 993/1000 omit this; synthesised by Reduced-Field Synthesis.
|
||||
sap_windows: List[SapWindow] = field(default_factory=list)
|
||||
schema_type: str
|
||||
uprn_source: str
|
||||
country_code: str
|
||||
main_heating: List[EnergyElement]
|
||||
# dwelling_type is a plain string in schema 20.0.0 onwards
|
||||
dwelling_type: str
|
||||
# ADR-0027: mixed str / localised-dict in the corpus (see EnergyElement).
|
||||
dwelling_type: Union[str, DescriptionV1]
|
||||
language_code: int
|
||||
property_type: int
|
||||
address_line_1: str
|
||||
|
|
@ -236,7 +247,7 @@ class RdSapSchema20_0_0:
|
|||
registration_date: str
|
||||
sap_energy_source: SapEnergySource
|
||||
secondary_heating: EnergyElement
|
||||
lzc_energy_sources: List[int]
|
||||
lzc_energy_sources: List[int] = field(default_factory=list)
|
||||
sap_building_parts: List[SapBuildingPart]
|
||||
low_energy_lighting: int
|
||||
solar_water_heating: str
|
||||
|
|
@ -252,24 +263,25 @@ class RdSapSchema20_0_0:
|
|||
open_fireplaces_count: int
|
||||
heating_cost_potential: float
|
||||
hot_water_cost_current: float
|
||||
insulated_door_u_value: float
|
||||
insulated_door_u_value: Optional[float] = None
|
||||
mechanical_ventilation: int
|
||||
percent_draughtproofed: int
|
||||
suggested_improvements: List[SuggestedImprovement]
|
||||
suggested_improvements: List[SuggestedImprovement] = field(default_factory=list)
|
||||
co2_emissions_potential: float
|
||||
energy_rating_potential: int
|
||||
lighting_cost_potential: float
|
||||
schema_version_original: str
|
||||
hot_water_cost_potential: float
|
||||
renewable_heat_incentive: RenewableHeatIncentive
|
||||
windows_transmission_details: WindowsTransmissionDetails
|
||||
# ADR-0027: cert-level U/g present in 687/1000; Table-24 default otherwise.
|
||||
windows_transmission_details: Optional[WindowsTransmissionDetails] = None
|
||||
energy_consumption_current: int
|
||||
multiple_glazed_proportion: int
|
||||
calculation_software_version: str
|
||||
energy_consumption_potential: int
|
||||
environmental_impact_current: int
|
||||
fixed_lighting_outlets_count: int
|
||||
multiple_glazed_proportion_nr: Optional[str]
|
||||
multiple_glazed_proportion_nr: Optional[str] = None
|
||||
current_energy_efficiency_band: str
|
||||
environmental_impact_potential: int
|
||||
potential_energy_efficiency_band: str
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue