Merge pull request #1306 from Hestia-Homes/fix/full-sap-16x-cert-mapping

Route full-SAP certs lodged under SAP-Schema-16.x to the full-SAP mapper
This commit is contained in:
Daniel Roth 2026-06-24 17:22:03 +01:00 committed by GitHub
commit a6752c2654
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 593 additions and 2 deletions

View file

@ -1,4 +1,5 @@
import copy
import logging
import re
from dataclasses import replace
from datetime import date
@ -965,13 +966,32 @@ class EpcPropertyDataMapper:
@staticmethod
def _from_sap_schema_16_x(data: Dict[str, Any]) -> EpcPropertyData:
"""Shared body for the SAP-Schema-16.x dedicated mappers: normalise the
reduced-field doc onto the RdSAP-17.1 shape and reuse that mapper."""
reduced-field doc onto the RdSAP-17.1 shape and reuse that mapper.
Only the reduced-RdSAP shape reaches here. A full-SAP cert mis-lodged
under a SAP-Schema-16.x label (the `LIG-*` as-designed new-builds) is a
*broken schema type* `from_api_response` detects the full-SAP shape and
routes it to the full-SAP mapper before this label dispatch (see
`_is_full_sap_assessment`)."""
from datatypes.epc.schema.rdsap_schema_17_1 import RdSapSchema17_1
return EpcPropertyDataMapper.from_rdsap_schema_17_1(
from_dict(RdSapSchema17_1, _normalize_sap_schema_16_x(data))
)
@staticmethod
def _from_full_sap(data: Dict[str, Any]) -> EpcPropertyData:
"""Map a cert whose *structure* is full-SAP (measured `sap_opening_types`)
via the full-SAP 17.1 mapper, regardless of its `schema_type` label the
door/window/fabric come from the real measured openings, no reduced-field
defaulting. The only `SapSchema17_1` field the broken-label (16.x) full-SAP
shape omits is `tenure` (register metadata, no SAP effect), defaulted."""
normalised = copy.deepcopy(data)
normalised.setdefault("tenure", "unknown")
return EpcPropertyDataMapper.from_sap_schema_17_1(
from_dict(SapSchema17_1, normalised)
)
@staticmethod
def from_rdsap_schema_17_1(schema: RdSapSchema17_1) -> EpcPropertyData:
es = schema.sap_energy_source
@ -2635,7 +2655,20 @@ class EpcPropertyDataMapper:
data = _normalize_shower_outlets(data)
data = _default_missing_post_town(data)
schema = data.get("schema_type", "")
if schema == "RdSAP-Schema-21.0.1":
# Shape over label. The `schema_type` is a LABEL that can disagree with the
# cert's STRUCTURE. A cert that is structurally full-SAP (measured
# `sap_opening_types`) is mapped by the full-SAP mapper whatever its label
# claims. When the label is NOT a recognised full-SAP schema it is a
# *broken schema type* — e.g. the LIG as-designed certs mis-lodged under
# SAP-Schema-16.x — recorded so the unreliable labels stay visible and
# coverage grows as new shapes surface. Correctly-labelled full-SAP certs
# fall through to their own dedicated branch below, keeping the
# one-mapper-per-schema convention.
if _is_full_sap_assessment(data) and not _is_full_sap_label(schema):
_record_broken_schema_type(schema, data)
mapped = EpcPropertyDataMapper._from_full_sap(data)
elif schema == "RdSAP-Schema-21.0.1":
from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1
mapped = EpcPropertyDataMapper.from_rdsap_schema_21_0_1(
@ -3317,6 +3350,60 @@ def _derive_built_form_16x(dwelling_type: Any) -> int:
return 4 # flats / unstated form → modal built_form (SAP- and gate-neutral)
logger = logging.getLogger(__name__)
# The recognised full-SAP schema_type labels: a full-SAP *shape* under one of
# these is correctly labelled, not a broken schema type. Anything else carrying
# the full-SAP shape is a mislabel (e.g. the LIG as-designed certs lodged as
# SAP-Schema-16.x).
_FULL_SAP_SCHEMA_LABELS: frozenset[str] = frozenset(
{
"SAP-Schema-17.0",
"SAP-Schema-17.1",
"SAP-Schema-18.0.0",
"SAP-Schema-19.1.0",
}
)
def _is_full_sap_assessment(data: Dict[str, Any]) -> bool:
"""Whether a cert is a full-SAP assessment (vs reduced RdSAP), keyed on the
gov-API's own `assessment_type` declaration — the authoritative SAP-vs-RdSAP
classification, not a structural proxy. A full-SAP assessment lodges fabric
as measured openings + measured U-values; a reduced RdSAP one carries the
top-level count fields.
`assessment_type` separates the entire cert corpus cleanly every full-SAP
schema (SAP-Schema-17.x/18.x and the broken `LIG` 16.x) is `"SAP"`, every
reduced cert (RdSAP-Schema-* and the reduced SAP-Schema-16.x) is `"RdSAP"`
and it is independent of the `schema_type` LABEL, which can be broken. The
structural signals (`data_type`, `sap_opening_types`) agree with it on the
corpus but are derived shape artifacts; `assessment_type` is the meaning."""
return data.get("assessment_type") == "SAP"
def _is_full_sap_label(schema: str) -> bool:
"""Whether `schema` is a recognised full-SAP `schema_type` — so a full-SAP
*shape* under it is correctly labelled, not a broken schema type."""
return schema in _FULL_SAP_SCHEMA_LABELS
def _record_broken_schema_type(schema: str, data: Dict[str, Any]) -> None:
"""Surface a *broken schema type*: a cert whose `schema_type` label disagrees
with its structure the label is not a full-SAP schema, yet the cert is
structurally full-SAP. Logged rather than silently rerouted so the unreliable
labels and their frequency stay visible (mirrors the skipped-cohort-cert
capture), and coverage can grow as new mislabelled shapes surface. e.g. the
LIG as-designed certs lodged under SAP-Schema-16.x."""
logger.warning(
"broken schema_type %r: structurally full-SAP "
"(assessment_type=%r) — routing to the full-SAP mapper (uprn=%s)",
schema,
data.get("assessment_type"),
data.get("uprn"),
)
def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]:
"""Rewrite a `SAP-Schema-16.2`/`16.3` API doc onto the `RdSAP-Schema-17.1`
shape so it can reuse the tested `from_rdsap_schema_17_1` mapper.

View file

@ -11,6 +11,7 @@ exercise the shape variation the design decisions hinge on
"""
import json
import logging
import os
from typing import Any, Dict
@ -682,3 +683,71 @@ class TestFromSapSchema16_2:
EpcPropertyDataMapper.from_api_response(data)
assert "window" not in data # the rename landed only on the copy
assert set(data.keys()) == before_keys
class TestFullSapSchema16xRouting:
"""The `SAP-Schema-16.x` name covers two structurally different certs: the
reduced RdSAP shape (top-level `door_count`/`glazed_area`) AND full SAP (the
as-designed `LIG-*` new-builds `assessment_type=SAP`, measured
`sap_opening_types` + structured `sap_building_parts`, doors lodged as
openings not a top-level count). Dispatch must route each to its own mapper;
sending a full-SAP cert through the reduced normaliser failed loud with
`RdSapSchema17_1: missing required field 'door_count'`."""
def test_full_sap_16_x_cert_maps_via_full_sap_path(self) -> None:
# Arrange — a real full-SAP cert lodged under SAP-Schema-16.0 (cert
# 0293-3880-6151-9872-8795, lodged 83) that previously raised the
# missing-door_count ValueError in the reduced normaliser.
data = load("sap_16_0_full.json")
# Act
epc = EpcPropertyDataMapper.from_api_response(data)
# Assert — mapped from the real measured openings, not reduced-field
# defaults: the single lodged door opening (0.9 x 2.1 m) → door_count 1.
assert isinstance(epc, EpcPropertyData)
assert epc.door_count == 1
assert epc.dwelling_type == "Detached house"
def test_reduced_16_x_cert_unaffected_by_full_sap_routing(self) -> None:
# Arrange — a reduced 16.2 cert (assessment_type RdSAP) must stay on the
# RdSAP path, keeping its top-level property_type.
data = load("sap_16_2.json")
# Act
epc = EpcPropertyDataMapper.from_api_response(data)
# Assert
assert epc.property_type is not None
def test_broken_schema_type_is_recorded(
self, caplog: pytest.LogCaptureFixture
) -> None:
# Arrange — a full-SAP cert mislabelled as SAP-Schema-16.0: the label
# disagrees with the assessment_type, so the mismatch must be surfaced
# (not silently rerouted).
data = load("sap_16_0_full.json")
# Act
with caplog.at_level(logging.WARNING, logger="datatypes.epc.domain.mapper"):
EpcPropertyDataMapper.from_api_response(data)
# Assert
assert any(
"broken schema_type" in r.message and "SAP-Schema-16.0" in r.message
for r in caplog.records
)
def test_correctly_labelled_full_sap_is_not_recorded_as_broken(
self, caplog: pytest.LogCaptureFixture
) -> None:
# Arrange — a correctly-labelled full-SAP cert: assessment_type SAP AND a
# recognised full-SAP label, so no mismatch to record.
data = load("sap_17_1.json")
# Act
with caplog.at_level(logging.WARNING, logger="datatypes.epc.domain.mapper"):
EpcPropertyDataMapper.from_api_response(data)
# Assert
assert not any("broken schema_type" in r.message for r in caplog.records)

View file

@ -0,0 +1,435 @@
{
"uprn": 10008075674,
"roofs": [
{
"description": "Average thermal transmittance 0.11 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"walls": [
{
"description": "Average thermal transmittance 0.21 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"floors": [
{
"description": "Average thermal transmittance 0.12 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"status": "entered",
"windows": {
"description": "High performance glazing",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
},
"lighting": {
"description": "Low energy lighting in all fixed outlets",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
},
"postcode": "PE2 9HD",
"data_type": 2,
"hot_water": {
"description": "From main system, plus solar",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
},
"post_town": "PETERBOROUGH",
"built_form": 1,
"created_at": "2012-05-30 13:30:25.000000",
"living_area": 18.011,
"orientation": 7,
"region_code": 2,
"report_type": 3,
"sap_heating": {
"thermal_store": 1,
"has_solar_panel": "true",
"water_fuel_type": 1,
"solar_store_volume": 100,
"water_heating_code": 901,
"hot_water_store_size": 210,
"main_heating_details": [
{
"burner_control": 3,
"main_fuel_type": 1,
"efficiency_type": 3,
"heat_emitter_type": 1,
"main_heating_code": 102,
"is_flue_fan_present": "false",
"main_heating_number": 1,
"is_condensing_boiler": "true",
"main_heating_control": 2110,
"is_interlocked_system": "true",
"main_heating_category": 2,
"main_heating_fraction": 1,
"gas_or_oil_boiler_type": 1,
"main_heating_flue_type": 2,
"main_heating_efficiency": 89,
"main_heating_data_source": 2,
"has_delayed_start_thermostat": "false",
"load_or_weather_compensation": 0,
"is_central_heating_pump_in_heated_space": "true"
}
],
"has_hot_water_cylinder": "true",
"has_solar_powered_pump": "false",
"has_cylinder_thermostat": "true",
"solar_panel_aperture_area": 4.51,
"has_fixed_air_conditioning": "false",
"secondary_heating_category": 1,
"solar_panel_collector_type": 2,
"is_cylinder_in_heated_space": "true",
"solar_panel_collector_pitch": 2,
"is_hot_water_separately_timed": "true",
"is_primary_pipework_insulated": "true",
"hot_water_store_insulation_type": 1,
"hot_water_store_heat_loss_source": 3,
"is_solar_store_combined_cylinder": "true",
"solar_panel_collector_data_source": 2,
"solar_panel_collector_orientation": 5,
"solar_panel_collector_overshading": 1,
"hot_water_store_insulation_thickness": 85,
"solar_panel_collector_heat_loss_rate": 3.681,
"solar_panel_collector_zero_loss_efficiency": 77
},
"sap_version": 9.9,
"schema_type": "SAP-Schema-16.0",
"uprn_source": "Energy Assessor",
"country_code": "EAW",
"main_heating": [
{
"description": "Boiler and radiators, mains gas",
"energy_efficiency_rating": 4,
"environmental_efficiency_rating": 4
}
],
"air_tightness": {
"description": "Air permeability 4.9 m\u00b3/h.m\u00b2 (as tested)",
"energy_efficiency_rating": 4,
"environmental_efficiency_rating": 4
},
"dwelling_type": "Detached house",
"language_code": 1,
"property_type": 0,
"address_line_1": "43, New Road",
"address_line_2": "Woodston",
"schema_version": "LIG-15.0",
"assessment_date": "2012-05-30",
"assessment_type": "SAP",
"completion_date": "2012-05-30",
"inspection_date": "2012-05-30",
"sap_ventilation": {
"psv_count": 0,
"pressure_test": 1,
"air_permeability": 4.88,
"open_flues_count": 0,
"ventilation_type": 1,
"extract_fans_count": 3,
"open_fireplaces_count": 0,
"sheltered_sides_count": 2,
"flueless_gas_fires_count": 0
},
"design_water_use": 1,
"total_floor_area": 76,
"transaction_type": 6,
"conservatory_type": 1,
"registration_date": "2012-05-30",
"restricted_access": 0,
"sap_energy_source": {
"electricity_tariff": 1,
"wind_turbines_count": 0,
"wind_turbine_terrain_type": 2,
"fixed_lighting_outlets_count": 0,
"low_energy_fixed_lighting_outlets_count": 0,
"low_energy_fixed_lighting_outlets_percentage": 100
},
"sap_opening_types": [
{
"name": "Door (1)",
"type": 1,
"u_value": 1.8,
"data_source": 2,
"description": "Data from Manufacturer",
"glazing_type": 1
},
{
"name": "Windows (1)",
"type": 4,
"u_value": 1.5,
"data_source": 2,
"description": "Data from Manufacturer",
"frame_factor": 0.7,
"glazing_type": 7,
"solar_transmittance": 0.63
}
],
"secondary_heating": {
"description": "None",
"energy_efficiency_rating": 0,
"environmental_efficiency_rating": 0
},
"lzc_energy_sources": [
10
],
"sap_building_parts": [
{
"sap_roofs": [
{
"name": "main roof",
"u_value": 0.11,
"roof_type": 2,
"kappa_value": 9,
"total_roof_area": 38.65
},
{
"name": "flat roof",
"u_value": 0.19,
"roof_type": 2,
"kappa_value": 9,
"total_roof_area": 1.39
}
],
"sap_walls": [
{
"name": "brick wall",
"u_value": 0.21,
"wall_type": 2,
"kappa_value": 150,
"total_wall_area": 115.98,
"is_curtain_walling": "false"
},
{
"name": "downstairs",
"u_value": 0,
"wall_type": 5,
"kappa_value": 9,
"total_wall_area": 44.7
},
{
"name": "upper",
"u_value": 0,
"wall_type": 5,
"kappa_value": 9,
"total_wall_area": 52.99
}
],
"identifier": "Main Dwelling",
"overshading": 2,
"sap_openings": [
{
"name": 1,
"type": "Door (1)",
"width": 0.9,
"height": 2.1,
"location": "brick wall",
"orientation": 7
},
{
"name": 2,
"type": "Windows (1)",
"width": 0.665,
"height": 1.05,
"location": "brick wall",
"orientation": 1
},
{
"name": 3,
"type": "Windows (1)",
"width": 2.09,
"height": 1.05,
"location": "brick wall",
"orientation": 7
},
{
"name": 4,
"type": "Windows (1)",
"width": 0.665,
"height": 1.05,
"location": "brick wall",
"orientation": 5
},
{
"name": 5,
"type": "Windows (1)",
"width": 2.5,
"height": 2.1,
"location": "brick wall",
"orientation": 3
},
{
"name": 6,
"type": "Windows (1)",
"width": 1.05,
"height": 1.425,
"location": "brick wall",
"orientation": 7
},
{
"name": 7,
"type": "Windows (1)",
"width": 0.6,
"height": 1.2,
"location": "brick wall",
"orientation": 1
},
{
"name": 8,
"type": "Windows (1)",
"width": 0.8,
"height": 1.425,
"location": "brick wall",
"orientation": 3
},
{
"name": 9,
"type": "Windows (1)",
"width": 0.8,
"height": 1.425,
"location": "brick wall",
"orientation": 3
},
{
"name": 10,
"type": "Windows (1)",
"width": 0.6,
"height": 1.05,
"location": "brick wall",
"orientation": 5
}
],
"construction_year": 2012,
"sap_thermal_bridges": {
"thermal_bridges": [
{
"length": 9.77,
"psi_value": 0.3,
"psi_value_source": 4,
"thermal_bridge_type": 2
},
{
"length": 7.27,
"psi_value": 0.04,
"psi_value_source": 4,
"thermal_bridge_type": 3
},
{
"length": 23.55,
"psi_value": 0.05,
"psi_value_source": 4,
"thermal_bridge_type": 4
},
{
"length": 27.18,
"psi_value": 0.16,
"psi_value_source": 4,
"thermal_bridge_type": 5
},
{
"length": 27.18,
"psi_value": 0.07,
"psi_value_source": 4,
"thermal_bridge_type": 6
},
{
"length": 8.68,
"psi_value": 0.06,
"psi_value_source": 4,
"thermal_bridge_type": 10
},
{
"length": 17.17,
"psi_value": 0.24,
"psi_value_source": 4,
"thermal_bridge_type": 12
},
{
"length": 21.04,
"psi_value": 0.09,
"psi_value_source": 4,
"thermal_bridge_type": 16
}
],
"thermal_bridge_code": 5
},
"building_part_number": 1,
"sap_floor_dimensions": [
{
"storey": 0,
"u_value": 0.12,
"floor_type": 2,
"storey_height": 2.55,
"heat_loss_area": 38.65,
"total_floor_area": 38.65,
"kappa_value_from_below": 18
},
{
"storey": 1,
"u_value": 0,
"floor_type": 3,
"kappa_value": 18,
"storey_height": 2.71,
"heat_loss_area": 1.3900032,
"total_floor_area": 37.26,
"kappa_value_from_below": 18
}
]
}
],
"bedf_revision_number": 323,
"heating_cost_current": 260,
"co2_emissions_current": 1.2,
"energy_rating_average": 60,
"energy_rating_current": 83,
"lighting_cost_current": 43,
"main_heating_controls": [
{
"description": "Time and temperature zone control",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"has_hot_water_cylinder": "true",
"heating_cost_potential": 251,
"hot_water_cost_current": 37,
"suggested_improvements": [
{
"sequence": 1,
"typical_saving": 219,
"indicative_cost": "\u00a311,000 - \u00a320,000",
"improvement_type": "U",
"improvement_details": {
"improvement_number": 34
},
"improvement_category": 3,
"energy_performance_rating": 94,
"environmental_impact_rating": 97
}
],
"co2_emissions_potential": 1.2,
"energy_rating_potential": 83,
"lighting_cost_potential": 43,
"hot_water_cost_potential": 47,
"is_in_smoke_control_area": "unknown",
"renewable_heat_incentive": {
"rhi_new_dwelling": {
"space_heating": 3010,
"water_heating": 2175
}
},
"seller_commission_report": "Y",
"energy_consumption_current": 84,
"has_fixed_air_conditioning": "false",
"calculation_software_version": "Version: 1.4.0.80",
"energy_consumption_potential": 84,
"environmental_impact_current": 87,
"current_energy_efficiency_band": "B",
"environmental_impact_potential": 87,
"has_heated_separate_conservatory": "false",
"potential_energy_efficiency_band": "B",
"co2_emissions_current_per_floor_area": 16
}