Sanitise millimetre-lodged full-SAP opening dimensions to metres

Some SAP-Schema-17.x/18.0.0 certs lodge sap_openings width/height in MILLIMETRES
mixed with metre rows in the same array (e.g. a 2025x2100 mm window beside a
3.06x1 m one). The 17.1 mapper read them all as metres → a 4.25M m2 window →
HTC in the millions → SAP clamped to 1.

Fix (TDD, datatypes/epc/domain/mapper.py): _sanitise_opening_dimension_m treats
any dimension > 50 m as mm and divides by 1000; _sap_opening_area_m2 applies it
to areas. Wired into the window, roof-window, and door-area-weighting paths.
The 3 broken certs (uprn_10093117227 / 10090317693 / 10091636031) now score
90 / 81 / 79 instead of 1.

3 RED->GREEN slices + refactor; new test class
TestFromSapSchema17_1OpeningUnitSanitisation + sap_17_1_mm_openings.json fixture;
0 new pyright errors; no regressions.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jun-te Kim 2026-06-16 20:16:27 +00:00
parent d41798f1c1
commit 05e3a25d25
4 changed files with 464 additions and 5 deletions

View file

@ -162,6 +162,30 @@ def _pv_orientation(field: Any) -> Optional[int]:
return int(_measurement_value(field))
# Full-SAP `sap_openings` occasionally lodge width/height in MILLIMETRES, mixed
# with metre rows in the same array (e.g. a 2025×2100 mm window beside a 3.06×1 m
# one). No real window/door dimension approaches 50 m, so a value that large is a
# millimetre lodgement and is divided by 1000. Without this a 2025×2100 mm window
# becomes 4.25M m², the HTC blows up and SAP clamps to 1 (uprn_10093117227).
_SAP_OPENING_MAX_PLAUSIBLE_METRES: Final[float] = 50.0
def _sanitise_opening_dimension_m(value: float) -> float:
"""Coerce a lodged opening dimension to metres, dividing by 1000 when it is
implausibly large (a millimetre lodgement)."""
if value > _SAP_OPENING_MAX_PLAUSIBLE_METRES:
return value / 1000.0
return value
def _sap_opening_area_m2(width: Any, height: Any) -> float:
"""Measured opening area in m², with mm-lodged width/height coerced to
metres via `_sanitise_opening_dimension_m`."""
return _sanitise_opening_dimension_m(float(width)) * _sanitise_opening_dimension_m(
float(height)
)
def _map_schema_21_pv(
es_pv_supply: Any,
) -> tuple[Optional[PhotovoltaicSupply], Optional[List[PhotovoltaicArray]]]:
@ -783,8 +807,8 @@ class EpcPropertyDataMapper:
orientation=op.orientation if op.orientation is not None else 0,
window_type=ot.frame_type if ot.frame_type is not None else 0,
glazing_type=ot.glazing_type,
window_width=float(op.width),
window_height=float(op.height),
window_width=_sanitise_opening_dimension_m(float(op.width)),
window_height=_sanitise_opening_dimension_m(float(op.height)),
draught_proofed=False,
window_location=op.location or "",
window_wall_type=0,
@ -821,7 +845,7 @@ class EpcPropertyDataMapper:
continue
roof_windows.append(
SapRoofWindow(
area_m2=float(op.width) * float(op.height),
area_m2=_sap_opening_area_m2(op.width, op.height),
u_value_raw=ot.u_value,
orientation=op.orientation if op.orientation is not None else 0,
# default 45° pitch when unlodged — matches the
@ -2778,7 +2802,7 @@ def _sap_door_aggregates(schema: SapSchema17_1) -> Tuple[int, Optional[float]]:
if ot is None or ot.type not in _SAP_OPENING_TYPE_DOORS:
continue
count += 1
area = float(op.width) * float(op.height)
area = _sap_opening_area_m2(op.width, op.height)
weighted_u_area += ot.u_value * area
total_area += area
u_value = weighted_u_area / total_area if total_area > 0 else None

View file

@ -108,6 +108,58 @@ class TestFromSapSchema17_1FabricDescriptions:
assert result.walls and result.walls[0].description
class TestFromSapSchema17_1OpeningUnitSanitisation:
    """Some full-SAP certs lodge sap_openings width/height in MILLIMETRES (mixed
    with metre rows in the same array, e.g. a 2025×2100 mm window next to a
    3.06×1 m one). The mapper must read them as metres — any dimension
    implausibly large (>50 m) is mm and is divided by 1000 — else the mm window
    becomes ~4.25M m², the HTC blows up and SAP clamps to 1. Regression for
    uprn_10093117227 / 10090317693 / 10091636031."""

    def test_mm_lodged_window_is_converted_to_metres(self) -> None:
        # Arrange — cert lodges a 2025×2100 mm "Side Elevation" window mixed with
        # metre-lodged windows in the same sap_openings array.
        data = load("sap_17_1_mm_openings.json")

        # Act
        epc = EpcPropertyDataMapper.from_api_response(data)

        # Assert — no window is physically impossible; the mm row is now ~2 m.
        # Both dimensions are checked because the fixture row lodges width AND
        # height in mm, and each is sanitised on its own code path.
        assert epc.sap_windows
        assert max(w.window_width for w in epc.sap_windows) < 50.0
        assert max(w.window_height for w in epc.sap_windows) < 50.0

    def test_mm_lodged_roof_window_is_converted_to_metres(self) -> None:
        # Arrange — inject a millimetre-lodged roof-window dimension (990 mm) into
        # a fixture whose roof windows are otherwise lodged in metres.
        data = load("sap_17_1_house.json")
        patched = 0
        for bp in data["sap_building_parts"]:
            for op in bp["sap_openings"]:
                if op["name"] == "Bed 1 Roof":
                    op["width"] = 990  # mm (was 0.99 m)
                    patched += 1
        # Guard against a vacuous pass if the fixture opening is ever renamed.
        assert patched, "fixture has no 'Bed 1 Roof' opening to lodge in mm"

        # Act
        epc = EpcPropertyDataMapper.from_api_response(data)

        # Assert — the roof-window area stays physically plausible (not ~720 m²).
        assert epc.sap_roof_windows is not None
        assert max(rw.area_m2 for rw in epc.sap_roof_windows) < 50.0

    def test_mm_lodged_door_does_not_dominate_area_weighted_u(self) -> None:
        # Arrange — the flat fixture has 2 doors (U 1.4 over 1.89 m², U 1.8 over
        # 2.12 m²) → area-weighted U 1.6115. Lodge the first door's width in mm;
        # un-sanitised, its 1890 m² area swamps the weighting → U collapses to 1.4.
        data = load("sap_17_1_flat.json")
        patched = 0
        for bp in data["sap_building_parts"]:
            for op in bp["sap_openings"]:
                if op["name"] == "Front":
                    op["width"] = 1890  # mm (was 1.89 m)
                    patched += 1
        # Guard against a vacuous pass if the fixture opening is ever renamed.
        assert patched, "fixture has no 'Front' opening to lodge in mm"

        # Act
        epc = EpcPropertyDataMapper.from_api_response(data)

        # Assert — the mm door is rescaled, so the area weighting is unchanged.
        assert epc.insulated_door_u_value == pytest.approx(1.6114713, abs=1e-4)
class TestFromSapSchema17_1Windows:
"""Slice 4a (D2): vertical-window openings (opening-type 4) collapse onto
sap_windows with measured per-window geometry and U-value."""

View file

@ -0,0 +1,376 @@
{
"uprn": 10093117227,
"roofs": [
{
"description": "Average thermal transmittance 0.11 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"walls": [
{
"description": "Average thermal transmittance 0.22 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"floors": [
{
"description": "Average thermal transmittance 0.12 W/m\u00b2K",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
}
],
"status": "entered",
"tenure": "ND",
"windows": {
"description": "High performance glazing",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
},
"lighting": {
"description": "Low energy lighting in all fixed outlets",
"energy_efficiency_rating": 5,
"environmental_efficiency_rating": 5
},
"postcode": "GU31 5BF",
"data_type": 2,
"hot_water": {
"description": "From main system",
"energy_efficiency_rating": 3,
"environmental_efficiency_rating": 2
},
"post_town": "PETERSFIELD",
"built_form": 1,
"created_at": "2018-04-12 07:48:54",
"living_area": 16.22,
"orientation": 6,
"region_code": 16,
"report_type": 3,
"sap_heating": {
"thermal_store": 1,
"water_fuel_type": 39,
"water_heating_code": 901,
"hot_water_store_size": 180,
"main_heating_details": [
{
"main_fuel_type": 39,
"heat_emitter_type": 1,
"main_heating_code": 191,
"emitter_temperature": "NA",
"main_heating_number": 1,
"main_heating_control": 2106,
"main_heating_category": 2,
"main_heating_fraction": 1,
"central_heating_pump_age": 2,
"main_heating_data_source": 3,
"has_separate_delayed_start": "false",
"load_or_weather_compensation": 0,
"is_central_heating_pump_in_heated_space": "true"
}
],
"has_hot_water_cylinder": "true",
"immersion_heating_type": 1,
"has_cylinder_thermostat": "true",
"hot_water_store_heat_loss": 1.95,
"has_fixed_air_conditioning": "false",
"secondary_heating_category": 1,
"is_cylinder_in_heated_space": "true",
"hot_water_store_heat_loss_source": 2
},
"sap_version": 9.92,
"schema_type": "SAP-Schema-17.1",
"uprn_source": "Address Matched",
"country_code": "ENG",
"main_heating": [
{
"description": "Boiler and radiators, electric",
"energy_efficiency_rating": 1,
"environmental_efficiency_rating": 2
}
],
"air_tightness": {
"description": "Air permeability 4.7 m\u00b3/h.m\u00b2 (as tested)",
"energy_efficiency_rating": 4,
"environmental_efficiency_rating": 4
},
"dwelling_type": "Detached bungalow",
"language_code": 1,
"property_type": 1,
"address_line_1": "1 Hilly Field Mews",
"address_line_2": "Parsonage Estate",
"address_line_3": "Rogate",
"assessment_date": "2018-04-12",
"assessment_type": "SAP",
"completion_date": "2018-04-12",
"inspection_date": "2018-04-12",
"sap_ventilation": {
"psv_count": 0,
"pressure_test": 1,
"air_permeability": 4.7,
"open_flues_count": 0,
"ventilation_type": 1,
"extract_fans_count": 2,
"open_fireplaces_count": 0,
"sheltered_sides_count": 2,
"flueless_gas_fires_count": 0
},
"design_water_use": 1,
"sap_data_version": 9.92,
"total_floor_area": 50,
"transaction_type": 6,
"conservatory_type": 1,
"registration_date": "2018-04-12",
"sap_energy_source": {
"pv_arrays": [
{
"pitch": 3,
"peak_power": 1.77,
"orientation": 4,
"overshading": 1,
"pv_connection": 2
}
],
"electricity_tariff": 2,
"wind_turbines_count": 0,
"wind_turbine_terrain_type": 1,
"fixed_lighting_outlets_count": 10,
"low_energy_fixed_lighting_outlets_count": 10,
"low_energy_fixed_lighting_outlets_percentage": 100
},
"sap_opening_types": [
{
"name": "Front Door",
"type": 1,
"u_value": 1,
"data_source": 2,
"glazing_type": 1
},
{
"name": "Windows",
"type": 4,
"u_value": 1.4,
"data_source": 2,
"frame_factor": 0.7,
"glazing_type": 7,
"solar_transmittance": 0.63
}
],
"secondary_heating": {
"description": "None",
"energy_efficiency_rating": 0,
"environmental_efficiency_rating": 0
},
"lzc_energy_sources": [
11
],
"sap_building_parts": [
{
"sap_roofs": [
{
"name": "Roof 1",
"u_value": 0.11,
"roof_type": 2,
"description": "External Roof",
"kappa_value": 9,
"total_roof_area": 50.29
}
],
"sap_walls": [
{
"name": "External Wall 1",
"u_value": 0.22,
"wall_type": 2,
"description": "External Wall",
"kappa_value": 60,
"total_wall_area": 86.33,
"is_curtain_walling": "false"
},
{
"name": "Internal Wall 0",
"u_value": 0,
"wall_type": 5,
"kappa_value": 9,
"total_wall_area": 66.46
}
],
"identifier": "Main Dwelling",
"overshading": 2,
"sap_openings": [
{
"name": "Front Door",
"type": "Front Door",
"width": 1000,
"height": 2100,
"location": "External Wall 1",
"orientation": 0
},
{
"name": "Front Elevation",
"type": "Windows",
"width": 3.06,
"height": 1,
"location": "External Wall 1",
"orientation": 6
},
{
"name": "Side Elevation",
"type": "Windows",
"width": 2025,
"height": 2100,
"location": "External Wall 1",
"orientation": 4
},
{
"name": "Rear Elevation",
"type": "Windows",
"width": 3.6,
"height": 1,
"location": "External Wall 1",
"orientation": 2
}
],
"construction_year": 2017,
"sap_thermal_bridges": {
"thermal_bridges": [
{
"length": 7.83,
"psi_value": 0.24,
"psi_value_source": 3,
"thermal_bridge_type": "E2"
},
{
"length": 6.83,
"psi_value": 0.04,
"psi_value_source": 2,
"thermal_bridge_type": "E3"
},
{
"length": 19.5,
"psi_value": 0.05,
"psi_value_source": 2,
"thermal_bridge_type": "E4"
},
{
"length": 33.46,
"psi_value": 0.16,
"psi_value_source": 2,
"thermal_bridge_type": "E5"
},
{
"length": 22.49,
"psi_value": 0.06,
"psi_value_source": 2,
"thermal_bridge_type": "E10"
},
{
"length": 10.97,
"psi_value": 0.24,
"psi_value_source": 2,
"thermal_bridge_type": "E12"
},
{
"length": 15.48,
"psi_value": 0.09,
"psi_value_source": 2,
"thermal_bridge_type": "E16"
},
{
"length": 5.16,
"psi_value": -0.09,
"psi_value_source": 2,
"thermal_bridge_type": "E17"
}
],
"thermal_bridge_code": 5
},
"building_part_number": 1,
"sap_floor_dimensions": [
{
"storey": 0,
"u_value": 0.12,
"floor_type": 2,
"description": "Ground Floor",
"kappa_value": 75,
"storey_height": 2.58,
"heat_loss_area": 50.29,
"total_floor_area": 50.29
}
]
}
],
"heating_cost_current": {
"value": 403,
"currency": "GBP"
},
"co2_emissions_current": 1.3,
"energy_rating_average": 60,
"energy_rating_current": 80,
"lighting_cost_current": {
"value": 41,
"currency": "GBP"
},
"main_heating_controls": [
{
"description": "Programmer, room thermostat and TRVs",
"energy_efficiency_rating": 4,
"environmental_efficiency_rating": 4
}
],
"has_hot_water_cylinder": "true",
"heating_cost_potential": {
"value": 410,
"currency": "GBP"
},
"hot_water_cost_current": {
"value": 124,
"currency": "GBP"
},
"suggested_improvements": [
{
"sequence": 1,
"typical_saving": {
"value": 50,
"currency": "GBP"
},
"indicative_cost": "\u00a34,000 - \u00a36,000",
"improvement_type": "N",
"improvement_details": {
"improvement_number": 19
},
"improvement_category": 5,
"energy_performance_rating": 82,
"environmental_impact_rating": 84
}
],
"co2_emissions_potential": 0.8,
"energy_rating_potential": 82,
"lighting_cost_potential": {
"value": 41,
"currency": "GBP"
},
"schema_version_original": "LIG-17.0",
"hot_water_cost_potential": {
"value": 66,
"currency": "GBP"
},
"is_in_smoke_control_area": "unknown",
"renewable_heat_incentive": {
"rhi_new_dwelling": {
"space_heating": 2190,
"water_heating": 1600
}
},
"seller_commission_report": "Y",
"energy_consumption_current": 150,
"has_fixed_air_conditioning": "false",
"multiple_glazed_percentage": 100,
"calculation_software_version": "4.05r02",
"energy_consumption_potential": 99,
"environmental_impact_current": 78,
"current_energy_efficiency_band": "C",
"environmental_impact_potential": 84,
"has_heated_separate_conservatory": "false",
"potential_energy_efficiency_band": "B",
"co2_emissions_current_per_floor_area": 25
}

View file

@ -18,7 +18,14 @@ habitable_room_count, multiple_glazing_type). Fail loud at from_dict (correct):
## 🐞 BUGS FOUND (diagnosed; NOT fixed unless marked ✅ FIXED — for your review)
### 1. Opening dimensions in millimetres read as metres — MAPPER bug (SEVERE)
### 1. Opening dimensions in millimetres read as metres — MAPPER bug (SEVERE) ✅ FIXED
**FIXED** (TDD, `_sanitise_opening_dimension_m` + `_sap_opening_area_m2`): any opening
dimension > 50 m is mm → ÷1000. Applied to windows, roof windows, and the door
area-weighting. The 3 broken certs now score 90 / 81 / 79 (were all 1). 3 RED→GREEN
slices + refactor; fixture sap_17_1_mm_openings.json; 0 new pyright errors.
Original report below.
Full-SAP certs whose `sap_building_parts[].sap_openings[]` lodge width/height in
**mm** (mixed with metre rows in the same array) → the 17.1 mapper treats all as
metres → multi-million-m² windows → HTC in the millions → **SAP clamps to 1**.