diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 70bf7d61..0f1b8bad 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,4 +1,5 @@ import copy +import logging import re from dataclasses import replace from datetime import date @@ -965,13 +966,32 @@ class EpcPropertyDataMapper: @staticmethod def _from_sap_schema_16_x(data: Dict[str, Any]) -> EpcPropertyData: """Shared body for the SAP-Schema-16.x dedicated mappers: normalise the - reduced-field doc onto the RdSAP-17.1 shape and reuse that mapper.""" + reduced-field doc onto the RdSAP-17.1 shape and reuse that mapper. + + Only the reduced-RdSAP shape reaches here. A full-SAP cert mis-lodged + under a SAP-Schema-16.x label (the `LIG-*` as-designed new-builds) is a + *broken schema type* — `from_api_response` detects the full-SAP shape and + routes it to the full-SAP mapper before this label dispatch (see + `_is_full_sap_assessment`).""" from datatypes.epc.schema.rdsap_schema_17_1 import RdSapSchema17_1 return EpcPropertyDataMapper.from_rdsap_schema_17_1( from_dict(RdSapSchema17_1, _normalize_sap_schema_16_x(data)) ) + @staticmethod + def _from_full_sap(data: Dict[str, Any]) -> EpcPropertyData: + """Map a cert whose *structure* is full-SAP (measured `sap_opening_types`) + via the full-SAP 17.1 mapper, regardless of its `schema_type` label — the + door/window/fabric come from the real measured openings, no reduced-field + defaulting. The only `SapSchema17_1` field the broken-label (16.x) full-SAP + shape omits is `tenure` (register metadata, no SAP effect), defaulted.""" + normalised = copy.deepcopy(data) + normalised.setdefault("tenure", "unknown") + return EpcPropertyDataMapper.from_sap_schema_17_1( + from_dict(SapSchema17_1, normalised) + ) + @staticmethod def from_rdsap_schema_17_1(schema: RdSapSchema17_1) -> EpcPropertyData: es = schema.sap_energy_source @@ -2635,7 +2655,20 @@ class EpcPropertyDataMapper: data = _normalize_shower_outlets(data) data = _default_missing_post_town(data) schema = data.get("schema_type", "") - if schema == "RdSAP-Schema-21.0.1": + + # Shape over label. The `schema_type` is a LABEL that can disagree with the + # cert's STRUCTURE. A cert that is structurally full-SAP (measured + # `sap_opening_types`) is mapped by the full-SAP mapper whatever its label + # claims. When the label is NOT a recognised full-SAP schema it is a + # *broken schema type* — e.g. the LIG as-designed certs mis-lodged under + # SAP-Schema-16.x — recorded so the unreliable labels stay visible and + # coverage grows as new shapes surface. Correctly-labelled full-SAP certs + # fall through to their own dedicated branch below, keeping the + # one-mapper-per-schema convention. + if _is_full_sap_assessment(data) and not _is_full_sap_label(schema): + _record_broken_schema_type(schema, data) + mapped = EpcPropertyDataMapper._from_full_sap(data) + elif schema == "RdSAP-Schema-21.0.1": from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1 mapped = EpcPropertyDataMapper.from_rdsap_schema_21_0_1( @@ -3317,6 +3350,60 @@ def _derive_built_form_16x(dwelling_type: Any) -> int: return 4 # flats / unstated form → modal built_form (SAP- and gate-neutral) +logger = logging.getLogger(__name__) + +# The recognised full-SAP schema_type labels: a full-SAP *shape* under one of +# these is correctly labelled, not a broken schema type. Anything else carrying +# the full-SAP shape is a mislabel (e.g. the LIG as-designed certs lodged as +# SAP-Schema-16.x). +_FULL_SAP_SCHEMA_LABELS: frozenset[str] = frozenset( + { + "SAP-Schema-17.0", + "SAP-Schema-17.1", + "SAP-Schema-18.0.0", + "SAP-Schema-19.1.0", + } +) + + +def _is_full_sap_assessment(data: Dict[str, Any]) -> bool: + """Whether a cert is a full-SAP assessment (vs reduced RdSAP), keyed on the + gov-API's own `assessment_type` declaration — the authoritative SAP-vs-RdSAP + classification, not a structural proxy. A full-SAP assessment lodges fabric + as measured openings + measured U-values; a reduced RdSAP one carries the + top-level count fields. + + `assessment_type` separates the entire cert corpus cleanly — every full-SAP + schema (SAP-Schema-17.x/18.x and the broken `LIG` 16.x) is `"SAP"`, every + reduced cert (RdSAP-Schema-* and the reduced SAP-Schema-16.x) is `"RdSAP"` — + and it is independent of the `schema_type` LABEL, which can be broken. The + structural signals (`data_type`, `sap_opening_types`) agree with it on the + corpus but are derived shape artifacts; `assessment_type` is the meaning.""" + return data.get("assessment_type") == "SAP" + + +def _is_full_sap_label(schema: str) -> bool: + """Whether `schema` is a recognised full-SAP `schema_type` — so a full-SAP + *shape* under it is correctly labelled, not a broken schema type.""" + return schema in _FULL_SAP_SCHEMA_LABELS + + +def _record_broken_schema_type(schema: str, data: Dict[str, Any]) -> None: + """Surface a *broken schema type*: a cert whose `schema_type` label disagrees + with its structure — the label is not a full-SAP schema, yet the cert is + structurally full-SAP. Logged rather than silently rerouted so the unreliable + labels and their frequency stay visible (mirrors the skipped-cohort-cert + capture), and coverage can grow as new mislabelled shapes surface. e.g. the + LIG as-designed certs lodged under SAP-Schema-16.x.""" + logger.warning( + "broken schema_type %r: structurally full-SAP " + "(assessment_type=%r) — routing to the full-SAP mapper (uprn=%s)", + schema, + data.get("assessment_type"), + data.get("uprn"), + ) + + def _normalize_sap_schema_16_x(data: Dict[str, Any]) -> Dict[str, Any]: """Rewrite a `SAP-Schema-16.2`/`16.3` API doc onto the `RdSAP-Schema-17.1` shape so it can reuse the tested `from_rdsap_schema_17_1` mapper. diff --git a/datatypes/epc/domain/tests/test_from_sap_schema.py b/datatypes/epc/domain/tests/test_from_sap_schema.py index d4b3330b..25acf566 100644 --- a/datatypes/epc/domain/tests/test_from_sap_schema.py +++ b/datatypes/epc/domain/tests/test_from_sap_schema.py @@ -11,6 +11,7 @@ exercise the shape variation the design decisions hinge on """ import json +import logging import os from typing import Any, Dict @@ -682,3 +683,71 @@ class TestFromSapSchema16_2: EpcPropertyDataMapper.from_api_response(data) assert "window" not in data # the rename landed only on the copy assert set(data.keys()) == before_keys + + +class TestFullSapSchema16xRouting: + """The `SAP-Schema-16.x` name covers two structurally different certs: the + reduced RdSAP shape (top-level `door_count`/`glazed_area`) AND full SAP (the + as-designed `LIG-*` new-builds — `assessment_type=SAP`, measured + `sap_opening_types` + structured `sap_building_parts`, doors lodged as + openings not a top-level count). Dispatch must route each to its own mapper; + sending a full-SAP cert through the reduced normaliser failed loud with + `RdSapSchema17_1: missing required field 'door_count'`.""" + + def test_full_sap_16_x_cert_maps_via_full_sap_path(self) -> None: + # Arrange — a real full-SAP cert lodged under SAP-Schema-16.0 (cert + # 0293-3880-6151-9872-8795, lodged 83) that previously raised the + # missing-door_count ValueError in the reduced normaliser. + data = load("sap_16_0_full.json") + + # Act + epc = EpcPropertyDataMapper.from_api_response(data) + + # Assert — mapped from the real measured openings, not reduced-field + # defaults: the single lodged door opening (0.9 x 2.1 m) → door_count 1. + assert isinstance(epc, EpcPropertyData) + assert epc.door_count == 1 + assert epc.dwelling_type == "Detached house" + + def test_reduced_16_x_cert_unaffected_by_full_sap_routing(self) -> None: + # Arrange — a reduced 16.2 cert (assessment_type RdSAP) must stay on the + # RdSAP path, keeping its top-level property_type. + data = load("sap_16_2.json") + + # Act + epc = EpcPropertyDataMapper.from_api_response(data) + + # Assert + assert epc.property_type is not None + + def test_broken_schema_type_is_recorded( + self, caplog: pytest.LogCaptureFixture + ) -> None: + # Arrange — a full-SAP cert mislabelled as SAP-Schema-16.0: the label + # disagrees with the assessment_type, so the mismatch must be surfaced + # (not silently rerouted). + data = load("sap_16_0_full.json") + + # Act + with caplog.at_level(logging.WARNING, logger="datatypes.epc.domain.mapper"): + EpcPropertyDataMapper.from_api_response(data) + + # Assert + assert any( + "broken schema_type" in r.message and "SAP-Schema-16.0" in r.message + for r in caplog.records + ) + + def test_correctly_labelled_full_sap_is_not_recorded_as_broken( + self, caplog: pytest.LogCaptureFixture + ) -> None: + # Arrange — a correctly-labelled full-SAP cert: assessment_type SAP AND a + # recognised full-SAP label, so no mismatch to record. + data = load("sap_17_1.json") + + # Act + with caplog.at_level(logging.WARNING, logger="datatypes.epc.domain.mapper"): + EpcPropertyDataMapper.from_api_response(data) + + # Assert + assert not any("broken schema_type" in r.message for r in caplog.records) diff --git a/datatypes/epc/schema/tests/fixtures/sap_16_0_full.json b/datatypes/epc/schema/tests/fixtures/sap_16_0_full.json new file mode 100644 index 00000000..b665cab2 --- /dev/null +++ b/datatypes/epc/schema/tests/fixtures/sap_16_0_full.json @@ -0,0 +1,435 @@ +{ + "uprn": 10008075674, + "roofs": [ + { + "description": "Average thermal transmittance 0.11 W/m\u00b2K", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + } + ], + "walls": [ + { + "description": "Average thermal transmittance 0.21 W/m\u00b2K", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + } + ], + "floors": [ + { + "description": "Average thermal transmittance 0.12 W/m\u00b2K", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + } + ], + "status": "entered", + "windows": { + "description": "High performance glazing", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + }, + "lighting": { + "description": "Low energy lighting in all fixed outlets", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + }, + "postcode": "PE2 9HD", + "data_type": 2, + "hot_water": { + "description": "From main system, plus solar", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + }, + "post_town": "PETERBOROUGH", + "built_form": 1, + "created_at": "2012-05-30 13:30:25.000000", + "living_area": 18.011, + "orientation": 7, + "region_code": 2, + "report_type": 3, + "sap_heating": { + "thermal_store": 1, + "has_solar_panel": "true", + "water_fuel_type": 1, + "solar_store_volume": 100, + "water_heating_code": 901, + "hot_water_store_size": 210, + "main_heating_details": [ + { + "burner_control": 3, + "main_fuel_type": 1, + "efficiency_type": 3, + "heat_emitter_type": 1, + "main_heating_code": 102, + "is_flue_fan_present": "false", + "main_heating_number": 1, + "is_condensing_boiler": "true", + "main_heating_control": 2110, + "is_interlocked_system": "true", + "main_heating_category": 2, + "main_heating_fraction": 1, + "gas_or_oil_boiler_type": 1, + "main_heating_flue_type": 2, + "main_heating_efficiency": 89, + "main_heating_data_source": 2, + "has_delayed_start_thermostat": "false", + "load_or_weather_compensation": 0, + "is_central_heating_pump_in_heated_space": "true" + } + ], + "has_hot_water_cylinder": "true", + "has_solar_powered_pump": "false", + "has_cylinder_thermostat": "true", + "solar_panel_aperture_area": 4.51, + "has_fixed_air_conditioning": "false", + "secondary_heating_category": 1, + "solar_panel_collector_type": 2, + "is_cylinder_in_heated_space": "true", + "solar_panel_collector_pitch": 2, + "is_hot_water_separately_timed": "true", + "is_primary_pipework_insulated": "true", + "hot_water_store_insulation_type": 1, + "hot_water_store_heat_loss_source": 3, + "is_solar_store_combined_cylinder": "true", + "solar_panel_collector_data_source": 2, + "solar_panel_collector_orientation": 5, + "solar_panel_collector_overshading": 1, + "hot_water_store_insulation_thickness": 85, + "solar_panel_collector_heat_loss_rate": 3.681, + "solar_panel_collector_zero_loss_efficiency": 77 + }, + "sap_version": 9.9, + "schema_type": "SAP-Schema-16.0", + "uprn_source": "Energy Assessor", + "country_code": "EAW", + "main_heating": [ + { + "description": "Boiler and radiators, mains gas", + "energy_efficiency_rating": 4, + "environmental_efficiency_rating": 4 + } + ], + "air_tightness": { + "description": "Air permeability 4.9 m\u00b3/h.m\u00b2 (as tested)", + "energy_efficiency_rating": 4, + "environmental_efficiency_rating": 4 + }, + "dwelling_type": "Detached house", + "language_code": 1, + "property_type": 0, + "address_line_1": "43, New Road", + "address_line_2": "Woodston", + "schema_version": "LIG-15.0", + "assessment_date": "2012-05-30", + "assessment_type": "SAP", + "completion_date": "2012-05-30", + "inspection_date": "2012-05-30", + "sap_ventilation": { + "psv_count": 0, + "pressure_test": 1, + "air_permeability": 4.88, + "open_flues_count": 0, + "ventilation_type": 1, + "extract_fans_count": 3, + "open_fireplaces_count": 0, + "sheltered_sides_count": 2, + "flueless_gas_fires_count": 0 + }, + "design_water_use": 1, + "total_floor_area": 76, + "transaction_type": 6, + "conservatory_type": 1, + "registration_date": "2012-05-30", + "restricted_access": 0, + "sap_energy_source": { + "electricity_tariff": 1, + "wind_turbines_count": 0, + "wind_turbine_terrain_type": 2, + "fixed_lighting_outlets_count": 0, + "low_energy_fixed_lighting_outlets_count": 0, + "low_energy_fixed_lighting_outlets_percentage": 100 + }, + "sap_opening_types": [ + { + "name": "Door (1)", + "type": 1, + "u_value": 1.8, + "data_source": 2, + "description": "Data from Manufacturer", + "glazing_type": 1 + }, + { + "name": "Windows (1)", + "type": 4, + "u_value": 1.5, + "data_source": 2, + "description": "Data from Manufacturer", + "frame_factor": 0.7, + "glazing_type": 7, + "solar_transmittance": 0.63 + } + ], + "secondary_heating": { + "description": "None", + "energy_efficiency_rating": 0, + "environmental_efficiency_rating": 0 + }, + "lzc_energy_sources": [ + 10 + ], + "sap_building_parts": [ + { + "sap_roofs": [ + { + "name": "main roof", + "u_value": 0.11, + "roof_type": 2, + "kappa_value": 9, + "total_roof_area": 38.65 + }, + { + "name": "flat roof", + "u_value": 0.19, + "roof_type": 2, + "kappa_value": 9, + "total_roof_area": 1.39 + } + ], + "sap_walls": [ + { + "name": "brick wall", + "u_value": 0.21, + "wall_type": 2, + "kappa_value": 150, + "total_wall_area": 115.98, + "is_curtain_walling": "false" + }, + { + "name": "downstairs", + "u_value": 0, + "wall_type": 5, + "kappa_value": 9, + "total_wall_area": 44.7 + }, + { + "name": "upper", + "u_value": 0, + "wall_type": 5, + "kappa_value": 9, + "total_wall_area": 52.99 + } + ], + "identifier": "Main Dwelling", + "overshading": 2, + "sap_openings": [ + { + "name": 1, + "type": "Door (1)", + "width": 0.9, + "height": 2.1, + "location": "brick wall", + "orientation": 7 + }, + { + "name": 2, + "type": "Windows (1)", + "width": 0.665, + "height": 1.05, + "location": "brick wall", + "orientation": 1 + }, + { + "name": 3, + "type": "Windows (1)", + "width": 2.09, + "height": 1.05, + "location": "brick wall", + "orientation": 7 + }, + { + "name": 4, + "type": "Windows (1)", + "width": 0.665, + "height": 1.05, + "location": "brick wall", + "orientation": 5 + }, + { + "name": 5, + "type": "Windows (1)", + "width": 2.5, + "height": 2.1, + "location": "brick wall", + "orientation": 3 + }, + { + "name": 6, + "type": "Windows (1)", + "width": 1.05, + "height": 1.425, + "location": "brick wall", + "orientation": 7 + }, + { + "name": 7, + "type": "Windows (1)", + "width": 0.6, + "height": 1.2, + "location": "brick wall", + "orientation": 1 + }, + { + "name": 8, + "type": "Windows (1)", + "width": 0.8, + "height": 1.425, + "location": "brick wall", + "orientation": 3 + }, + { + "name": 9, + "type": "Windows (1)", + "width": 0.8, + "height": 1.425, + "location": "brick wall", + "orientation": 3 + }, + { + "name": 10, + "type": "Windows (1)", + "width": 0.6, + "height": 1.05, + "location": "brick wall", + "orientation": 5 + } + ], + "construction_year": 2012, + "sap_thermal_bridges": { + "thermal_bridges": [ + { + "length": 9.77, + "psi_value": 0.3, + "psi_value_source": 4, + "thermal_bridge_type": 2 + }, + { + "length": 7.27, + "psi_value": 0.04, + "psi_value_source": 4, + "thermal_bridge_type": 3 + }, + { + "length": 23.55, + "psi_value": 0.05, + "psi_value_source": 4, + "thermal_bridge_type": 4 + }, + { + "length": 27.18, + "psi_value": 0.16, + "psi_value_source": 4, + "thermal_bridge_type": 5 + }, + { + "length": 27.18, + "psi_value": 0.07, + "psi_value_source": 4, + "thermal_bridge_type": 6 + }, + { + "length": 8.68, + "psi_value": 0.06, + "psi_value_source": 4, + "thermal_bridge_type": 10 + }, + { + "length": 17.17, + "psi_value": 0.24, + "psi_value_source": 4, + "thermal_bridge_type": 12 + }, + { + "length": 21.04, + "psi_value": 0.09, + "psi_value_source": 4, + "thermal_bridge_type": 16 + } + ], + "thermal_bridge_code": 5 + }, + "building_part_number": 1, + "sap_floor_dimensions": [ + { + "storey": 0, + "u_value": 0.12, + "floor_type": 2, + "storey_height": 2.55, + "heat_loss_area": 38.65, + "total_floor_area": 38.65, + "kappa_value_from_below": 18 + }, + { + "storey": 1, + "u_value": 0, + "floor_type": 3, + "kappa_value": 18, + "storey_height": 2.71, + "heat_loss_area": 1.3900032, + "total_floor_area": 37.26, + "kappa_value_from_below": 18 + } + ] + } + ], + "bedf_revision_number": 323, + "heating_cost_current": 260, + "co2_emissions_current": 1.2, + "energy_rating_average": 60, + "energy_rating_current": 83, + "lighting_cost_current": 43, + "main_heating_controls": [ + { + "description": "Time and temperature zone control", + "energy_efficiency_rating": 5, + "environmental_efficiency_rating": 5 + } + ], + "has_hot_water_cylinder": "true", + "heating_cost_potential": 251, + "hot_water_cost_current": 37, + "suggested_improvements": [ + { + "sequence": 1, + "typical_saving": 219, + "indicative_cost": "\u00a311,000 - \u00a320,000", + "improvement_type": "U", + "improvement_details": { + "improvement_number": 34 + }, + "improvement_category": 3, + "energy_performance_rating": 94, + "environmental_impact_rating": 97 + } + ], + "co2_emissions_potential": 1.2, + "energy_rating_potential": 83, + "lighting_cost_potential": 43, + "hot_water_cost_potential": 47, + "is_in_smoke_control_area": "unknown", + "renewable_heat_incentive": { + "rhi_new_dwelling": { + "space_heating": 3010, + "water_heating": 2175 + } + }, + "seller_commission_report": "Y", + "energy_consumption_current": 84, + "has_fixed_air_conditioning": "false", + "calculation_software_version": "Version: 1.4.0.80", + "energy_consumption_potential": 84, + "environmental_impact_current": 87, + "current_energy_efficiency_band": "B", + "environmental_impact_potential": 87, + "has_heated_separate_conservatory": "false", + "potential_energy_efficiency_band": "B", + "co2_emissions_current_per_floor_area": 16 +} \ No newline at end of file