From 85d6f8468cf604867ce382384901f34ef8382ca0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 10 Jun 2026 22:16:21 +0000
Subject: [PATCH] feat(elmhurst-extractor): capture section 15.1 Immersion Heater (Dual/Single)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Elmhurst Summary section 15.1 "Hot Water Cylinder" block lodges
"Immersion Heater: Dual" / "Single"; the extractor dropped it, so the
Summary path left immersion_heating_type = None while the API path
already captured it. Capturing it drives SAP Table 13's
high-rate-fraction DHW-cost split (RdSAP 10 section 10.5 p.54: 1 = dual,
2 = single) and brings the two front-ends to parity.

Three production files change (plus two test files):
WaterHeating.immersion_type field + _extract_water_heating parse (scoped
to the 15.1..15.2 slice) + _elmhurst_immersion_type_code mapper
(strict-raise on an unmapped label, mirroring
_elmhurst_cylinder_insulation_code).

Safe to land now that the preceding commit zeroes the high-rate fraction
for 18-/24-hour tariffs: the 20 solid-fuel corpus certs (solid fuel
4-11: WHC 903 dual immersion, 18-hour meter, 110 L) carry a dual
immersion, but their 18-hour tariff bills 100% low-rate per Table 12a's
7-/10-hour scope — so they stay EXACT instead of regressing to the
10-hour-column ~0.10. 7-/10-hour Summary immersion certs now correctly
cost the Table 13 high-rate fraction instead of falling to the
immersion=None 100%-low default.

Regression gate green (3 pre-existing fails unrelated); API gauge
unchanged (Summary-path-only): 57.6% within 0.5, mean|err| 1.185.

Co-Authored-By: Claude Opus 4.8 --- .../documents_parser/elmhurst_extractor.py | 4 +++ .../tests/test_heating_systems_corpus.py | 18 ++++++++++ .../tests/test_summary_pdf_mapper_chain.py | 32 +++++++++++++++++ datatypes/epc/domain/mapper.py | 34 +++++++++++++++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 5 +++ 5 files changed, 93 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index e2a5f1e7..87075f59 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1531,6 +1531,9 @@ class ElmhurstSiteNotesExtractor: if cylinder_thermostat is None: if "Cylinder thermostat (Already installed)" in self._lines: cylinder_thermostat = True + # §15.1 "Immersion Heater" lodging ("Dual" / "Single"). Scoped to + # the §15.1..§15.2 slice so the lookup can't collide elsewhere. + immersion_type = self._local_val(cylinder_lines, "Immersion Heater") return WaterHeating( water_heating_code=self._str_val("Water Heating Code"), water_heating_sap_code=self._int_val("Water Heating SapCode"), @@ -1540,6 +1543,7 @@ class ElmhurstSiteNotesExtractor: cylinder_insulation_label=cylinder_insulation_label, cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm, cylinder_thermostat=cylinder_thermostat, + immersion_type=immersion_type, ) def _extract_baths_and_showers(self) -> BathsAndShowers: diff --git a/backend/documents_parser/tests/test_heating_systems_corpus.py b/backend/documents_parser/tests/test_heating_systems_corpus.py index ab7889e4..da85136a 100644 --- a/backend/documents_parser/tests/test_heating_systems_corpus.py +++ b/backend/documents_parser/tests/test_heating_systems_corpus.py @@ -960,6 +960,24 @@ def test_heating_systems_corpus_residual_matches_pin( ) +def test_solid_fuel_5_captures_section_15_1_dual_immersion() -> None: + # Arrange — solid fuel 5 (cert 001431: House Coal main SAP 153, WHC 903 + # electric immersion HW, 18-hour 
meter, 110 L Normal cylinder). The + # Elmhurst Summary §15.1 "Hot Water Cylinder" block lodges "Immersion + # Heater: Dual". The extractor must surface it and the mapper map it to + # the SAP10 `immersion_heating_type` code 1 (dual) per RdSAP 10 §10.5. + summary_pdf, _p960 = _variant_paths('solid fuel 5') + pages = _summary_pdf_to_textract_style_pages(summary_pdf) + + # Act + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert site_notes.water_heating.immersion_type == "Dual" + assert epc.sap_heating.immersion_heating_type == 1 + + def test_oil_6_no_room_thermostat_applies_table_4c2_minus_5pp_space_efficiency() -> None: # Arrange — oil 6 (B30K standard liquid-fuel boiler, Table 4b code # 126 winter 80 / summer 68) lodges "Main Heating Controls Sap: SAP diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 5a0d47d2..f89c4c72 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -43,6 +43,7 @@ from datatypes.epc.domain.mapper import ( UnmappedApiCode, UnmappedElmhurstLabel, _elmhurst_glazing_type_code, # pyright: ignore[reportPrivateUsage] + _elmhurst_immersion_type_code, # pyright: ignore[reportPrivateUsage] ) from domain.sap10_calculator.calculator import calculate_sap_from_inputs from domain.sap10_calculator.rdsap.cert_to_inputs import ( @@ -1539,6 +1540,37 @@ def test_summary_mapper_raises_on_unmapped_cylinder_insulation_label() -> None: assert excinfo.value.value == "Polyester wool" +def test_elmhurst_immersion_type_code_maps_dual_and_single() -> None: + # Arrange — Elmhurst Summary §15.1 "Immersion Heater" lodges "Dual" + # or "Single". 
RdSAP 10 §10.5 (PDF p.54): an immersion is "assumed + # dual" on a dual/off-peak meter; the SAP10 cascade code is 1 = dual, + # 2 = single (cert_to_inputs `_IMMERSION_TYPE_DUAL`). + + # Act + dual = _elmhurst_immersion_type_code("Dual", cylinder_present=True) + single = _elmhurst_immersion_type_code("Single", cylinder_present=True) + no_cylinder = _elmhurst_immersion_type_code("Dual", cylinder_present=False) + absent = _elmhurst_immersion_type_code(None, cylinder_present=True) + + # Assert + assert dual == 1 + assert single == 2 + assert no_cylinder is None + assert absent is None + + +def test_elmhurst_immersion_type_code_raises_on_unmapped_label() -> None: + # Arrange — a lodged §15.1 "Immersion Heater" label outside the + # {Dual, Single} set must strict-raise (mirror of the cylinder-size / + # cylinder-insulation helpers) rather than silently drop the field. + + # Act / Assert + with pytest.raises(UnmappedElmhurstLabel) as excinfo: + _elmhurst_immersion_type_code("Triple", cylinder_present=True) + assert excinfo.value.field == "immersion_type" + assert excinfo.value.value == "Triple" + + def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> None: # Arrange — coverage forcing function: every cohort cert must # extract through `from_elmhurst_site_notes` without triggering an diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 39c3365b..e5c794c8 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -5167,6 +5167,15 @@ _ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10: Dict[str, int] = { } +# Elmhurst §15.1 "Immersion Heater" label → SAP10 `immersion_heating_type` +# cascade code. RdSAP 10 §10.5 (PDF p.54) + cert_to_inputs +# `_IMMERSION_TYPE_DUAL`/`_IMMERSION_TYPE_SINGLE`: 1 = dual, 2 = single. 
+_ELMHURST_IMMERSION_TYPE_LABEL_TO_SAP10: Dict[str, int] = { + "Dual": 1, + "Single": 2, +} + + # Elmhurst §15.0 "Water Heating Fuel Type" labels that route to solid- # fuel Table 32 codes (Anthracite, House coal, Wood logs/pellets, etc.). # Used by `_resolve_elmhurst_inaccessible_cylinder_size` to detect the @@ -5282,6 +5291,23 @@ def _elmhurst_cylinder_insulation_code( return code +def _elmhurst_immersion_type_code( + immersion_type_label: Optional[str], cylinder_present: bool, +) -> Optional[int]: + """Map an Elmhurst §15.1 "Immersion Heater" label ("Dual" / "Single") + to the SAP10 `immersion_heating_type` cascade code (1 = dual, 2 = + single per RdSAP 10 §10.5 p.54). Returns None when no cylinder is + present or the label is genuinely absent. Raises `UnmappedElmhurstLabel` + when the label IS lodged but isn't in the mapping dict — same + strict-fallback as `_elmhurst_cylinder_insulation_code`.""" + if not cylinder_present or immersion_type_label is None: + return None + code = _ELMHURST_IMMERSION_TYPE_LABEL_TO_SAP10.get(immersion_type_label) + if code is None: + raise UnmappedElmhurstLabel("immersion_type", immersion_type_label) + return code + + def _resolve_elmhurst_inaccessible_cylinder_insulation( age_band: str, ) -> tuple[int, int]: @@ -5850,6 +5876,14 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: and survey.water_heating.cylinder_thermostat is not None else None ), + # §15.1 "Immersion Heater" (Dual / Single) → SAP10 + # `immersion_heating_type` code (1 = dual, 2 = single). Drives the + # Table 13 high-rate-fraction split for WHC-903 electric immersion + # DHW on a 7-/10-hour off-peak tariff (18-/24-hour bill 100% low). 
+ immersion_heating_type=_elmhurst_immersion_type_code( + survey.water_heating.immersion_type, + survey.water_heating.hot_water_cylinder_present, + ), water_heating_code=survey.water_heating.water_heating_sap_code, water_heating_fuel=water_heating_fuel, secondary_heating_type=mh.secondary_heating_sap_code, diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 2fa55acc..16464766 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -367,6 +367,11 @@ class WaterHeating: # §15.1 "Cylinder Thermostat" lodging (Yes / No). False or absent # keeps the cascade's no-thermostat Table 2b temperature factor. cylinder_thermostat: Optional[bool] = None + # §15.1 "Immersion Heater" lodging ("Dual" / "Single"). Drives the + # SAP10 `immersion_heating_type` code (1 = dual, 2 = single) used by + # the Table 13 high-rate-fraction DHW-cost split. None when no + # cylinder is present or the line is absent. + immersion_type: Optional[str] = None @dataclass