diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
index 2b7cd2e6..ee4018ed 100644
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@@ -35,8 +35,13 @@ import subprocess
 from pathlib import Path
 from typing import cast
 
+import pytest
+
 from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
-from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+from datatypes.epc.domain.mapper import (
+    EpcPropertyDataMapper,
+    UnmappedElmhurstLabel,
+)
 from domain.sap10_calculator.calculator import calculate_sap_from_inputs
 from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
 from domain.sap10_calculator.worksheet.tests import (
@@ -817,6 +822,67 @@ def test_summary_2636_full_chain_sap_within_spec_floor_of_worksheet() -> None:
     assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE
 
 
+def test_summary_mapper_raises_on_unmapped_cylinder_size_label() -> None:
+    # Arrange — start from a real cohort cert (any extracted site
+    # notes) and inject an unmapped §15.1 "Cylinder Size" label
+    # ("Tiny" — not in the lookup dict). `from_elmhurst_site_notes`
+    # must raise `UnmappedElmhurstLabel` rather than silently
+    # returning None for `cylinder_size` (the failure mode that hid
+    # cert 9418's "Large" miss until Slice S0380.14 surfaced it as
+    # a Δ +2.60 SAP gap).
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    site_notes.water_heating.cylinder_size_label = "Tiny"
+
+    # Act / Assert
+    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
+        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    assert excinfo.value.field == "cylinder_size"
+    assert excinfo.value.value == "Tiny"
+
+
+def test_summary_mapper_raises_on_unmapped_cylinder_insulation_label() -> None:
+    # Arrange — mirror test for the §15.1 "Insulated" label dict.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    site_notes.water_heating.cylinder_insulation_label = "Polyester wool"
+
+    # Act / Assert
+    with pytest.raises(UnmappedElmhurstLabel) as excinfo:
+        EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    assert excinfo.value.field == "cylinder_insulation"
+    assert excinfo.value.value == "Polyester wool"
+
+
+def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> None:
+    # Arrange — coverage forcing function: every cohort cert must
+    # extract through `from_elmhurst_site_notes` without triggering an
+    # `UnmappedElmhurstLabel` raise from any strict helper. New cohort
+    # certs added in subsequent slices fall under the same gate, and
+    # any future Elmhurst-PDF variant with an unmapped label fails
+    # this test until the missing dict entry is added.
+    cohort_pdfs = (
+        _SUMMARY_000899_PDF, _SUMMARY_000903_PDF, _SUMMARY_000900_PDF,
+        _SUMMARY_000898_PDF, _SUMMARY_000901_PDF, _SUMMARY_000904_PDF,
+        _SUMMARY_000902_PDF,
+    )
+
+    # Act — run the strict mapper over every cohort cert, recording
+    # each failing cert instead of stopping at the first raise, so one
+    # run lists every cert that still needs a dict entry.
+    unmapped: dict[str, str] = {}
+    for pdf in cohort_pdfs:
+        pages = _summary_pdf_to_textract_style_pages(pdf)
+        site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+        try:
+            EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+        except UnmappedElmhurstLabel as exc:
+            unmapped[str(pdf)] = str(exc)
+
+    # Assert
+    assert not unmapped, f"cohort certs with unmapped labels: {unmapped}"
+
+
 def test_summary_9418_large_cylinder_routes_to_code_4() -> None:
     # Arrange — cert 9418-3062-8205-3566-7200's Summary §15.1 lodges
     # "Cylinder Size: Large". The dr87 worksheet lodges "Cylinder
diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py
index 3b6869c0..0a0d5e81 100644
--- a/datatypes/epc/domain/mapper.py
+++ b/datatypes/epc/domain/mapper.py
@@ -3373,12 +3373,46 @@ _ELMHURST_GAS_BOILER_FUEL_TYPES: frozenset[str] = frozenset({
 })
 
 
+class UnmappedElmhurstLabel(ValueError):
+    """An Elmhurst Summary lodged a finite-enum label that the mapper
+    does not yet know how to translate to the SAP10 cascade enum.
+
+    Raised by the strict Elmhurst label-to-enum helpers (cylinder size,
+    cylinder insulation, …) to surface mapper-coverage gaps at the
+    extraction boundary instead of silently returning None and letting
+    the cascade default to a wrong-but-not-obviously-wrong value
+    downstream. Motivated by the cert 9418 "Large" cylinder regression
+    that Slice S0380.14 fixed — strict raising would have caught it at
+    the first cohort probe instead of after a SAP-delta investigation.
+
+    Distinguish "lodging absent" (helper returns None — correct) from
+    "lodging present but unrecognised" (raise — fixture variant the
+    mapper doesn't yet cover, needs a dict entry added).
+    """
+
+    def __init__(self, field: str, value: str) -> None:
+        super().__init__(
+            f"unmapped Elmhurst {field} label: {value!r}; "
+            f"add an entry to the corresponding mapper lookup dict"
+        )
+        self.field = field
+        self.value = value
+
+    def __reduce__(self) -> tuple[type, tuple[str, str]]:
+        # `BaseException.__reduce__` replays `self.args` — here just the
+        # formatted message — into `__init__`, which needs (field, value),
+        # so the default pickle/copy round-trip fails with TypeError.
+        # Rebuild from the structured fields instead.
+        return (type(self), (self.field, self.value))
+
+
 # Elmhurst Summary §15.1 "Cylinder Size" labels mapped to the SAP10
 # cascade enum that `domain/sap10_calculator/rdsap/cert_to_inputs.py`
 # `_CYLINDER_SIZE_CODE_TO_LITRES` keys ({3: 160.0, 4: 210.0}). Exercised
 # by the cohort: "Medium" (cert 0380 et al — 160 L) and "Large" (cert
 # 9418 — 210 L). "Small" and "Very Large" labels are deferred until a
-# fixture exercises them.
+# fixture exercises them — when encountered they raise
+# `UnmappedElmhurstLabel` rather than silently returning None.
 _ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10: Dict[str, int] = {
     "Medium": 3,
     "Large": 4,
@@ -3389,8 +3423,8 @@ _ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10: Dict[str, int] = {
 # `cylinder_insulation_type` cascade enum. Cascade enum 1
 # (factory) is exercised by the cohort (cert 0380 lodges "Foam",
 # which SAP 10.2 Table 2 Note 2 treats as factory-applied PU foam).
-# Other labels (Loose Jacket, None) are deferred until a fixture
-# exercises them.
+# Other labels (Loose Jacket, None) raise `UnmappedElmhurstLabel`
+# until a fixture exercises them.
 _ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10: Dict[str, int] = {
     "Foam": 1,
 }
@@ -3401,13 +3435,17 @@ def _elmhurst_cylinder_size_code(
 ) -> Optional[int]:
     """Map an Elmhurst §15.1 "Cylinder Size" label to the SAP10
     cascade enum. Returns None when no cylinder is present or the
-    label is missing/unknown — the cascade's
-    `_int_or_none(cylinder_size) → None` then routes the cert off the
-    Table 2/2a/2b storage-loss path (correct for combis / instantaneous
-    HW; wrong for HP-with-cylinder certs until a label is mapped)."""
+    label is genuinely absent (no §15.1 lodging). Raises
+    `UnmappedElmhurstLabel` when the label IS lodged but isn't in
+    `_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10` — that's a mapper-coverage
+    gap that should be made explicit so the next fixture forces a dict
+    entry, not silently routed off the HW-with-cylinder cascade path."""
     if not cylinder_present or cylinder_size_label is None:
         return None
-    return _ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10.get(cylinder_size_label)
+    code = _ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10.get(cylinder_size_label)
+    if code is None:
+        raise UnmappedElmhurstLabel("cylinder_size", cylinder_size_label)
+    return code
 
 
 def _elmhurst_cylinder_insulation_code(
@@ -3415,10 +3453,15 @@ def _elmhurst_cylinder_insulation_code(
 ) -> Optional[int]:
     """Map an Elmhurst §15.1 "Insulated" label to the SAP10
     `cylinder_insulation_type` cascade enum. Returns None when no
-    cylinder is present or the label is missing/unknown."""
+    cylinder is present or the label is genuinely absent. Raises
+    `UnmappedElmhurstLabel` when the label IS lodged but isn't in the
+    mapping dict — see `_elmhurst_cylinder_size_code` rationale."""
     if not cylinder_present or cylinder_insulation_label is None:
         return None
-    return _ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10.get(cylinder_insulation_label)
+    code = _ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10.get(cylinder_insulation_label)
+    if code is None:
+        raise UnmappedElmhurstLabel("cylinder_insulation", cylinder_insulation_label)
+    return code
 
 
 def _elmhurst_main_heating_category(