From 7f17de84aae4a7b3d93746fdd33d2d8e2e8fdfe8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 24 May 2026 20:13:19 +0000 Subject: [PATCH] Slice 49: Summary_000490 chain pins SAP at 1e-4; secondary heating + RdSAP sheltered-sides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two mapper extensions, both validated by 000490 closing to 1e-4: 1. Secondary heating extraction. Elmhurst Summary PDFs lodge the secondary heating SAP code in the §14.1 Main Heating2 sub-section (between "14.1 Main Heating2" and "14.1 Community Heating") — not in the §14.0 Main Heating1 block where the main system lives. `ElmhurstMainHeating` gains a `secondary_heating_sap_code` field; the extractor reads it from the right section; the mapper threads it through to `SapHeating.secondary_heating_type`. The cascade then applies Table 11's 10% secondary fraction. 2. Sheltered-sides derivation per RdSAP §S5. The Summary PDF doesn't lodge per-dwelling sheltered-sides; the value is derived from built-form (Detached=0, Semi-Detached=1, End-Terrace=1, Mid-Terrace=2, Enclosed Mid-Terrace=3, Enclosed End-Terrace=2). `_map_elmhurst_ventilation` now takes built_form and populates `SapVentilation.sheltered_sides`. The table is cross-checked against U985-0001-NNNNNN.pdf line (19) across the 6 worksheet fixtures. Cohort SAP deltas after this slice (target 1e-4): 000474 0.0000 ✓ Slice 47 000477 +2.6555 diagnosis pending (lighting bulb count diff) 000480 +4.1955 diagnosis pending 000487 +4.4553 extractor still drops most windows 000490 0.0000 ✓ THIS SLICE 000516 +1.5162 roof-window separation Pyright net-zero on touched files (35 errors, same baseline). 755 tests pass (up from 754 — new `test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly`). 
Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 15 +++++++++ .../tests/test_summary_pdf_mapper_chain.py | 22 +++++++++++++ datatypes/epc/domain/mapper.py | 31 +++++++++++++++++-- datatypes/epc/surveys/elmhurst_site_notes.py | 5 +++ 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index a3449014..7b6ea38e 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -764,6 +764,20 @@ class ElmhurstSiteNotesExtractor: lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2") pct_raw = self._local_val(lines, "Percentage of Heat") pct = int(pct_raw.split()[0]) if pct_raw else 0 + # The "Secondary Heating SapCode" key is lodged inside §14.1 Main + # Heating2 — Elmhurst uses the Main-2 block to also carry the + # cert's secondary heating system (when one exists). Look for it + # in that section; absence (or "0") means no secondary lodged. 
+ secondary_lines = self._section_lines( + "14.1 Main Heating2", "14.1 Community Heating" + ) + secondary_raw = self._local_val(secondary_lines, "Secondary Heating SapCode") + secondary_code = ( + int(secondary_raw) + if secondary_raw is not None and secondary_raw.isdigit() + and int(secondary_raw) > 0 + else None + ) return MainHeating( heat_emitter=self._local_str(lines, "Heat Emitter"), fuel_type=self._local_str(lines, "Fuel Type"), @@ -775,6 +789,7 @@ class ElmhurstSiteNotesExtractor: percentage_of_heat=pct, pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"), heat_pump_age=self._local_val(lines, "Heat pump age"), + secondary_heating_sap_code=secondary_code, ) def _extract_meters(self) -> Meters: diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 07047241..98dc0116 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -39,6 +39,7 @@ from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs _FIXTURES = Path(__file__).parent / "fixtures" _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf" +_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf" def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]: @@ -136,3 +137,24 @@ def test_summary_000474_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Elmhurst exactly and we expect identical outputs. worksheet_unrounded_sap = 62.2584 assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + +def test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly() -> None: + # Arrange — cert U985-0001-000490 is an end-terrace with main + + # 1st extension. The worksheet PDF lodges unrounded SAP 57.3979. 
+ # End-terrace built-form drives sheltered_sides=1 (RdSAP §S5) and + # the cert's Summary §14.1 Main Heating2 sub-section carries a + # secondary heating SAP code (691, electric panel) — both required + # for the mapped chain to reproduce the worksheet to 1e-4. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Act + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + + # Assert + worksheet_unrounded_sap = 57.3979 + assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 62dc74ea..a667ee97 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -320,7 +320,7 @@ class EpcPropertyDataMapper: number_of_storeys=survey.number_of_storeys, hydro=survey.renewables.hydro_electricity_generated_kwh > 0, photovoltaic_array=survey.renewables.photovoltaic_panel != "None", - sap_ventilation=_map_elmhurst_ventilation(survey.ventilation), + sap_ventilation=_map_elmhurst_ventilation(survey.ventilation, built_form), percent_draughtproofed=survey.draught_proofing_percent, waste_water_heat_recovery=( "None" if not survey.renewables.wwhrs_present else "Present" @@ -2302,10 +2302,36 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: else survey.water_heating.water_heating_code ), water_heating_code=survey.water_heating.water_heating_sap_code, + secondary_heating_type=mh.secondary_heating_sap_code, ) -def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation: +# RdSAP §S5 — default sheltered-sides count for each built-form code, +# applied when the cert doesn't lodge a per-dwelling value (Elmhurst's +# Summary PDF doesn't). 
The cohort uses the RdSAP convention: attached +# forms pick up exposure-shielding from neighbours; only detached +# gets none. Values cross-checked against U985-0001-NNNNNN.pdf line (19) +# across the 6 Elmhurst worksheet fixtures. +_ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM: Dict[str, int] = { + "Detached": 0, + "Semi-Detached": 1, + "End-Terrace": 1, + "Mid-Terrace": 2, + "Enclosed End-Terrace": 2, + "Enclosed Mid-Terrace": 3, +} + + +def _elmhurst_sheltered_sides(built_form: str) -> Optional[int]: + """Default sheltered-sides count per RdSAP §S5 based on the cert's + built-form. Returns None when the form isn't recognised so the + cascade applies its own default (currently 2).""" + return _ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM.get(built_form) + + +def _map_elmhurst_ventilation( + v: ElmhurstVentilation, built_form: str +) -> SapVentilation: return SapVentilation( ventilation_type=None, draught_lobby=v.draught_lobby != "Not present", @@ -2318,4 +2344,5 @@ def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation: passive_vents_count=v.passive_vents_count, flueless_gas_fires_count=v.flueless_gas_fires_count, ventilation_in_pcdf_database=None, + sheltered_sides=_elmhurst_sheltered_sides(built_form), ) diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index e943ad48..2d81ecdb 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -140,6 +140,11 @@ class MainHeating: None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%" ) heat_pump_age: Optional[str] = None + # §14.1 Main Heating2 lodges the secondary heating system (when one is + # installed). The SAP code is the integer the cascade reads via + # `SapHeating.secondary_heating_type` to apply the Table 11 + # secondary-fraction split; None when no secondary is lodged. + secondary_heating_sap_code: Optional[int] = None @dataclass