From bb9097e1a5328dc65b3b2e371de4ae894d212765 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 May 2026 22:18:51 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.53:=20Elmhurst=20=C2=A714.0=20"Ma?= =?UTF-8?q?in=20Heating=20SAP=20Code"=20extraction=20+=20strict-raise?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cert 000565 surfaced an Elmhurst extractor schema gap. §14.0 lodges "Main Heating SAP Code 224" identifying Main 1 as an Air Source Heat Pump (SAP 10.2 Table 4a row 224: "Air source heat pump, 2013 or later") — but the extractor was dropping the line. The mapper therefore produced a `MainHeatingDetail` with `sap_main_heating_code = None` AND `main_heating_index_number = None` (because `PCDF boiler Reference = 0` for HP certs), leaving the cascade to fall back to the 0.80 gas-boiler default efficiency. Cascade impact on cert 000565 main_heating_fuel_kwh_per_yr pin: - Before: actual 62,375.80 kWh/yr (= 49,900.64 / 0.80 wrong default) Δ +27,665.01 vs U985-0001-000565.pdf expected 34,710.79 - After: actual 29,353.32 kWh/yr (= 49,900.64 / 1.70 HP COP via §A4.1) Δ −5,357.47 (remaining gap is on the space_heating side — actual 49,900.64 kWh/yr vs the ≈59,008 kWh/yr implied by the expected figure at the same 1.70 efficiency — not heating efficiency) The strict-raise mirrors [[unmapped-api-code]] (Slice S0380.51) and [[unmapped-elmhurst-label]] (cylinder size / glazing type) — when neither the §14.0 SAP code nor the PCDB boiler reference identifies Main 1, the mapper raises `UnmappedElmhurstLabel("main_heating", ...)` so the coverage gap surfaces at extraction time instead of as an opaque downstream SAP delta. Per user end-of-S0380.52 directive: "if we're missing mapping on EpcPropertyDataMapper - let's raise an exception". Spec source: SAP 10.2 §A4 Appendix A "Heat pump cascade", Table 4a row 224 (Air source heat pump, 2013 or later) — `seasonal_efficiency` reads the SAP code when no PCDB Table 105/362 record overrides. Touched: - datatypes/epc/surveys/elmhurst_site_notes.py: `MainHeating.
main_heating_sap_code: Optional[int]` field added (treat 0 as None per Elmhurst convention — PCDB-listed boilers lodge §14.0 SAP code as 0 and identify themselves via the PCDB index instead) - backend/documents_parser/elmhurst_extractor.py: `_extract_main_heating` reads §14.0 "Main Heating SAP Code" via the existing `_local_val` slice helper; 0/absent → None - datatypes/epc/domain/mapper.py: `_map_elmhurst_sap_heating` passes `sap_main_heating_code=mh.main_heating_sap_code` to `MainHeatingDetail`, and raises `UnmappedElmhurstLabel` when neither identifier resolves Cohort regression check: 415 pass + 10 expected 000565 failures (unchanged from S0380.52 — same pins, different residuals). Pyright net-zero on all 3 touched files. Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 14 +++++++++++ datatypes/epc/domain/mapper.py | 24 +++++++++++++++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 8 +++++++ 3 files changed, 46 insertions(+) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 1bb3b3c2..061d972f 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1044,6 +1044,19 @@ class ElmhurstSiteNotesExtractor: lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2") pct_raw = self._local_val(lines, "Percentage of Heat") pct = int(pct_raw.split()[0]) if pct_raw else 0 + # §14.0 "Main Heating SAP Code" identifies Main 1 by SAP 10.2 + # Table 4a code (e.g. 224 = "Air source heat pump, 2013 or + # later"). PCDB-boiler certs leave this empty / lodge "0" — the + # PCDB index in `PCDF boiler Reference` is the identifier in + # that case. Treat 0 (or absent) as None so the mapper can + # distinguish "no SAP code lodged" from a real Table 4a code. 
+ sap_code_raw = self._local_val(lines, "Main Heating SAP Code") + main_heating_sap_code: Optional[int] = None + if sap_code_raw is not None: + head = sap_code_raw.split()[0] if sap_code_raw.split() else "" + if head.isdigit(): + v = int(head) + main_heating_sap_code = v if v > 0 else None # The "Secondary Heating SapCode" key is lodged inside §14.1 Main # Heating2 — Elmhurst uses the Main-2 block to also carry the # cert's secondary heating system (when one exists). Look for it @@ -1069,6 +1082,7 @@ class ElmhurstSiteNotesExtractor: percentage_of_heat=pct, pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"), heat_pump_age=self._local_val(lines, "Heat pump age"), + main_heating_sap_code=main_heating_sap_code, secondary_heating_sap_code=secondary_code, ) diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 1e5cec98..963f5abc 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3826,6 +3826,24 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: heat_emitter_int = _elmhurst_heat_emitter_int(mh.heat_emitter) sap_control_int = _elmhurst_sap_control_code(sap_control) main_heating_category = _elmhurst_main_heating_category(mh, pcdb_index) + # Strict-raise mirror of [[unmapped-api-code]] — when Main 1 has + # neither a PCDB boiler reference nor a lodged Table 4a SAP code, + # the mapper has no identifier for the heat source and the cascade + # would silently fall back to the 0.80 gas-boiler default. First + # surfaced on cert 000565 where Main 1 is a heat pump lodging + # `PCDF boiler Reference = 0` + `Main Heating SAP Code = 224`; if + # the extractor (or a future variant cert) drops both, raise so the + # gap surfaces here instead of as a SAP-delta residual downstream. 
+ if mh.main_heating_sap_code is None and pcdb_index is None: + raise UnmappedElmhurstLabel( + "main_heating", + ( + f"§14.0 Main Heating1 has neither PCDF boiler reference " + f"({mh.pcdf_boiler_reference!r}) nor SAP code " + f"({mh.main_heating_sap_code!r}); cannot identify the " + f"heat source" + ), + ) # Shower-outlet classification: SAP10.2 Appendix J routes electric # showers via §J line 64a (their own kWh stream) and treats mixer # showers as drawing from the HW system. The Summary PDF lodges @@ -3874,6 +3892,12 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: # number drives PCDB lookup in the cascade. main_heating_index_number=pcdb_index, main_heating_data_source=1 if pcdb_index is not None else None, + # §14.0 "Main Heating SAP Code" — Table 4a integer + # identifying Main 1 when no PCDB boiler reference is + # lodged (e.g. heat pump SAP code 224 on cert 000565). + # The cascade's `seasonal_efficiency` reads this when + # there is no PCDB Table 105/362 record to override. + sap_main_heating_code=mh.main_heating_sap_code, ) ], has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index fa87f167..7c5396a0 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -204,6 +204,14 @@ class MainHeating: None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%" ) heat_pump_age: Optional[str] = None + # Section 14.0 "Main Heating SAP Code" — the SAP 10.2 Table 4a code + # identifying Main 1 when no PCDB boiler reference is lodged (e.g. + # heat pump certs lodge `PCDF boiler Reference = 0` + SAP code = 224 + # for "Air source heat pump, 2013 or later"). None when the line is + # absent or lodged as 0 (= "no code lodged"; PCDB-listed boilers + # leave §14.0 SAP code empty and identify themselves via the PCDB + # index instead). 
+ main_heating_sap_code: Optional[int] = None # Section 14.0 also lodges a secondary heating system (when one is # installed). The SAP code is the integer the cascade reads via # `SapHeating.secondary_heating_type` to apply the Table 11