From 7db21560f166cd76d586d2e756f351295812124b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 31 May 2026 10:04:28 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.133:=20derive=20solid-fuel=20main?= =?UTF-8?q?=20fuel=20from=20=C2=A714.0=20EES=20Code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Elmhurst Summary §14.0 "Main Heating EES Code" is a three-letter identifier that resolves to the specific fuel for solid-fuel main heating systems. The §14.0 "Main Heating SAP Code" alone can't disambiguate because Table 4a categorises solid-fuel systems by appliance type rather than fuel — SAP code 160 ("Closed room heater with boiler") is shared by anthracite, wood chips, dual fuel and smokeless across the heating-systems corpus. Three changes land together: 1. `MainHeating` dataclass (`elmhurst_site_notes.py`) gains a `main_heating_ees: str = ""` field for the §14.0 EES code. 2. `ElmhurstSiteNotesExtractor._extract_main_heating` reads "Main Heating EES Code" from §14.0. 3. `_map_elmhurst_sap_heating` adds a fourth fuel-derivation fallback (after the existing electric-SAP-code + §15.0-liquid-fuel branches): when `main_fuel_int is None` and the §14.0 EES code is in `_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE`, use that dict's value as the main fuel. Dict (corpus-derived, 10 entries → 8 distinct Table 32 fuels): BAF, BAI, RAM → 15 anthracite (3.64 / 0.395 / 1.064) BCC → 11 house coal (3.67 / 0.395 / 1.064) BDI → 10 dual fuel (3.99 / 0.087 / 1.049) BKI → 12 smokeless (4.61 / 0.366 / 1.261) BQI → 21 wood chips (3.07 / 0.023 / 1.046) RPS → 22 wood pellets bags (5.81 / 0.053 / 1.325) RUN → 23 bulk pellets (5.26 / 0.053 / 1.325) RWN → 20 wood logs (4.23 / 0.028 / 1.046) Dict values are Table 32 fuel codes, NOT API `main_fuel` enum codes — the API codes 1-9 collide with Table 32 codes for unrelated fuels (e.g. API 5 = "anthracite" vs Table 32 5 = "bottled LPG main heating"). 
`unit_price_p_per_kwh` / `co2_factor_kg_per_kwh` / `primary_energy_factor` all check the Table 32 dict before falling through to the API translation, so using Table 32 codes here avoids the collision and routes cost/CO2/PE through the correct fuel row. Heating-systems corpus impact — all 10 solid-fuel variants move from `_BLOCKED_BY_MISSING_MAIN_FUEL_TYPE` (assert-on-raise) back onto the residual-pin grid in `_EXPECTATIONS`: variant ΔSAP Δcost ΔCO2 ΔPE solid fuel 2 +4.79 -£110 -484 kg +441 kWh anthracite solid fuel 3 +4.43 -£102 -1206 +1452 anthracite solid fuel 4 +4.13 -£95 -714 +1655 anthracite solid fuel 5 +2.71 -£62 -301 +2360 house coal — smallest solid fuel 6 -7.38 +£168 -154 +2519 dual fuel — only negative solid fuel 7 +5.82 -£131 -758 +2968 smokeless solid fuel 8 +4.24 -£98 -15 +2513 wood chips solid fuel 9 +3.44 -£79 -8 +2428 wood pellets bags solid fuel 10 +5.14 -£118 -53 +1849 wood pellets bulk solid fuel 11 +4.35 -£100 -9 +1536 wood logs Remaining residuals trace to heating-system efficiency / control type — separate slices. 16 variants still in `_BLOCKED`: community heating ×5, electric storage ×4, no system, oil non-Heating-oil ×5, Bulk LPG ×1. Each is its own derivation slice. Extended handover suite at HEAD post-slice: 876 pass / 0 fail (was 875 + 1 new EES wiring AAA test). Pyright net-zero on touched files (45 → 45 — all pre-existing). No golden fixture impact — no golden cert lodges an EES code via the Elmhurst path. 
Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 1 + .../tests/test_heating_systems_corpus.py | 40 ++++++++--- .../tests/test_summary_pdf_mapper_chain.py | 39 +++++++++++ datatypes/epc/domain/mapper.py | 67 +++++++++++++++++++ datatypes/epc/surveys/elmhurst_site_notes.py | 10 +++ 5 files changed, 147 insertions(+), 10 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index f1b5748b..666980d2 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1251,6 +1251,7 @@ class ElmhurstSiteNotesExtractor: pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"), heat_pump_age=self._local_val(lines, "Heat pump age"), main_heating_sap_code=main_heating_sap_code, + main_heating_ees=self._local_str(lines, "Main Heating EES Code"), secondary_heating_sap_code=secondary_code, main_heating_2=main_heating_2, ) diff --git a/backend/documents_parser/tests/test_heating_systems_corpus.py b/backend/documents_parser/tests/test_heating_systems_corpus.py index b13cc8fa..7be9cb03 100644 --- a/backend/documents_parser/tests/test_heating_systems_corpus.py +++ b/backend/documents_parser/tests/test_heating_systems_corpus.py @@ -95,6 +95,16 @@ class _CorpusExpectation: # `_BLOCKED_BY_MISSING_MAIN_FUEL_TYPE` (assert-on-raise test) until # each mapper gap is closed and the cert can be moved back onto the # residual-pin grid. +# +# Slice S0380.133 unblocked all 10 solid-fuel variants (solid fuel 2.. +# 11) by routing the §14.0 "Main Heating EES Code" through the new +# `_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE` dict (Table 32 fuel codes +# keyed by Elmhurst's 3-letter EES code: BAF/BAI/RAM = anthracite, +# BCC = house coal, BDI = dual fuel, BKI = smokeless, BQI = wood +# chips, RPS = wood pellets in bags, RUN = bulk pellets, RWN = wood +# logs). All 10 close to ΔSAP ±7.4; solid fuel 5 +2.71 is the +# smallest open. 
16 variants remain blocked (community heating, +# 4 electric storage codes, no system, oil non-Heating-oil, Bulk LPG). _EXPECTATIONS: tuple[_CorpusExpectation, ...] = ( _CorpusExpectation(variant='ashp', block='11a', expected_sap_resid=+5.6680, expected_cost_resid_gbp=-130.5995, expected_co2_resid_kg=-1.4283, expected_pe_resid_kwh=+1467.8983), _CorpusExpectation(variant='electric 1', block='11a', expected_sap_resid=+9.6439, expected_cost_resid_gbp=-222.2109, expected_co2_resid_kg=+14.3441, expected_pe_resid_kwh=+2837.1414), @@ -111,6 +121,23 @@ _EXPECTATIONS: tuple[_CorpusExpectation, ...] = ( _CorpusExpectation(variant='oil pcdb 2', block='11a', expected_sap_resid=+0.4239, expected_cost_resid_gbp=-9.7668, expected_co2_resid_kg=-35.9551, expected_pe_resid_kwh=+2086.7505), _CorpusExpectation(variant='oil pcdb 3', block='11a', expected_sap_resid=+1.1597, expected_cost_resid_gbp=-26.7204, expected_co2_resid_kg=-53.1709, expected_pe_resid_kwh=+1897.4341), _CorpusExpectation(variant='pcdb 1', block='11a', expected_sap_resid=+6.9521, expected_cost_resid_gbp=-157.6055, expected_co2_resid_kg=-845.8065, expected_pe_resid_kwh=-171.6971), + # Slice S0380.133 unblocked 10 solid-fuel variants by routing the + # Elmhurst §14.0 "Main Heating EES Code" through the new + # `_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE` dict. Pre-slice the + # cascade had no fuel and raised `MissingMainFuelType`; post-slice + # cost / CO2 / PE all route via the correct Table 32 fuel code. + # Remaining residuals are likely heating-system efficiency or + # control-type gaps — separate slices. 
+ _CorpusExpectation(variant='solid fuel 2', block='11a', expected_sap_resid=+4.7910, expected_cost_resid_gbp=-110.3933, expected_co2_resid_kg=-484.3578, expected_pe_resid_kwh=+440.7506), + _CorpusExpectation(variant='solid fuel 3', block='11a', expected_sap_resid=+4.4310, expected_cost_resid_gbp=-102.0983, expected_co2_resid_kg=-1206.1483, expected_pe_resid_kwh=+1451.7872), + _CorpusExpectation(variant='solid fuel 4', block='11a', expected_sap_resid=+4.1283, expected_cost_resid_gbp=-95.1230, expected_co2_resid_kg=-714.4446, expected_pe_resid_kwh=+1655.3360), + _CorpusExpectation(variant='solid fuel 5', block='11a', expected_sap_resid=+2.7081, expected_cost_resid_gbp=-62.3977, expected_co2_resid_kg=-301.4166, expected_pe_resid_kwh=+2359.8540), + _CorpusExpectation(variant='solid fuel 6', block='11a', expected_sap_resid=-7.3846, expected_cost_resid_gbp=+168.2332, expected_co2_resid_kg=-153.6470, expected_pe_resid_kwh=+2519.2301), + _CorpusExpectation(variant='solid fuel 7', block='11a', expected_sap_resid=+5.8242, expected_cost_resid_gbp=-131.0462, expected_co2_resid_kg=-758.2093, expected_pe_resid_kwh=+2967.9919), + _CorpusExpectation(variant='solid fuel 8', block='11a', expected_sap_resid=+4.2391, expected_cost_resid_gbp=-97.6761, expected_co2_resid_kg=-14.9661, expected_pe_resid_kwh=+2512.8796), + _CorpusExpectation(variant='solid fuel 9', block='11a', expected_sap_resid=+3.4416, expected_cost_resid_gbp=-79.3010, expected_co2_resid_kg=-8.4751, expected_pe_resid_kwh=+2427.8078), + _CorpusExpectation(variant='solid fuel 10', block='11a', expected_sap_resid=+5.1366, expected_cost_resid_gbp=-118.3539, expected_co2_resid_kg=-52.9522, expected_pe_resid_kwh=+1848.8905), + _CorpusExpectation(variant='solid fuel 11', block='11a', expected_sap_resid=+4.3479, expected_cost_resid_gbp=-100.1809, expected_co2_resid_kg=-8.8428, expected_pe_resid_kwh=+1535.5344), ) @@ -146,16 +173,9 @@ _BLOCKED_BY_MISSING_MAIN_FUEL_TYPE: tuple[str, ...] 
= ( 'oil 5', 'oil 6', 'pcdb 3', - 'solid fuel 10', - 'solid fuel 11', - 'solid fuel 2', - 'solid fuel 3', - 'solid fuel 4', - 'solid fuel 5', - 'solid fuel 6', - 'solid fuel 7', - 'solid fuel 8', - 'solid fuel 9', + # Slice S0380.133 unblocked all 10 solid-fuel variants via the + # §14.0 EES-code-driven fuel derivation; they now appear in + # `_EXPECTATIONS` above with their post-derivation residual pins. ) diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 86eaf02b..d3bf0822 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -277,6 +277,45 @@ def test_summary_001431_oil_1_main_fuel_inferred_from_section_15_water_heating_f assert epc.sap_heating.water_heating_fuel == 28 +def test_summary_001431_solid_fuel_8_main_fuel_inferred_from_main_heating_ees_code() -> None: + # Arrange — heating-systems corpus fixture 001431 / "solid fuel 8" + # lodges §14.0 "Main Heating SAP Code: 160" + "Main Heating EES + # Code: BQI" with NO §14.0 "Fuel Type" lodging — typical of solid- + # fuel main heating where the SAP code (160 = "Closed room heater + # with boiler") covers multiple distinct fuels. + # + # Anthracite (EES BAI), Wood Chips (BQI), Dual Fuel (BDI), and + # Smokeless Fuel (BKI) all share SAP code 160 across the corpus; + # the SAP code alone can't disambiguate, so the mapper has to look + # at the EES code. Pre-S0380.133 the mapper produced + # `main_fuel_type=''`; post-S0380.132 the cascade strict-raised + # `MissingMainFuelType`. + # + # The fix routes the §14.0 EES code through + # `_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE` (corpus-derived dict + # mirroring the §15.0 fallback added in S0380.130). BQI → Table 32 + # code 21 = "wood chips" (3.07 p/kWh + 0.023 kg CO2/kWh + 1.046 PE + # factor per RdSAP 10 spec p.95). 
The dict uses Table 32 codes + # directly rather than the API enum because the API codes 1-9 + # collide with Table 32 codes for unrelated fuels (e.g. API 5 = + # "anthracite" vs Table 32 5 = "bottled LPG main heating"). + summary_pdf = ( + Path(__file__).parents[3] + / "sap worksheets/heating systems examples/solid fuel 8/Summary_001431.pdf" + ) + pages = _summary_pdf_to_textract_style_pages(summary_pdf) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + main_1 = epc.sap_heating.main_heating_details[0] + assert site_notes.main_heating.main_heating_ees == "BQI" + assert main_1.main_fuel_type == 21 + assert main_1.sap_main_heating_code == 160 + + def test_summary_001431_community_heating_1_main_heating_sap_code_extracted_when_no_main_heating_2_block() -> None: # Arrange — Heating-systems corpus fixture 001431 / "community heating 1" # lodges §14.0 Main Heating1 directly followed by §14.1 Community diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 04dc6e25..0258da83 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -4099,6 +4099,60 @@ _LIQUID_FUEL_BOILER_SAP_MAIN_HEATING_CODES: Final[frozenset[int]] = ( ) +# Elmhurst §14.0 "Main Heating EES Code" → Table 32 main fuel code. +# Empirically derived from the heating-systems corpus at +# `sap worksheets/heating systems examples/` (each P960 worksheet lodges +# the resolved "FuelType" string for the corresponding §14.0 EES code). +# +# Values are Table 32 fuel codes (per RdSAP 10 Specification 10-06-2025 +# Table 32 p.95 / SAP 10.2 Table 12 p.191), NOT API `main_fuel` enum +# codes — codes 1-9 in the API enum collide with Table 32 codes for +# unrelated fuels (e.g. API 5 = "anthracite" vs Table 32 5 = "bottled +# LPG main heating"). 
Cascade lookups read `main_fuel_type` directly +# against the Table 32 dict before falling through to the API +# translation, so using Table 32 codes here resolves cleanly without +# triggering the collision. +# +# Used as a fallback in `_map_elmhurst_sap_heating` when the §14.0 +# "Fuel Type" string is absent (typical for solid-fuel main heating +# certs — SAP code 153/158/160/633/634/636 cover several distinct fuels +# under one row, so the SAP code can't identify the specific fuel). +# Adding a new entry whenever a corpus fixture lodges a new EES code +# is the forcing function for solid-fuel mapper completeness. +_ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE: Final[dict[str, int]] = { + # Anthracite — Table 32 code 15 (3.64 p/kWh / 0.395 kg CO2/kWh / + # 1.064 PE factor). Three EES codes share the fuel: + # solid fuel 2 (SAP 158), solid fuel 3 (SAP 160), solid fuel 4 + # (SAP 633). + "BAF": 15, + "BAI": 15, + "RAM": 15, + # House Coal — Table 32 code 11 (3.67 / 0.395 / 1.064). + # Corpus variant solid fuel 5 (SAP 153). + "BCC": 11, + # Dual Fuel (mineral + wood) — Table 32 code 10 (3.99 / 0.087 / + # 1.049). Corpus variant solid fuel 6 (SAP 160). + "BDI": 10, + # Smokeless Fuel (manufactured) — Table 32 code 12 (4.61 / 0.366 / + # 1.261). Corpus variant solid fuel 7 (SAP 160). + "BKI": 12, + # Wood Chips — Table 32 code 21 (3.07 / 0.023 / 1.046). Corpus + # variant solid fuel 8 (SAP 160). + "BQI": 21, + # Wood Pellets in Bags (secondary-row in Table 32, price 5.81 p/kWh) + # — Table 32 code 22 (5.81 / 0.053 / 1.325). Corpus variant + # solid fuel 9 (SAP 636). Elmhurst routes bagged pellets via the + # secondary-row even when used as the main heating fuel. + "RPS": 22, + # Wood Pellets (bulk, for main heating) — Table 32 code 23 (5.26 + # / 0.053 / 1.325). Corpus variant solid fuel 10 (SAP 634). + "RUN": 23, + # Wood Logs — Table 32 code 20 (4.23 / 0.028 / 1.046). Corpus + # variant solid fuel 11 (SAP 634). 
+ "RWN": 20, +} + + class UnmappedElmhurstLabel(ValueError): """An Elmhurst Summary lodged a finite-enum label that the mapper does not yet know how to translate to the SAP10 cascade enum. @@ -4491,6 +4545,19 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: and mh.main_heating_sap_code in _LIQUID_FUEL_BOILER_SAP_MAIN_HEATING_CODES ): main_fuel_int = water_heating_fuel + # Solid-fuel main heating: SAP code rows 150-160 (open / closed + # room heaters with boiler) and 600-636 (independent solid-fuel + # boilers) cover multiple distinct fuels under a single Table 4a + # category — e.g. SAP code 160 is shared by anthracite (BAI), + # wood chips (BQI), dual fuel (BDI), and smokeless (BKI). The + # SAP code alone can't resolve the fuel; the Elmhurst §14.0 + # "Main Heating EES Code" does. Use the corpus-derived dict to + # map the EES code to a SAP10 main_fuel int. + if ( + main_fuel_int is None + and mh.main_heating_ees in _ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE + ): + main_fuel_int = _ELMHURST_MAIN_HEATING_EES_TO_FUEL_CODE[mh.main_heating_ees] heat_emitter_int = _elmhurst_heat_emitter_int( mh.heat_emitter, main_floor=survey.floor, diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 1a41d932..15464adc 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -270,6 +270,16 @@ class MainHeating: # leave §14.0 SAP code empty and identify themselves via the PCDB # index instead). main_heating_sap_code: Optional[int] = None + # Section 14.0 "Main Heating EES Code" — Elmhurst's three-letter + # identifier for the specific main heating system. Distinct from + # `main_heating_sap_code` because the SAP Table 4a code is a generic + # category (e.g. SAP 160 covers anthracite + wood chips + dual fuel + # + smokeless under one "Closed room heater with boiler" row) whereas + # the EES code resolves to the specific fuel (e.g. 
BQI = wood chips, + # BDI = dual fuel). The mapper uses this as a fallback fuel-derivation + # source when §14.0 "Fuel Type" is absent. Empty string when the + # field is absent (PCDB-listed boilers lodge no EES code). + main_heating_ees: str = "" # Section 14.0 also lodges a secondary heating system (when one is # installed). The SAP code is the integer the cascade reads via # `SapHeating.secondary_heating_type` to apply the Table 11