From c28b061cfb84382f8ff1b310682d9dbaf0cbd377 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 31 May 2026 08:59:48 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.130:=20route=20Elmhurst=20oil=20m?= =?UTF-8?q?ains=20via=20=C2=A715.0=20Water=20Heating=20Fuel=20Type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Elmhurst Summary §14.0 Main Heating1 leaves "Fuel Type" empty for Table 4b liquid-fuel boilers (heating oil / HVO / FAME / B30K / bioethanol — SAP codes 120-141). Unlike gas boilers (codes 101-119) where Elmhurst explicitly lodges "Mains gas", liquid-fuel boilers take the fuel from §15.0 "Water Heating Fuel Type" since the same boiler heats space + water. Pre-slice: - `_elmhurst_main_fuel_int(mh.fuel_type)` returned None for the empty §14.0 fuel string. - The electric-SAP-code inference (`_ELECTRIC_SAP_MAIN_HEATING_CODES`) didn't fire because SAP 127 is a Table 4b oil boiler, not electric. - `main_fuel_type` fell through to the raw empty string. - `cert_to_inputs._main_fuel_code` returned None. - `table_32.unit_price_p_per_kwh(None)` defaulted to mains gas (3.48 p/kWh). - The cascade therefore priced ~13.7k kWh/yr of oil space + water heating at the gas tariff — the worksheet's oil rate (5.44 p/kWh) is ~56% higher than the 3.48 p/kWh applied, so fuel cost was under-counted. Two complementary fixes: 1. Add "Heating oil" → 28 ("oil (not community)" per epc_codes.csv row main_fuel,28) to `_ELMHURST_MAIN_FUEL_TO_SAP10`. The existing `API_FUEL_TO_TABLE_32` then routes API 28 → Table 32 code 4 (heating oil — 7.64 p/kWh / 0.298 kg CO2/kWh / 1.180 PE factor per RdSAP 10 spec p.95). This fix handles pcdb 1 directly because pcdb 1 lodges §14.0 "Fuel Type: Heating oil" explicitly. 2. 
Thread a §15.0-fuel fallback for the main_fuel inference: when `mh.fuel_type` is empty AND `mh.main_heating_sap_code` is in the Table 4b liquid-fuel range (120-141 per SAP 10.2 Table 4b "Seasonal efficiency for gas and liquid fuel boilers"), use the §15.0 water_heating_fuel as the main fuel too. Gated on the SAP code range so this can't accidentally fire on solid-fuel-mains + electric-HW certs (where §15.0 lodges "Electricity" for the immersion but the SH fuel is the solid fuel implicit in the SAP code). This fix handles oil 1 + oil pcdb 1/2/3 (where §14.0 is silent but §15.0 lodges "Heating oil"). Residual shifts at HEAD post-slice (5 variants legitimately re-pinned): oil 1 +13.67 SAP → -9.70 SAP (cascade now over-counts at the spec's 7.64 p/kWh — vs worksheet's 5.44) oil pcdb 1/2 +11.17 → -11.63 oil pcdb 3 +11.87 → -10.87 pcdb 1 +21.90 → -9.41 Remaining negative residuals are the price-spec-vs-worksheet gap queued for slice S0380.131 (5.44 vs 7.64 p/kWh oil). The mapper now correctly identifies the fuel; what's left is the cascade tariff. The other 36 corpus variants are unchanged — restricting the §15.0 fallback to SAP 120-141 keeps solid-fuel-mains and electric-mains certs at their existing pins. Extended handover suite at HEAD post-slice: **874 pass, 0 fail** (was 873 + 1 new AAA test). Pyright net-zero on touched files (45 → 45 — pre-existing errors unrelated). 
Co-Authored-By: Claude Opus 4.7 --- .../tests/test_heating_systems_corpus.py | 10 ++-- .../tests/test_summary_pdf_mapper_chain.py | 41 ++++++++++++++ datatypes/epc/domain/mapper.py | 53 ++++++++++++++++--- 3 files changed, 92 insertions(+), 12 deletions(-) diff --git a/backend/documents_parser/tests/test_heating_systems_corpus.py b/backend/documents_parser/tests/test_heating_systems_corpus.py index e89c3d1b..066c34db 100644 --- a/backend/documents_parser/tests/test_heating_systems_corpus.py +++ b/backend/documents_parser/tests/test_heating_systems_corpus.py @@ -97,16 +97,16 @@ _EXPECTATIONS: tuple[_CorpusExpectation, ...] = ( _CorpusExpectation(variant='electric 9', block='11a', expected_sap_resid=+12.0340, expected_cost_resid_gbp=-277.2813, expected_co2_resid_kg=-255.6076, expected_pe_resid_kwh=+362.4518), _CorpusExpectation(variant='gshp', block='11a', expected_sap_resid=+5.1598, expected_cost_resid_gbp=-118.8901, expected_co2_resid_kg=-41.4461, expected_pe_resid_kwh=+639.1890), _CorpusExpectation(variant='no system', block='11a', expected_sap_resid=+21.9350, expected_cost_resid_gbp=-505.4134, expected_co2_resid_kg=+689.2188, expected_pe_resid_kwh=-2454.8193), - _CorpusExpectation(variant='oil 1', block='11a', expected_sap_resid=+13.6701, expected_cost_resid_gbp=-314.9811, expected_co2_resid_kg=-1381.5125, expected_pe_resid_kwh=+612.3606), + _CorpusExpectation(variant='oil 1', block='11a', expected_sap_resid=-9.7030, expected_cost_resid_gbp=+223.5710, expected_co2_resid_kg=-242.2677, expected_pe_resid_kwh=+1259.6587), _CorpusExpectation(variant='oil 2', block='11a', expected_sap_resid=+26.0712, expected_cost_resid_gbp=-600.7179, expected_co2_resid_kg=+2230.1071, expected_pe_resid_kwh=+801.2920), _CorpusExpectation(variant='oil 3', block='11a', expected_sap_resid=+30.9500, expected_cost_resid_gbp=-712.1785, expected_co2_resid_kg=+2859.5796, expected_pe_resid_kwh=+738.4592), _CorpusExpectation(variant='oil 4', block='11a', expected_sap_resid=+28.5927, 
expected_cost_resid_gbp=-655.6129, expected_co2_resid_kg=+2636.9526, expected_pe_resid_kwh=+701.8340), _CorpusExpectation(variant='oil 5', block='11a', expected_sap_resid=+120.7457, expected_cost_resid_gbp=-6312.0020, expected_co2_resid_kg=+1345.3630, expected_pe_resid_kwh=-2780.6222), _CorpusExpectation(variant='oil 6', block='11a', expected_sap_resid=+24.4087, expected_cost_resid_gbp=-561.8886, expected_co2_resid_kg=-658.8928, expected_pe_resid_kwh=-478.5733), - _CorpusExpectation(variant='oil pcdb 1', block='11a', expected_sap_resid=+11.1667, expected_cost_resid_gbp=-257.2961, expected_co2_resid_kg=-1147.3111, expected_pe_resid_kwh=+1455.2982), - _CorpusExpectation(variant='oil pcdb 2', block='11a', expected_sap_resid=+11.1667, expected_cost_resid_gbp=-257.2961, expected_co2_resid_kg=-1147.3111, expected_pe_resid_kwh=+1455.2982), - _CorpusExpectation(variant='oil pcdb 3', block='11a', expected_sap_resid=+11.8747, expected_cost_resid_gbp=-273.6108, expected_co2_resid_kg=-1161.6582, expected_pe_resid_kwh=+1267.6118), - _CorpusExpectation(variant='pcdb 1', block='11a', expected_sap_resid=+21.8997, expected_cost_resid_gbp=-502.0190, expected_co2_resid_kg=-2392.1531, expected_pe_resid_kwh=-1050.3031), + _CorpusExpectation(variant='oil pcdb 1', block='11a', expected_sap_resid=-11.6343, expected_cost_resid_gbp=+268.0722, expected_co2_resid_kg=-35.9551, expected_pe_resid_kwh=+2086.7505), + _CorpusExpectation(variant='oil pcdb 2', block='11a', expected_sap_resid=-11.6343, expected_cost_resid_gbp=+268.0722, expected_co2_resid_kg=-35.9551, expected_pe_resid_kwh=+2086.7505), + _CorpusExpectation(variant='oil pcdb 3', block='11a', expected_sap_resid=-10.8674, expected_cost_resid_gbp=+250.4014, expected_co2_resid_kg=-53.1709, expected_pe_resid_kwh=+1897.4341), + _CorpusExpectation(variant='pcdb 1', block='11a', expected_sap_resid=-9.4083, expected_cost_resid_gbp=+228.9812, expected_co2_resid_kg=-845.8065, expected_pe_resid_kwh=-171.6971), _CorpusExpectation(variant='pcdb 3', 
block='11a', expected_sap_resid=+27.7563, expected_cost_resid_gbp=-637.0435, expected_co2_resid_kg=-446.3815, expected_pe_resid_kwh=+2097.4553), _CorpusExpectation(variant='solid fuel 10', block='11a', expected_sap_resid=+14.7769, expected_cost_resid_gbp=-340.4814, expected_co2_resid_kg=+1906.2620, expected_pe_resid_kwh=-584.5284), _CorpusExpectation(variant='solid fuel 11', block='11a', expected_sap_resid=+8.4098, expected_cost_resid_gbp=-193.7739, expected_co2_resid_kg=+2262.3481, expected_pe_resid_kwh=+2583.7764), diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 1d556d78..86eaf02b 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -236,6 +236,47 @@ def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None: assert len(epc.sap_building_parts) == 3 +def test_summary_001431_oil_1_main_fuel_inferred_from_section_15_water_heating_fuel_type() -> None: + # Arrange — Heating-systems corpus fixture 001431 / "oil 1" lodges a + # Table 4b oil boiler (SAP code 127) at §14.0 Main Heating1 but with + # NO §14.0 "Fuel Type" lodging — the actual fuel only appears in + # §15.0 as "Water Heating Fuel Type: Heating oil". Same applies to + # the other Table 4b oil variants (oil pcdb 1/2/3 et al) and to the + # gov.uk EPC API's `main_fuel_type=28` ("oil (not community)") per + # epc_codes.csv. + # + # Pre-slice the mapper's `_elmhurst_main_fuel_int(mh.fuel_type)` + # returned None for the empty §14.0 fuel string, the electric-SAP- + # code inference didn't fire (SAP 127 isn't in + # `_ELECTRIC_SAP_MAIN_HEATING_CODES`), so `main_fuel_type` fell + # through to the raw empty string. `cert_to_inputs._main_fuel_code` + # then returned None (string is not int), and + # `table_32.unit_price_p_per_kwh(None)` defaulted to mains gas + # (3.48 p/kWh). 
The cascade therefore priced ~13.7k kWh/yr of oil + # heating at the gas tariff — the worksheet's 5.44 p/kWh oil rate + # is ~56% higher than the 3.48 p/kWh applied. + # + # The fix routes the §15.0 water_heating fuel through + # `_elmhurst_main_fuel_int` (which now knows "Heating oil" → 28 + # per epc_codes.csv main_fuel row) and falls back to it for the + # main heating fuel when §14.0 is silent. The cascade then prices + # SH + HW at the heating-oil tariff per Table 32. + summary_pdf = ( + Path(__file__).parents[3] + / "sap worksheets/heating systems examples/oil 1/Summary_001431.pdf" + ) + pages = _summary_pdf_to_textract_style_pages(summary_pdf) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + main_1 = epc.sap_heating.main_heating_details[0] + assert main_1.main_fuel_type == 28 + assert epc.sap_heating.water_heating_fuel == 28 + + def test_summary_001431_community_heating_1_main_heating_sap_code_extracted_when_no_main_heating_2_block() -> None: # Arrange — Heating-systems corpus fixture 001431 / "community heating 1" # lodges §14.0 Main Heating1 directly followed by §14.1 Community diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 33f996a1..04dc6e25 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3815,6 +3815,14 @@ _ELMHURST_MAIN_FUEL_TO_SAP10: Dict[str, int] = { "LPG bulk": 6, "LPG special condition": 7, "Oil": 8, + # Elmhurst Summary §15.0 "Water Heating Fuel Type" lodging form for + # Table 32 code 4 (heating oil, 7.64 p/kWh + 0.298 kg CO2/kWh + 1.180 + # PE factor — RdSAP 10 spec p.95). Distinct from the legacy "Oil" + # label above (API code 8 = "wood chips" — pre-existing oddity in + # this dict that no live fixture surfaces). 28 = epc_codes.csv + # main_fuel row for "oil (not community)", which routes via + # `API_FUEL_TO_TABLE_32` → Table 32 code 4 for cost / CO2 / PE. 
+ "Heating oil": 28, "Coal": 11, "Electricity": 30, "Electricity (off-peak 7hr)": 33, @@ -4075,6 +4083,21 @@ _HEAT_PUMP_SAP_MAIN_HEATING_CODES: Final[frozenset[int]] = frozenset( + list(range(521, 528)) ) +# SAP 10.2 Table 4b liquid-fuel-boiler code range. Table 4b carries +# "Seasonal efficiency for gas and liquid fuel boilers"; rows 101-119 +# are gas boilers (where Elmhurst Summary §14.0 lodges "Fuel Type: +# Mains gas" explicitly) and rows 120-141 are the liquid-fuel boilers +# (heating oil / HVO / FAME / B30K / bioethanol). For the latter, +# Elmhurst conventionally leaves §14.0 "Fuel Type" empty and the +# specific fuel only appears in §15.0 "Water Heating Fuel Type" — +# the same boiler heats space + water. This range gates the §15.0 +# fallback in `_map_elmhurst_sap_heating` so it can't accidentally +# fire on solid-fuel-mains + electric-HW certs (where §15.0 would +# wrongly populate the SH fuel). +_LIQUID_FUEL_BOILER_SAP_MAIN_HEATING_CODES: Final[frozenset[int]] = ( + frozenset(range(120, 142)) +) + class UnmappedElmhurstLabel(ValueError): """An Elmhurst Summary lodged a finite-enum label that the mapper @@ -4436,6 +4459,13 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: ) pcdb_index = _elmhurst_pcdb_boiler_index(mh.pcdf_boiler_reference) main_fuel_int = _elmhurst_main_fuel_int(mh.fuel_type) + # Water heating fuel: Summary §15.0 "Water Heating Fuel Type" lodges + # the fuel name as a string ("Mains gas", "Electricity", "Heating + # oil", ...). Map to the SAP10 int code via the same lookup used + # for main fuel; falls back to None for unrecognised strings. + water_heating_fuel = _elmhurst_main_fuel_int( + survey.water_heating.water_heating_fuel_type, + ) # Elmhurst §14.0 leaves "Fuel Type" empty for electric main heating # systems (HP / electric boiler / storage / underfloor); the SAP # code identifies the carrier. 
Infer electricity (Table 32 code 30) @@ -4445,6 +4475,22 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: and mh.main_heating_sap_code in _ELECTRIC_SAP_MAIN_HEATING_CODES ): main_fuel_int = _STANDARD_ELECTRICITY_FUEL_CODE + # Elmhurst §14.0 also leaves "Fuel Type" empty for Table 4b liquid- + # fuel boilers (heating oil / HVO / FAME / B30K / bioethanol — SAP + # codes 120-141). For these the fuel is lodged in §15.0 "Water + # Heating Fuel Type" (the same boiler heats space + water), so + # when the mapper can resolve §15.0 to a SAP10 fuel code use it as + # the main fuel too. Gated on the SAP code being in the Table 4b + # liquid-fuel range so this can't accidentally fire on + # solid-fuel-mains + electric-HW certs (where §15.0 lodges + # "Electricity" for the immersion but the SH fuel is the solid + # fuel implicit in the SAP code). + if ( + main_fuel_int is None + and water_heating_fuel is not None + and mh.main_heating_sap_code in _LIQUID_FUEL_BOILER_SAP_MAIN_HEATING_CODES + ): + main_fuel_int = water_heating_fuel heat_emitter_int = _elmhurst_heat_emitter_int( mh.heat_emitter, main_floor=survey.floor, @@ -4487,13 +4533,6 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: 1 for s in survey.baths_and_showers.showers if s.outlet_type != "Electric shower" ) - # Water heating fuel: Summary §15 "Water Heating Fuel Type" lodges - # the fuel name as a string ("Mains gas", "Electricity", ...). Map - # to the SAP10 int code via the same lookup used for main fuel; - # falls back to None for unrecognised strings. - water_heating_fuel = _elmhurst_main_fuel_int( - survey.water_heating.water_heating_fuel_type, - ) main_1_detail = MainHeatingDetail( has_fghrs=survey.renewables.flue_gas_heat_recovery_present, # Prefer SAP integer codes when the Elmhurst string maps