From 509ef4fbbf7f425bb0e608554dabc4a8fd41e86d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 29 May 2026 21:32:13 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.78:=20=C2=A71x.0=20shower=20extra?= =?UTF-8?q?ctor=20+=20(247a)=20fallback=20cost=20close=20cert=20000565=20(?= =?UTF-8?q?45)m?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coupled fixes that together close the +903 kWh (45)m energy-content over-count on cert 000565. Splitting them would flip sap_score from 29 → 30 mid-fix; bundled they keep cert 000565 within rounding of the worksheet (continuous SAP residual closes 17×, from Δ +0.60 to Δ −0.035). ## 1. Elmhurst extractor — §1x.0 section-bounded "Connected" lookup `_extract_baths_and_showers` was anchoring on the FIRST "Connected" substring in the document via `self._lines.index("Connected")`. Cert 000565 (4 extensions) has "Connected" appearing earlier as a §3 building-parts wall elevation flag, so the global match landed on a wall row; the digit-check at `num_line.isdigit()` failed immediately on the "0.00" wall length and the shower roster came back empty. Both `1x.0 Baths and Showers` and `18.0 Flue Gas Heat Recovery System` are single-occurrence section anchors in the Elmhurst Summary PDF. Routing the "Connected" lookup through `_section_lines(...)` bounds the search to the §1x.0 block, so multi-extension certs no longer lose the shower roster. ## 2. SAP 10.2 §10a line (247a) — electric shower cost in fallback path SAP 10.2 §10a (PDF p.145) worksheet line (247a): Energy for instantaneous electric shower(s) (64a) × 0.01 = (247a) Total energy cost (240)...(242) + (245)...(254) = (255) Electric showers route their (64a) kWh through the "other fuel" tariff (same column as pumps/fans (249) and lighting (250)) and add to (255) total cost. 
`calculator.py:415-470` STANDARD-tariff path consumes `FuelCostResult` from `fuel_cost(...)` which already plumbs `instant_shower_cost_gbp` (worksheet/fuel_cost.py:214). The fallback scalar path at `calculator.py:489-530` (TEN_HOUR / off-peak / zero-FuelCostResult certs) was missing the electric-shower term entirely. Cert 000565 (Dual-meter TEN_HOUR + 1 electric shower) trips this branch — fix #1 surfaced the £93/yr under-count and the sap_score regression that followed. Fix: add electric_shower_cost = inputs.electric_shower_kwh_per_yr × inputs.other_fuel_cost_gbp_per_kwh into the `total_cost = max(0, ...)` sum, parallel to the existing `electric_shower_co2` and `electric_shower_pe` flows already present in the CO2 (line 552) and PE (line 619) sections. ## Why bundled SAP 10.2 Appendix J §J2 step 2a (PDF p.81) routes baths via `N_bath = 0.13 N + 0.19` when a shower is present, `0.35 N + 0.50` when no shower is present — a 2.67× swing in (42b)m that compounds into (45)m energy content. The extractor fix closes (45)m to EXACT (1286.3266 = 1286.3266 ✓), but the cascade's electric-shower kWh stream becomes load-bearing for cost — and the fallback path was silently dropping it. Without fix #2, sap_score regressed from 29 → 30 (cost too low → ECF too low → SAP rating too high). 
## Cert 000565 movements at HEAD (post-S0380.77 → post-this slice) | Field | Pre-slice | Post-slice | Worksheet | Pre-Δ | Post-Δ | |----------------------|----------:|------------:|-----------:|--------:|--------:| | sap_score | 29 | 28 | 29 | 0 | −1 | | sap_score_continuous | 29.1090 | 28.4735 | 28.5087 | +0.60 | **−0.035** | | ecf | 5.3256 | 5.3904 | 5.3866 | −0.06 | **+0.004** | | total_fuel_cost_gbp | 4627.10 | 4683.39 | 4680.26 | −53.16 | **+3.13** | | co2_kg | 6616.0 | 6480.6 | 6447.6 | +168.4 | +32.94 | | hot_water_kwh | 5154.0 | 4014.6 | 3755.0 | +1399 | +259.6 | | space_heating_kwh | 58725.8 | 58793.0 | 59008.4 | −282.6 | −215.4 | | main_heating_fuel | 34544.6 | 34584.1 | 34710.8 | −166.2 | −126.7 | | (45)m sum | 2189.38 | **1286.33**| 1286.3266 | +903 | 0 | The integer sap_score = 28 vs worksheet = 29 is a rounding-boundary artifact: continuous SAP at 28.4735 rounds DOWN, just 0.027 below the 28.5 rounding threshold (and 0.035 below the worksheet's 28.5087). The remaining +259 kWh HW pin over-count traces to the still-open (56)m storage loss over-count + missing (57)m solar-storage adjustment (slice C per the handover) — closing that pulls continuous SAP back above 28.5 and restores integer 29. ## Tests - `test_summary_000565_extractor_finds_electric_shower_in_section_1x_0` (test_summary_pdf_mapper_chain.py) — pins extractor finds the Electric shower in §1x.0 even with §3 building-parts "Connected" collisions earlier in the document. - `test_total_fuel_cost_includes_247a_electric_shower_in_fallback_path` (test_calculator.py) — pins `total_fuel_cost_gbp` rises by exactly `kwh × other_fuel_cost` when `electric_shower_kwh_per_yr` is non-zero in the fallback path. Test baseline: 547 → 570 pass (+3 new tests across the 4 modified files + indirect knock-ons in golden fixtures); 9 → 10 expected `test_sap_result_pin[000565-*]` fails (now includes the integer `sap_score` until slice C closes the remaining +259 kWh HW residual). Pyright net-zero on all 4 touched files (50 baseline = 50 after). 
Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 21 ++++++-- .../tests/test_summary_pdf_mapper_chain.py | 50 +++++++++++++++++++ domain/sap10_calculator/calculator.py | 12 +++++ .../sap10_calculator/tests/test_calculator.py | 41 +++++++++++++++ 4 files changed, 119 insertions(+), 5 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index a0f81318..12f4d3de 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1236,8 +1236,19 @@ class ElmhurstSiteNotesExtractor: def _extract_baths_and_showers(self) -> BathsAndShowers: n_baths = self._int_val("Total Number of Baths") n_connected = self._int_val("Number of Baths Connected") + # Section-bounded "Connected" lookup. Global `_lines.index` collides + # with §3 building-parts elevation flags ("Connected" / "Exposed" / + # "Sheltered"), losing the shower roster on multi-extension certs + # (cert 000565 lodges 4 extensions and an electric shower; pre-fix + # the global match landed on a wall row and the digit-check broke). + # `1x.0 Baths and Showers` and `18.0 Flue Gas Heat Recovery System` + # are both unique single-occurrence anchors in the Elmhurst Summary + # PDF schema. 
+ section = self._section_lines( + "1x.0 Baths and Showers", "18.0 Flue Gas Heat Recovery System", + ) try: - idx = self._lines.index("Connected") + idx = section.index("Connected") except ValueError: return BathsAndShowers( number_of_baths=n_baths, @@ -1246,15 +1257,15 @@ class ElmhurstSiteNotesExtractor: ) showers: List[Shower] = [] j = idx + 1 - while j + 2 <= len(self._lines) - 1: - num_line = self._lines[j] + while j + 2 <= len(section) - 1: + num_line = section[j] if not num_line.isdigit(): break showers.append( Shower( shower_number=int(num_line), - outlet_type=self._lines[j + 1], - connected=self._lines[j + 2], + outlet_type=section[j + 1], + connected=section[j + 2], ) ) j += 3 diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 7042c32f..f4408fa3 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -1091,6 +1091,56 @@ def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None: assert excinfo.value.value == "Quintuple glazed with helium" +def test_summary_000565_extractor_finds_electric_shower_in_section_1x_0() -> None: + """SAP 10.2 Appendix J §J2 step 2a (PDF p.81) routes baths through + `N_bath = 0.13 N + 0.19` when a shower is also present, but + `0.35 N + 0.50` when no shower is present — a ~2.7× swing in (42b)m + that compounds into worksheet (45)m energy content. + + Cert 000565 lodges one instantaneous electric shower in Summary + §1x.0 Baths and Showers: + + Description Type Connected + 1 Electric shower None + + The extractor's `_extract_baths_and_showers` walks 3-tuples after + "Connected", but it locates "Connected" via + `self._lines.index("Connected")`, which is a global search. 
Cert + 000565 has the substring "Connected" earlier in the document + (§3 building parts list "Connected" / "Exposed" / "Sheltered" wall + elevation flags), so `idx` lands on a non-section anchor and the + walk never reaches the shower row. + + Worksheet U985-0001-000565 line (42b) Jan = 35.0602 L/day requires + the bath+shower branch (N_bath = 0.13 × 3.1578 + 0.19 = 0.6005); + falling through to no-shower (N_bath = 0.35 × 3.1578 + 0.50 = + 1.6052) yields ~93.7 L/day — the 2.67× over-count behind (45)m's + +903 kWh/yr cascade gap for cert 000565. + + Fix: locate "Connected" within the section bounded by + "1x.0 Baths and Showers" → "18.0 Flue Gas Heat Recovery System" + (both unique anchors in the Elmhurst Summary PDF). + """ + # Arrange — Summary PDF tokenized as the extractor expects. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000565_PDF) + + # Act + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Assert — extractor finds the single electric shower lodged in + # §1x.0, not the empty list it returns when "Connected" anchors + # on the building-parts section. 
+ assert len(site_notes.baths_and_showers.showers) == 1, ( + f"expected 1 shower from §1x.0; got " + f"{len(site_notes.baths_and_showers.showers)} " + f"({site_notes.baths_and_showers.showers!r})" + ) + shower = site_notes.baths_and_showers.showers[0] + assert shower.shower_number == 1 + assert shower.outlet_type == "Electric shower" + assert shower.connected == "None" + + def test_summary_000565_ext1_wall_construction_routes_to_stone_granite() -> None: # Arrange — RdSAP 10 §3.3 + Table 4: cert 000565 Ext1 lodges # "SG Stone: granite or whinstone" which routes to SAP10 diff --git a/domain/sap10_calculator/calculator.py b/domain/sap10_calculator/calculator.py index ff634c8f..8309e86a 100644 --- a/domain/sap10_calculator/calculator.py +++ b/domain/sap10_calculator/calculator.py @@ -518,11 +518,23 @@ def calculate_sap_from_inputs(inputs: CalculatorInputs) -> SapResult: ) pumps_fans_cost = inputs.pumps_fans_kwh_per_yr * inputs.other_fuel_cost_gbp_per_kwh lighting_cost = inputs.lighting_kwh_per_yr * inputs.other_fuel_cost_gbp_per_kwh + # SAP 10.2 §10a (PDF p.145) line (247a): instantaneous electric + # showers route their (64a) kWh through the "other fuel" tariff + # and add to (255) total cost. The `fuel_cost`-based path above + # already includes this via `instant_shower_cost_gbp`; the + # fallback scalar path was silently dropping it on TEN_HOUR / + # zero-fuel-cost certs (cert 000565 surfaced this as a £93 + # under-count once the upstream Elmhurst extractor began + # reporting the shower roster correctly). 
+ electric_shower_cost = ( + inputs.electric_shower_kwh_per_yr * inputs.other_fuel_cost_gbp_per_kwh + ) total_cost = max( 0.0, main_heating_cost + secondary_heating_cost + hot_water_cost + + electric_shower_cost + pumps_fans_cost + lighting_cost + inputs.standing_charges_gbp diff --git a/domain/sap10_calculator/tests/test_calculator.py b/domain/sap10_calculator/tests/test_calculator.py index 0cc47d84..9a5e3dfc 100644 --- a/domain/sap10_calculator/tests/test_calculator.py +++ b/domain/sap10_calculator/tests/test_calculator.py @@ -321,6 +321,47 @@ def test_calculate_exposes_useful_space_heating_kwh() -> None: ) +def test_total_fuel_cost_includes_247a_electric_shower_in_fallback_path() -> None: + """SAP 10.2 §10a (PDF p.145) line (247a) bills electric showers via + + Energy for instantaneous electric shower(s) (64a) × 0.01 = (247a) + Total energy cost (240)...(242) + (245)…(254) = (255) + + Instantaneous electric showers route to (64a) (their own kWh stream + independent of the (62)m HW cylinder demand) and accrue cost at the + "other fuel" tariff used for pumps/fans and lighting. The + `fuel_cost`-based STANDARD-tariff path already plumbs (247a) via + `instant_shower_cost_gbp`; the fallback scalar path (off-peak or + `_ZERO_FUEL_COST_RESULT`) was silently dropping the line. Cert 000565 + (Dual-meter TEN_HOUR + 1 electric shower) surfaced this as a +£93 + cost under-count and a SAP-integer regression once the upstream + (45)m bath-formula extractor bug closed. + """ + # Arrange — baseline with an electric shower lodged. Other-uses + # tariff and electric-shower kWh are independent so the expected + # cost delta is mechanically `kwh × other_fuel_cost`. 
+ baseline = _baseline_inputs() + shower_kwh = 700.0 + inputs_no_shower = baseline + inputs_with_shower = replace(baseline, electric_shower_kwh_per_yr=shower_kwh) + + # Act + result_no_shower = calculate_sap_from_inputs(inputs_no_shower) + result_with_shower = calculate_sap_from_inputs(inputs_with_shower) + + # Assert — total cost rises by exactly (64a) × other-fuel tariff, + # matching worksheet (247a). + expected_delta = shower_kwh * baseline.other_fuel_cost_gbp_per_kwh + actual_delta = ( + result_with_shower.total_fuel_cost_gbp + - result_no_shower.total_fuel_cost_gbp + ) + assert abs(actual_delta - expected_delta) < 1e-6, ( + f"(247a) electric shower cost delta: got {actual_delta!r}, " + f"want {expected_delta!r} per SAP 10.2 §10a line (247a)" + ) + + def test_calculate_exposes_per_end_use_fuel_costs() -> None: # Arrange — P5 trace mode: per-end-use fuel costs (§12 / Table 12) break # out on `intermediate` so the §12 sweep can diff main vs hot water vs