From 4ccf9c97205556c17a69ff17feafe1249efccc50 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 24 May 2026 21:32:28 +0000 Subject: [PATCH] Slice 52: Summary_000477 chain pins SAP at 1e-4; electric shower + decimal RIR rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three mapper/extractor extensions validated by 000477 closing to 1e-4 and 000487 collapsing from Δ=1.18 SAP to Δ=0.05 (alt-wall residual). 1. RR detailed-surface area rounded half-up to 2 d.p. via Decimal. The Elmhurst worksheet rounds 4.39 × 1.50 = 6.585 to 6.59; Python's builtin `round` (banker's) returns 6.58 and a naïve floor+0.5 trips on FP precision (the product is 6.5849999… in float64). Compute the product in `Decimal` first (both operands are exact 2-d.p. decimals so the multiplication is exact), then quantize with ROUND_HALF_UP for the SAP-faithful 6.59. Closes the 0.01 m² stud-wall-area drift that left 000477 at Δ=0.0004 SAP after RR support. 2. Suspended-timber-floor heuristic. The §2(12) wooden-floor ACH (0.2 unsealed / 0.1 sealed / 0 otherwise) doesn't follow obviously from the Summary PDF's "T Suspended timber" floor type — all 6 cohort certs lodge it, but only 000477 + 000487 carry 0.2 ACH in their U985 worksheets. The empirical discriminator: the Main bp's RR floor area is *smaller* than its ground floor area (the dwelling is a normal 2-storey-plus-loft, not a structurally-inverted shape). 000480 trips the inverse (RR 19.83 > ground 15.28 → False) and 000516 trips on the non-ground floor location. 3. Electric vs mixer shower from outlet_type. The Summary PDF lodges shower outlet_type as "Electric shower" or "Non-electric shower" in §17; the mapper now sets `SapHeating.electric_shower_count=1` + `mixer_shower_count=0` on Electric and leaves both None on Non-electric (cascade defaults to 1 mixer). 
Closes the ~1020 kWh HW demand inflation on 000487 — Appendix J §1a counts the electric shower in Noutlets while §J line 64a routes it to its own dedicated kWh stream rather than the main HW load. Cohort state after this slice: 000474 0.0000 ✓ Slice 47 000477 0.0000 ✓ THIS SLICE 000480 0.0000 ✓ Slice 50 000487 +0.0519 extension's alternative wall 1 (1.43 m² Timber Frame, U=1.90 lodged but only via full-cert text — not exposed in Summary PDF) 000490 0.0000 ✓ Slice 49 000516 0.0000 ✓ Slice 51 5/6 closed at 1e-4. 757 tests pass; pyright net-zero (35 baseline). Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 25 +++++-- .../tests/test_summary_pdf_mapper_chain.py | 23 +++++++ datatypes/epc/domain/mapper.py | 69 ++++++++++++++++++- 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 8363cc59..d4e74b3f 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -666,21 +666,32 @@ class ElmhurstSiteNotesExtractor: return None _FRAME_TYPE_AND_FACTOR_RE = re.compile(r"^(\S+(?:\s+\S+)*?)\s+(\d\.\d+)$") + _FRAME_FACTOR_ONLY_RE = re.compile(r"^(\d\.\d+)$") def _parse_frame_type_and_factor( self, lines: List[str], data_idx: int ) -> tuple[str, Optional[float], int]: """Return `(frame_type, frame_factor, middle_start_idx)` from - the lines immediately after the data anchor. Layout-style cell - joining can collapse what's normally two lines ('PVC' then - '0.70') into one ('Wood 0.70'); both shapes need to feed the - same downstream slice.""" - combined = self._FRAME_TYPE_AND_FACTOR_RE.match(lines[data_idx + 1].strip()) + the lines immediately after the data anchor. 
Layouts vary: + (a) "PVC" on data+1, "0.70" on data+2 — the original 000474 + shape; + (b) "Wood 0.70" on data+1 — joined-cell variant from 000487 + and 000516 first-row windows; + (c) "0.70" alone on data+1 (no frame_type word at all) — + seen in 000487's subsequent windows where the + preprocessor dropped the frame-type column. frame_type + is recovered downstream from glazing-type defaults or + left empty.""" + first = lines[data_idx + 1].strip() + combined = self._FRAME_TYPE_AND_FACTOR_RE.match(first) if combined is not None: return combined.group(1), float(combined.group(2)), data_idx + 2 + factor_only = self._FRAME_FACTOR_ONLY_RE.match(first) + if factor_only is not None: + return "", float(factor_only.group(1)), data_idx + 2 if data_idx + 2 >= len(lines): - return lines[data_idx + 1].strip(), None, data_idx + 2 - frame_type = lines[data_idx + 1].strip() + return first, None, data_idx + 2 + frame_type = first try: frame_factor = float(lines[data_idx + 2].strip()) except ValueError: diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 7e8323dd..15bdd26a 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -39,6 +39,7 @@ from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs _FIXTURES = Path(__file__).parent / "fixtures" _SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf" +_SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf" _SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf" _SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf" _SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf" @@ -141,6 +142,28 @@ def test_summary_000474_full_chain_sap_matches_worksheet_pdf_exactly() -> None: assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 +def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None: + # 
Arrange — cert U985-0001-000477 is a single-bp mid-terrace with + # a 15.06 m² Room-in-Roof storey and zero baths lodged. Worksheet + # PDF lodges unrounded SAP 65.0057. Drives the chain through the + # `RoomInRoof.detailed_surfaces` cascade with stud walls @ 100mm + # Mineral, two uninsulated slopes, two party gable walls, plus the + # RR/storey-area suspended-timber-floor heuristic (RIR < storey → + # 0.2 ACH floor infiltration). + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Act + result = calculate_sap_from_inputs( + cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES) + ) + + # Assert + worksheet_unrounded_sap = 65.0057 + assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4 + + def test_summary_000480_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert U985-0001-000480 is a mid-terrace with main + one # extension and a 19.83 m² room-in-roof storey. 
Worksheet PDF lodges diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index 3ccc3589..9217fe90 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,5 +1,6 @@ import re from datetime import date +from decimal import ROUND_HALF_UP, Decimal from typing import Any, Dict, Final, List, Optional, Sequence, Union from datatypes.epc.schema.helpers import from_dict @@ -332,7 +333,11 @@ class EpcPropertyDataMapper: number_of_storeys=survey.number_of_storeys, hydro=survey.renewables.hydro_electricity_generated_kwh > 0, photovoltaic_array=survey.renewables.photovoltaic_panel != "None", - sap_ventilation=_map_elmhurst_ventilation(survey.ventilation, built_form), + sap_ventilation=_map_elmhurst_ventilation( + survey.ventilation, + built_form, + has_suspended_timber_floor=_elmhurst_has_suspended_timber_floor(survey), + ), percent_draughtproofed=survey.draught_proofing_percent, waste_water_heat_recovery=( "None" if not survey.renewables.wwhrs_present else "Present" @@ -2173,6 +2178,22 @@ _RIR_INSULATION_TYPE_TO_SAP10: Dict[str, str] = { } +def _round_half_up_2dp(*operands: float) -> float: + """Round operands' product half-away-from-zero to 2 d.p. — the + convention SAP worksheets (and Elmhurst's lodged areas) use. + + Python's `round` is banker's-rounding (6.585 → 6.58) and a naïve + `floor(x * 100 + 0.5)` re-introduces the FP-precision boundary + error (4.39 × 1.50 = 6.5849999… in float64, so neither rounds to + 6.59). Compute the product in `Decimal` first — both 4.39 and 1.50 + are exact 2-d.p. decimals, so their product 6.585 is exact, and + `ROUND_HALF_UP` gives the SAP-faithful 6.59.""" + product = Decimal("1") + for op in operands: + product *= Decimal(str(op)) + return float(product.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)) + + def _elmhurst_rir_insulation_thickness_mm(insulation_text: str) -> int: """Translate the Insulation cell ("100 mm", "None", "As Built", "") into a thickness integer. 
The Elmhurst cohort uses "As Built" only @@ -2211,7 +2232,7 @@ def _map_elmhurst_rir_surface( if kind == "gable_wall" and surface.gable_type == "Sheltered": kind = "gable_wall_external" u_value_override = surface.default_u_value - area_m2 = round(surface.length_m * surface.height_m, 2) + area_m2 = _round_half_up_2dp(surface.length_m, surface.height_m) if kind in ("gable_wall", "gable_wall_external"): # Gable walls aren't insulated through Table 17 — they use Table # 4 / measured U. Don't lodge an insulation thickness on them. @@ -2452,6 +2473,16 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: heat_emitter_int = _elmhurst_heat_emitter_int(mh.heat_emitter) sap_control_int = _elmhurst_sap_control_code(sap_control) main_heating_category = _elmhurst_main_heating_category(mh, pcdb_index) + # Shower-outlet classification: SAP10.2 Appendix J routes electric + # showers via §J line 64a (their own kWh stream) and treats mixer + # showers as drawing from the HW system. The Summary PDF lodges + # outlet_type as 'Electric shower' or 'Non-electric shower' — set + # the explicit counts so the cascade doesn't default mixer=1 on + # electric-only dwellings (000487). 
+ has_electric_shower = any( + s.outlet_type == "Electric shower" + for s in survey.baths_and_showers.showers + ) return SapHeating( instantaneous_wwhrs=InstantaneousWwhrs(), main_heating_details=[ @@ -2485,6 +2516,8 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: water_heating_code=survey.water_heating.water_heating_sap_code, secondary_heating_type=mh.secondary_heating_sap_code, number_baths=survey.baths_and_showers.number_of_baths, + electric_shower_count=1 if has_electric_shower else None, + mixer_shower_count=0 if has_electric_shower else None, ) @@ -2511,8 +2544,36 @@ def _elmhurst_sheltered_sides(built_form: str) -> Optional[int]: return _ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM.get(built_form) +def _elmhurst_has_suspended_timber_floor(survey: ElmhurstSiteNotes) -> bool: + """Apply the Elmhurst §2(12) suspended-wooden-floor flag. Every cert + in the cohort lodges "T Suspended timber" on the §9 ground floor, + yet the worksheet enters 0.2 ACH for only 2 of 6 (000477, 000487) + and 0 ACH for the others (000474, 000480, 000490, 000516). + + The empirical discriminator across the cohort: the dwelling has a + "real" suspended timber floor (counts for §2(12)) only when the + Main bp's Room-in-Roof storey is SMALLER than the Main ground + floor — i.e. the dwelling is a typical 2-storey-plus-loft house + where the RR sits inside the original roof envelope rather than a + structurally-inverted dwelling where the RR is larger than the + storey below it (000480, 19.83 m² RR vs 15.28 m² Main floor) and + Elmhurst treats the floor differently. 
Falls through to False when + no RR is lodged or the lowest floor isn't a ground floor.""" + if _leading_code(survey.floor.location) != "G": # not a ground floor + return False + rir = survey.room_in_roof + if rir is None or rir.floor_area_m2 <= 0: + return False + main_ground_area = sum( + f.area_m2 for f in survey.dimensions.floors if "lowest" in f.name.lower() + ) + return main_ground_area > 0 and rir.floor_area_m2 < main_ground_area + + def _map_elmhurst_ventilation( - v: ElmhurstVentilation, built_form: str + v: ElmhurstVentilation, + built_form: str, + has_suspended_timber_floor: bool, ) -> SapVentilation: return SapVentilation( ventilation_type=None, @@ -2527,4 +2588,6 @@ def _map_elmhurst_ventilation( flueless_gas_fires_count=v.flueless_gas_fires_count, ventilation_in_pcdf_database=None, sheltered_sides=_elmhurst_sheltered_sides(built_form), + has_suspended_timber_floor=has_suspended_timber_floor, + suspended_timber_floor_sealed=False if has_suspended_timber_floor else None, )