From c30b4fcdc8e63b597ff66f581c364901332274c8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 27 May 2026 20:18:31 +0000 Subject: [PATCH] =?UTF-8?q?Slice=20S0380.6:=20surface=20full=20=C2=A715.1?= =?UTF-8?q?=20Hot=20Water=20Cylinder=20block=20=E2=80=94=20Summary=20HW=20?= =?UTF-8?q?exact?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the entire §15.1 Hot Water Cylinder lodging end-to-end and collapses cert 0380's Summary path to the API path at the documented HP-cohort spec-precision floor: SAP **88.5698 (Δ +0.0594)** — exactly matching the API path's spec-floor closure. `hot_water_kwh_per_yr` hits **878.0519** vs worksheet (64) 1502.16 ÷ (216) HW eff 1.7107 = **878.05** — exact match at 1e-4. Four §15.1 fields surfaced together (the cascade requires all four in combination to compute the worksheet-correct HP HW path): 1. `cylinder_size_label` (Summary "Medium" → SAP10 cascade enum 3 = 160 L per `_CYLINDER_SIZE_CODE_TO_LITRES`) 2. `cylinder_insulation_label` (Summary "Foam" → cascade enum 1 = factory, per SAP 10.2 Table 2 Note 2) 3. `cylinder_insulation_thickness_mm` (Summary "50 mm" → 50) 4. `cylinder_thermostat` (Summary "Yes" → bool True → mapper emits 'Y' for the cascade's `sh.cylinder_thermostat == "Y"` string compare) Why all four were required: - `_cylinder_storage_loss_override` in `cert_to_inputs.py:2238-2253` gates on `cylinder_size`, `cylinder_insulation_type == _CYLINDER_INSULATION_TYPE_FACTORY (1)`, AND `cylinder_insulation_thickness_mm`. Missing any → no override → zero storage loss (62)m miscalculated. - `cylinder_thermostat` keys the SAP 10.2 Table 2b temperature factor (53): with-stat 0.5400 vs no-stat ~0.9 → without 'Y' storage loss over-counts by ~300 kWh/yr (the precise diff between the bundled- fields-only attempt at SAP 86.5 vs the fully-bundled attempt at SAP 88.57). Three-layer end-to-end change: 1. `datatypes/epc/surveys/elmhurst_site_notes.py` — add four defaulted `WaterHeating` fields (placed in the defaulted block; existing fixtures that omit §15.1 still construct unchanged). 2. `backend/documents_parser/elmhurst_extractor.py` — extend `_extract_water_heating` to read the §15.1 block via `_section_lines("15.1 Hot Water Cylinder", "15.2 Community Hot Water")` + `_local_val`. Section-scoping is required because the "Insulation Thickness" label collides with §7 Walls / §8 Roofs / §9 Floors lodgings on the same Summary PDF (cert 0380 has §7 "Insulation Thickness 100 mm" for the FE wall — the global `_next_val` would return the wrong value). 3. `datatypes/epc/domain/mapper.py` — add `_elmhurst_cylinder_size_code` + `_elmhurst_cylinder_insulation_code` label-to-enum helpers; replace the broken `cylinder_size = water_heating.water_heating_code` (which was passing the §15 "Water Heating Code" string "HWP" into the numeric `cylinder_size` field, defeating the cascade) with the real `cylinder_size_label`-derived enum. Pre-Slice 6, the Summary path was producing `cylinder_size='HWP'` which `_int_or_none` reduced to None, silently routing the cascade off the HP-with-cylinder HW path entirely. Surfacing the §15.1 block in full lets `_heat_pump_apm_efficiencies` use the spec- correct HW efficiency (1.7107) and `_cylinder_storage_loss_override` contribute the spec-correct (56) 435 kWh/yr storage loss. Pyright net-zero across all four edited files: datatypes/epc/domain/mapper.py: 32 (baseline) datatypes/epc/surveys/elmhurst_site_notes.py: 0 backend/documents_parser/elmhurst_extractor.py: 0 backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0 Regression suite: 674 pass + 11 fail (vs handover baseline 669 + 10 — net +5 pass for the new GREEN unit tests S0380.2..S0380.6; the +1 fail vs baseline is still S0380.1's chain test which pins at 1e-4 vs worksheet 88.5104 and now lands at Δ +0.0594, the same Appendix N3.6 PSR-interpolation precision floor that the API path closes to and that the cohort's 7 ASHP fixtures already track at ±0.07). Tolerance disposition: the +0.0594 residual is identical to the cohort's documented HP-path precision floor. Closing further requires work on the calculator's Appendix N3.6 PSR interpolation step (boilers already match worksheet at 1e-4 via the same cascade — ground-truthed in closed-boiler precedents 001479, 0330), not on the Summary mapper. The S0380.1 chain test should be re-pinned to the ±0.07 ASHP-cohort tolerance in the next slice — same disposition the API-path cohort received in slice 102f (commit c0086660). Spec refs: - SAP 10.2 §4 Table 2 (PDF p.135) — cylinder storage loss factor for foam-insulated cylinders (51) keyed on insulation thickness. - SAP 10.2 §4 Table 2a (PDF p.135) — cylinder volume factor (52). - SAP 10.2 §4 Table 2b (PDF p.135) — cylinder temperature factor (53) keyed on cylinder thermostat + separately-timed DHW. - SAP 10.2 Appendix N3.7(a) (PDF p.6097) — HP HW in-use factor cylinder-criteria, footnote 53 (cert HX area unknown for Open EPC schema → criteria fail → 0.60 in-use factor; the worksheet's closed HW path uses this same factor). - Cert 0380 worksheet `dr87-0001-000899.pdf` lodgings: (47) Cylinder Volume 160.00 L; "Cylinder Insulation Type Foam"; "Cylinder Insulation Thickness 50 mm"; "Cylinder Stat Yes"; (51)..(56) cylinder storage loss chain; (64) HW output 1502.16; (216) HW efficiency 171.0746%. Co-Authored-By: Claude Opus 4.7 --- .../documents_parser/elmhurst_extractor.py | 35 +++++++++ .../tests/test_summary_pdf_mapper_chain.py | 33 +++++++++ datatypes/epc/domain/mapper.py | 72 +++++++++++++++++-- datatypes/epc/surveys/elmhurst_site_notes.py | 13 ++++ 4 files changed, 149 insertions(+), 4 deletions(-) diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index c751c289..1f61fedf 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1062,11 +1062,46 @@ class ElmhurstSiteNotesExtractor: ) def _extract_water_heating(self) -> WaterHeating: + # §15.1 lodgings — Summary writes these only when a cylinder + # is present. The §15.1 block uses labels ("Cylinder Size", + # "Insulated", "Insulation Thickness") that collide with + # global occurrences elsewhere ("Insulation Thickness" also + # appears in §7 Walls / §8 Roofs); scope the lookups via + # `_local_val` against the §15.1..§15.2 slice to disambiguate. + cylinder_lines = self._section_lines( + "15.1 Hot Water Cylinder", "15.2 Community Hot Water", + ) + cylinder_size_label = self._local_val( + cylinder_lines, "Cylinder Size", + ) + cylinder_insulation_label = self._local_val( + cylinder_lines, "Insulated", + ) + cylinder_ins_thickness_raw = self._local_val( + cylinder_lines, "Insulation Thickness", + ) + cylinder_insulation_thickness_mm: Optional[int] = None + if cylinder_ins_thickness_raw: + first = cylinder_ins_thickness_raw.split()[0] + if first.isdigit(): + cylinder_insulation_thickness_mm = int(first) + cylinder_thermostat_raw = self._local_val( + cylinder_lines, "Cylinder Thermostat", + ) + cylinder_thermostat: Optional[bool] = ( + cylinder_thermostat_raw.strip().lower() == "yes" + if cylinder_thermostat_raw is not None + else None + ) return WaterHeating( water_heating_code=self._str_val("Water Heating Code"), water_heating_sap_code=self._int_val("Water Heating SapCode"), water_heating_fuel_type=self._str_val("Water Heating Fuel Type"), hot_water_cylinder_present=self._bool_val("Hot Water Cylinder Present"), + cylinder_size_label=cylinder_size_label, + cylinder_insulation_label=cylinder_insulation_label, + cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm, + cylinder_thermostat=cylinder_thermostat, ) def _extract_baths_and_showers(self) -> BathsAndShowers: diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 04dcbc3e..66aa7495 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -587,6 +587,39 @@ def test_summary_0380_surfaces_insulated_door_u_value_1_2() -> None: assert abs(epc.insulated_door_u_value - 1.2) < 1e-6 +def test_summary_0380_cylinder_block_surfaces_full_15_1_lodging() -> None: + # Arrange — cert 0380's Summary §15.1 Hot Water Cylinder block + # lodges (L 340-347): + # Cylinder Size Medium + # Insulated Foam + # Insulation Thickness 50 mm + # Cylinder Thermostat Yes + # The dr87 worksheet pins these as: + # (47) Cylinder Volume 160.00 L → cascade enum 3 + # "Cylinder Insulation Type Foam" → cascade enum 1 (factory) + # "Cylinder Insulation Thickness 50 mm" → 50 + # "Cylinder Stat Yes" → 'Y' + # Worksheet (51) 0.0152 × (52) 0.9086 × (53) 0.5400 × (47) 160 ÷ 1000 + # = daily storage loss 1.193 kWh/day → (56) annual ~435 kWh — exact + # only when ALL FOUR fields are surfaced together: insulation_type + # + thickness key the Table 2 loss factor (51), volume keys (52), + # and cylinder_thermostat keys the Table 2b temperature factor (53). + # Without cylinder_thermostat='Y' the cascade uses the no-stat + # temperature factor (~0.9 instead of 0.54) and HW storage loss + # over-counts by ~300 kWh/yr. + pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000899_PDF) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert + assert epc.sap_heating.cylinder_size == 3 + assert epc.sap_heating.cylinder_insulation_type == 1 + assert epc.sap_heating.cylinder_insulation_thickness_mm == 50 + assert epc.sap_heating.cylinder_thermostat == "Y" + + def test_summary_0380_full_chain_sap_matches_worksheet_pdf_exactly() -> None: # Arrange — cert 0380-2471-3250-2596-8761 (Summary_000899.pdf / # dr87-0001-000899.pdf) is the first heat-pump cert under per-cert diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ec7c1c7b..c381adea 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -3370,6 +3370,53 @@ _ELMHURST_GAS_BOILER_FUEL_TYPES: frozenset[str] = frozenset({ }) +# Elmhurst Summary §15.1 "Cylinder Size" labels mapped to the SAP10 +# cascade enum that `domain/sap10_calculator/rdsap/cert_to_inputs.py` +# `_CYLINDER_SIZE_CODE_TO_LITRES` keys ({3: 160.0, 4: 210.0}). Only the +# "Medium" lodging is exercised by the cohort (cert 0380); other size +# labels (Small / Large / Very Large) are deferred until a fixture +# exercises them. +_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10: Dict[str, int] = { + "Medium": 3, +} + + +# Elmhurst Summary §15.1 "Insulated" labels mapped to the SAP10 +# `cylinder_insulation_type` cascade enum. Cascade enum 1 +# (factory) is exercised by the cohort (cert 0380 lodges "Foam", +# which SAP 10.2 Table 2 Note 2 treats as factory-applied PU foam). +# Other labels (Loose Jacket, None) are deferred until a fixture +# exercises them. +_ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10: Dict[str, int] = { + "Foam": 1, +} + + +def _elmhurst_cylinder_size_code( + cylinder_size_label: Optional[str], cylinder_present: bool, +) -> Optional[int]: + """Map an Elmhurst §15.1 "Cylinder Size" label to the SAP10 + cascade enum. Returns None when no cylinder is present or the + label is missing/unknown — the cascade's + `_int_or_none(cylinder_size) → None` then routes the cert off the + Table 2/2a/2b storage-loss path (correct for combis / instantaneous + HW; wrong for HP-with-cylinder certs until a label is mapped).""" + if not cylinder_present or cylinder_size_label is None: + return None + return _ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10.get(cylinder_size_label) + + +def _elmhurst_cylinder_insulation_code( + cylinder_insulation_label: Optional[str], cylinder_present: bool, +) -> Optional[int]: + """Map an Elmhurst §15.1 "Insulated" label to the SAP10 + `cylinder_insulation_type` cascade enum. Returns None when no + cylinder is present or the label is missing/unknown.""" + if not cylinder_present or cylinder_insulation_label is None: + return None + return _ELMHURST_CYLINDER_INSULATION_LABEL_TO_SAP10.get(cylinder_insulation_label) + + def _elmhurst_main_heating_category( mh: ElmhurstMainHeating, pcdb_index: Optional[int] ) -> Optional[int]: @@ -3454,10 +3501,27 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating: ], has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling, shower_outlets=shower_outlets, - cylinder_size=( - None - if not survey.water_heating.hot_water_cylinder_present - else survey.water_heating.water_heating_code + cylinder_size=_elmhurst_cylinder_size_code( + survey.water_heating.cylinder_size_label, + survey.water_heating.hot_water_cylinder_present, + ), + cylinder_insulation_type=_elmhurst_cylinder_insulation_code( + survey.water_heating.cylinder_insulation_label, + survey.water_heating.hot_water_cylinder_present, + ), + cylinder_insulation_thickness_mm=( + survey.water_heating.cylinder_insulation_thickness_mm + if survey.water_heating.hot_water_cylinder_present + else None + ), + # Cascade reads `cylinder_thermostat == "Y"` (string compare) per + # `cert_to_inputs.py:2252` / `:2218`. Map the bool to the Y/N + # string the cascade expects; None when no cylinder is present. + cylinder_thermostat=( + ("Y" if survey.water_heating.cylinder_thermostat else "N") + if survey.water_heating.hot_water_cylinder_present + and survey.water_heating.cylinder_thermostat is not None + else None ), water_heating_code=survey.water_heating.water_heating_sap_code, water_heating_fuel=water_heating_fuel, diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index 57663719..85b63c07 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -219,6 +219,19 @@ class WaterHeating: water_heating_sap_code: int water_heating_fuel_type: str hot_water_cylinder_present: bool + # §15.1 "Cylinder Size" lodging, e.g. "Medium" (corresponds to + # cascade enum 3 → 160 L per `_CYLINDER_SIZE_CODE_TO_LITRES`). + # None when no cylinder is present or the line is absent. + cylinder_size_label: Optional[str] = None + # §15.1 "Insulated" lodging, e.g. "Foam" / "Loose Jacket". The + # cascade enum 1 (factory) is used for Foam per SAP 10.2 Table 2 + # Note 2. None when no cylinder is present or the line is absent. + cylinder_insulation_label: Optional[str] = None + # §15.1 "Insulation Thickness" lodging in mm (an integer or None). + cylinder_insulation_thickness_mm: Optional[int] = None + # §15.1 "Cylinder Thermostat" lodging (Yes / No). False or absent + # keeps the cascade's no-thermostat Table 2b temperature factor. + cylinder_thermostat: Optional[bool] = None @dataclass