mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
fix(mapper): map Elmhurst "Value known" cylinder to measured volume (code 6)
The Elmhurst Summary §15.1 lodges "Cylinder Size: Value known" with the measured volume in the "Cylinder Volume (l)" line — the Summary-path equivalent of the gov-API "Exact" descriptor. The mapper had no entry for "Value known" so `_elmhurst_cylinder_size_code` raised UnmappedElmhurstLabel, and even once mapped the measured volume was never threaded through, so the cascade dropped the cylinder storage loss (~468 kWh/yr) from (219) water heating on every measured-volume-cylinder Summary. Per RdSAP 10 §10.5 Table 28 (p.55) a measured cylinder volume is used directly. Map "Value known" → cascade code 6 (Exact) and thread the §15.1 "Cylinder Volume (l)" value into SapHeating.cylinder_volume_measured_l, which `_cylinder_volume_l_from_code` (cert_to_inputs.py:5281) already reads for code 6 — mirroring the gov-API path (mapper.py:1575/1885). Pins simulated case 39 (P960-0001-001431): an age-A mid-terrace on direct-acting electric room heaters (SAP code 691, cat 10, control 2602) with electric-immersion DHW off a 117 L "Value known" cylinder. The full extractor→mapper→calculator cascade now reproduces the worksheet's SAP-rating block EXACTLY — SAP value 36.6365 (band F) and (272) CO2 2056.0731 kg/yr, with (219) water heating 2637.5049 and (255) total energy cost 1802.0039. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
1fe67fe814
commit
b2b6f8e954
6 changed files with 214 additions and 0 deletions
|
|
@ -1528,6 +1528,18 @@ class ElmhurstSiteNotesExtractor:
|
|||
first = cylinder_ins_thickness_raw.split()[0]
|
||||
if first.isdigit():
|
||||
cylinder_insulation_thickness_mm = int(first)
|
||||
# §15.1 "Cylinder Volume (l)" — the measured volume lodged alongside
|
||||
# a "Value known" Cylinder Size. The value is written as a decimal
|
||||
# ("117.00"); take the integer part for the cascade's measured-volume
|
||||
# field (gov-API "Exact" descriptor, code 6).
|
||||
cylinder_volume_raw = self._local_val(cylinder_lines, "Cylinder Volume (l)")
|
||||
cylinder_volume_measured_l: Optional[int] = None
|
||||
if cylinder_volume_raw:
|
||||
first = cylinder_volume_raw.split()[0]
|
||||
try:
|
||||
cylinder_volume_measured_l = int(float(first))
|
||||
except ValueError:
|
||||
cylinder_volume_measured_l = None
|
||||
cylinder_thermostat_raw = self._local_val(
|
||||
cylinder_lines, "Cylinder Thermostat",
|
||||
)
|
||||
|
|
@ -1560,6 +1572,7 @@ class ElmhurstSiteNotesExtractor:
|
|||
cylinder_size_label=cylinder_size_label,
|
||||
cylinder_insulation_label=cylinder_insulation_label,
|
||||
cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm,
|
||||
cylinder_volume_measured_l=cylinder_volume_measured_l,
|
||||
cylinder_thermostat=cylinder_thermostat,
|
||||
immersion_type=immersion_type,
|
||||
)
|
||||
|
|
|
|||
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case39.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case39.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -5942,6 +5942,13 @@ def _elmhurst_cylinder_size_code(
|
|||
Table 28 page 55."""
|
||||
if not cylinder_present or cylinder_size_label is None:
|
||||
return None
|
||||
if cylinder_size_label == "Value known":
|
||||
# Measured-volume cylinder — the Summary-path equivalent of the
|
||||
# gov-API "Exact" descriptor. RdSAP 10 §10.5 Table 28 (p.55): when
|
||||
# the cylinder volume is measured it is used directly. Cascade code
|
||||
# 6 routes `_cylinder_volume_l_from_code` to the lodged
|
||||
# `cylinder_volume_measured_l` (`cert_to_inputs.py:5281`).
|
||||
return 6 # Exact / measured volume
|
||||
if cylinder_size_label == "No Access":
|
||||
if water_heating_fuel_label is None or meter_type_label is None:
|
||||
raise UnmappedElmhurstLabel(
|
||||
|
|
@ -6587,6 +6594,14 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
|
|||
),
|
||||
cylinder_insulation_type=cylinder_insulation_type_field,
|
||||
cylinder_insulation_thickness_mm=cylinder_insulation_thickness_mm_field,
|
||||
# §15.1 "Cylinder Volume (l)" — measured volume for a "Value known"
|
||||
# cylinder (cascade code 6 / Exact). None unless a cylinder is
|
||||
# present; the cascade reads it only when `cylinder_size == 6`.
|
||||
cylinder_volume_measured_l=(
|
||||
survey.water_heating.cylinder_volume_measured_l
|
||||
if survey.water_heating.hot_water_cylinder_present
|
||||
else None
|
||||
),
|
||||
# Cascade reads `cylinder_thermostat == "Y"` (string compare) per
|
||||
# `cert_to_inputs.py:2252` / `:2218`. Map the bool to the Y/N
|
||||
# string the cascade expects; None when no cylinder is present.
|
||||
|
|
|
|||
|
|
@ -369,6 +369,11 @@ class WaterHeating:
|
|||
cylinder_insulation_label: Optional[str] = None
|
||||
# §15.1 "Insulation Thickness" lodging in mm (an integer or None).
|
||||
cylinder_insulation_thickness_mm: Optional[int] = None
|
||||
# §15.1 "Cylinder Volume (l)" lodging — the measured cylinder volume in
|
||||
# litres, present when "Cylinder Size" is lodged as "Value known"
|
||||
# (the Summary-path equivalent of the gov-API "Exact" descriptor,
|
||||
# cascade code 6). None when no cylinder is present or the line is absent.
|
||||
cylinder_volume_measured_l: Optional[int] = None
|
||||
# §15.1 "Cylinder Thermostat" lodging (Yes / No). False or absent
|
||||
# keeps the cascade's no-thermostat Table 2b temperature factor.
|
||||
cylinder_thermostat: Optional[bool] = None
|
||||
|
|
|
|||
60
tests/datatypes/epc/domain/test_mapper_cylinder_size.py
Normal file
60
tests/datatypes/epc/domain/test_mapper_cylinder_size.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""Mapper boundary: the Elmhurst §15.1 "Cylinder Size" label.
|
||||
|
||||
A cylinder lodged "Value known" carries a measured volume in the §15.1
|
||||
"Cylinder Volume (l)" line — the Summary-path equivalent of the gov-API
|
||||
"Exact" descriptor. Per RdSAP 10 §10.5 Table 28 (p.55) the measured volume
|
||||
is used directly; cascade code 6 routes `_cylinder_volume_l_from_code` to
|
||||
the lodged `cylinder_volume_measured_l`. Before this was mapped the label
|
||||
raised `UnmappedElmhurstLabel`, blocking every measured-volume-cylinder
|
||||
Summary.
|
||||
"""
|
||||
|
||||
from datatypes.epc.domain.mapper import (
|
||||
UnmappedElmhurstLabel,
|
||||
_elmhurst_cylinder_size_code, # pyright: ignore[reportPrivateUsage]
|
||||
)
|
||||
|
||||
|
||||
def test_value_known_label_maps_to_exact_code_6() -> None:
    """A present cylinder lodged "Value known" maps to cascade code 6."""
    # Arrange
    lodged_label = "Value known"

    # Act
    mapped_code = _elmhurst_cylinder_size_code(
        lodged_label,
        cylinder_present=True,
    )

    # Assert
    assert mapped_code == 6
|
||||
|
||||
|
||||
def test_value_known_label_with_no_cylinder_maps_to_none() -> None:
    """With no cylinder present the "Value known" label yields no code."""
    # Arrange
    lodged_label = "Value known"

    # Act
    mapped_code = _elmhurst_cylinder_size_code(
        lodged_label,
        cylinder_present=False,
    )

    # Assert
    assert mapped_code is None
|
||||
|
||||
|
||||
def test_normal_label_still_maps_to_code_2() -> None:
    """Regression guard: the existing "Normal" label keeps mapping to 2."""
    # Arrange
    lodged_label = "Normal"

    # Act
    mapped_code = _elmhurst_cylinder_size_code(
        lodged_label,
        cylinder_present=True,
    )

    # Assert
    assert mapped_code == 2
|
||||
|
||||
|
||||
def test_unknown_label_still_raises() -> None:
    """Regression guard: an unrecognised label still raises
    `UnmappedElmhurstLabel` rather than being silently mapped."""
    # Arrange
    bogus_label = "Spray-on unicorn cylinder"

    # Act / Assert
    try:
        _elmhurst_cylinder_size_code(bogus_label, cylinder_present=True)
    except UnmappedElmhurstLabel:
        # Expected outcome — the test passes.
        pass
    else:
        raise AssertionError("expected UnmappedElmhurstLabel for an unknown label")
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
|
||||
"simulated case 39" worksheet — an age-A (pre-1900) mid-terrace heated by
|
||||
**direct-acting electric room heaters** (SAP code 691, category 10, control
|
||||
2602 appliance thermostats), with an electric room-heater secondary (also
|
||||
691) and electric-immersion DHW (WHC 903) off a **measured-volume hot-water
|
||||
cylinder** ("Cylinder Size: Value known", 117 L, foam 38 mm), on a single
|
||||
(standard) electricity meter.
|
||||
|
||||
This case was generated to probe the API-corpus's worst-served cohort
|
||||
(category-10 direct-acting electric, 46% within-0.5). It exposed a real
|
||||
Summary-path gap: the §15.1 "Cylinder Size: Value known" lodging (the
|
||||
Summary equivalent of the gov-API "Exact" descriptor) was unmapped, so the
|
||||
extractor/mapper raised `UnmappedElmhurstLabel` and — once that was mapped —
|
||||
the measured "Cylinder Volume (l)" was not threaded through, dropping the
|
||||
cylinder storage loss (~468 kWh/yr) from (219) water heating. Wiring the
|
||||
measured volume (cascade code 6 → `_cylinder_volume_l_from_code`) closes the
|
||||
whole cascade EXACTLY.
|
||||
|
||||
Like 000565 / the _rr cases / case 20 / 21 / 38, this fixture does NOT hand-
|
||||
build the EpcPropertyData: it routes the Summary PDF through
|
||||
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the pin exercises
|
||||
the WHOLE extractor + mapper + calculator pipeline.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture debugging/
|
||||
simulated case 39/`. The Summary is mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_case39.pdf` so the
|
||||
test runs without depending on the unstaged workspace.
|
||||
|
||||
Worksheet pin targets (P960-0001-001431, "11a. SAP rating" / "12a. CO2
|
||||
emissions" block — the UK-average-climate rating block our cascade
|
||||
reproduces; the P960's separate postcode-climate EPC block (272)=1803.19 is
|
||||
a known regional-climate gap, not a SAP-rating divergence):
|
||||
- SAP value (un-rounded, before (258) integer rounding) = 36.6365 (band F)
|
||||
- (272) Total CO2, kg/year = 2056.0731
|
||||
|
||||
Per [[feedback-zero-error-strict]] + [[feedback-continuous-sap-tolerance]]:
|
||||
pins are abs <= 1e-3 against the worksheet PDF (printed to 4 dp).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
|
||||
from domain.sap10_calculator.rdsap.cert_to_inputs import cert_to_inputs
|
||||
|
||||
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
|
||||
# [4]=repo root.
|
||||
_SUMMARY_PDF: Final[Path] = (
|
||||
Path(__file__).resolve().parents[4]
|
||||
/ "backend" / "documents_parser" / "tests" / "fixtures"
|
||||
/ "Summary_001431_case39.pdf"
|
||||
)
|
||||
|
||||
LINE_258_SAP_VALUE_CONTINUOUS: Final[float] = 36.6365
|
||||
LINE_272_TOTAL_CO2_KG_PER_YR: Final[float] = 2056.0731
|
||||
_PIN_ABS: Final[float] = 1e-3
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (label/value token sequences).
    Mirror of the helper in the other `_elmhurst_worksheet_*` fixtures.

    The page count is read from `pdfinfo`; each page is then rendered with
    `pdftotext -layout`. Every non-blank line is split on runs of two or
    more whitespace characters into tokens, a blank line contributes one
    empty token, and a page's tokens are joined with newlines.

    The tool output is decoded explicitly as UTF-8 (the Poppler tools'
    default output encoding) instead of the locale's preferred encoding, so
    non-ASCII characters decode the same way regardless of the host locale.

    Args:
        pdf_path: Path to the Summary PDF to convert.

    Returns:
        One newline-joined token string per PDF page, in page order.

    Raises:
        RuntimeError: if the `pdfinfo` output contains no "Pages:" line.
        subprocess.CalledProcessError: if `pdfinfo` or `pdftotext` exits
            with a non-zero status (both are run with `check=True`).
    """
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True, text=True, encoding="utf-8", check=True,
    ).stdout
    m = re.search(r"Pages:\s+(\d+)", info)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    # pdftotext page numbers (-f / -l) are 1-based and inclusive.
    for i in range(1, page_count + 1):
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-f", str(i), "-l", str(i),
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, encoding="utf-8", check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            if not line.strip():
                # Keep blank lines as empty tokens so the line structure of
                # the page survives in the joined output.
                tokens.append("")
                continue
            # Columns in the -layout rendering are separated by 2+ spaces;
            # single spaces stay inside a token.
            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
            tokens.extend(parts)
        pages.append("\n".join(tokens))
    return pages
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Build the case-39 EpcPropertyData from the Summary PDF.

    The PDF is converted to per-page text, run through
    ElmhurstSiteNotesExtractor and then mapped with
    EpcPropertyDataMapper.from_elmhurst_site_notes. Nothing is hand-built:
    the extractor and the mapper are themselves under test."""
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
|
||||
|
||||
|
||||
def test_case39_measured_volume_cylinder_reproduces_the_worksheet_sap_and_co2() -> None:
    """Pin the continuous SAP value and total CO2 for simulated case 39
    against the worksheet figures, within `_PIN_ABS`."""
    # Arrange — run the Summary through extractor -> mapper, then derive the
    # calculator inputs (direct-electric room heaters + electric immersion
    # DHW off a "Value known" 117 L measured-volume cylinder).
    epc = build_epc()
    sap_inputs = cert_to_inputs(epc)

    # Act
    result = calculate_sap_from_inputs(sap_inputs)

    # Assert — the SAP-rating block reproduces the worksheet exactly.
    sap_deviation = abs(
        result.sap_score_continuous - LINE_258_SAP_VALUE_CONTINUOUS
    )
    assert sap_deviation <= _PIN_ABS

    co2_deviation = abs(result.co2_kg_per_yr - LINE_272_TOTAL_CO2_KG_PER_YR)
    assert co2_deviation <= _PIN_ABS
|
||||
Loading…
Add table
Reference in a new issue