mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
S0380.197: simulated case 5 e2e fixture — detached sandstone RR validates S0380.196 (RdSAP 10 §3.9.1 + Table 4 p.22)
Promotes user-simulated "case 5" (detached, sandstone-walled, room-in-roof cousin of golden cert 0240) to an e2e worksheet fixture pinning the WHOLE extractor → mapper → calculator pipeline at abs=1e-4 on all 11 Block-1 line refs.

Its worksheet prints the exact RR-gable routing S0380.196 implements, validating that fix against ground truth:

    Roof room Main Gable Wall 1     15.68  U=0.35  (29a)  Exposed → walls @ main-wall U
    Roof room Main remaining area   61.73  U=0.30  (30)   A_RR shell − Σ gables
    External roof Main              14.52  U=0.11  (30)   loft residual
    Roof room Main Gable Wall 2     15.68  U=0.25  (32)   Party → party @ 0.25

gable area = 6.40 × 2.45 (§3.9.1 default RR storey height); A_RR remaining = 12.5√(83.2/1.5) − 2×15.68 = 93.09 − 31.36 = 61.73 (RdSAP 10 §3.9.1(e)).

Confirms a DETACHED dwelling can lodge a Party RR gable (Table 4 p.22 row 2) — so my S0380.196 mapping (gable_wall_type 0=Party, 1=Exposed) is correct; do not flip it.

Two extractor/mapper gaps surfaced and fixed (case 5 is the forcing test):

- Sandstone wall label "SS Stone: sandstone or limestone" had no `_ELMHURST_WALL_CODE_TO_SAP10` entry (raised UnmappedElmhurstLabel). Added "SS" → 2 (WALL_STONE_SANDSTONE), matching 0240's API wall_construction=2 (cross-mapper parity).
- Roof "Insulation Thickness 400+ mm" was silently dropped: the four thickness parsers used `.split()[0].isdigit()`, which rejects the trailing "+" → None → u_roof fell back to the age-J default 0.16 instead of 0.11 (+1.09 W/K roof, the whole 0.12 SAP gap). Added `_parse_thickness_mm` (strips to leading digits) and applied it at all four sites (walls / alt-wall / roof / floor). The only existing fixture with "400+ mm" (000565 Stud Wall) routes via the RIR regex, unaffected.

Result: case 5 cascade ≡ worksheet at 1e-4 on SAP/ECF/cost/CO2 + every energy stream. Neither gap affects 0240 (its API path captures both the sandstone code and "400mm+"); 0240's residual is therefore non-fabric.

Suite: 2353 passed, 1 skipped. New code: 0 pyright errors.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
8861dac694
commit
570df83459
7 changed files with 167 additions and 18 deletions
|
|
@ -281,11 +281,7 @@ class ElmhurstSiteNotesExtractor:
|
|||
# with the §8 Roofs / §9 Floors blocks. None when the PDF
|
||||
# omits the line (no retrofit lodged).
|
||||
ins_thickness_raw = self._local_val(lines, "Insulation Thickness")
|
||||
insulation_thickness_mm = (
|
||||
int(ins_thickness_raw.split()[0])
|
||||
if ins_thickness_raw and ins_thickness_raw.split()[0].isdigit()
|
||||
else None
|
||||
)
|
||||
insulation_thickness_mm = self._parse_thickness_mm(ins_thickness_raw)
|
||||
return WallDetails(
|
||||
wall_type=self._local_str(lines, "Type"),
|
||||
insulation=self._local_str(lines, "Insulation"),
|
||||
|
|
@ -323,11 +319,7 @@ class ElmhurstSiteNotesExtractor:
|
|||
if area <= 0:
|
||||
continue
|
||||
thickness_raw = self._local_val(lines, f"Alternative Wall {n} Thickness")
|
||||
thickness_mm = (
|
||||
int(thickness_raw.split()[0])
|
||||
if thickness_raw and thickness_raw.split()[0].isdigit()
|
||||
else None
|
||||
)
|
||||
thickness_mm = self._parse_thickness_mm(thickness_raw)
|
||||
result.append(AlternativeWall(
|
||||
area_m2=area,
|
||||
wall_type=self._local_str(lines, f"Alternative Wall {n} Type"),
|
||||
|
|
@ -356,11 +348,25 @@ class ElmhurstSiteNotesExtractor:
|
|||
lines = [l.strip() for l in main_body.splitlines() if l.strip()]
|
||||
return self._wall_details_from_lines(lines)
|
||||
|
||||
@staticmethod
def _parse_thickness_mm(raw: Optional[str]) -> Optional[int]:
    """Parse an Elmhurst "Insulation Thickness" cell to integer mm.

    Only the run of digits at the very start of the (stripped) cell is
    used, so both "100 mm" and the bucket-cap "400+ mm" (Table 17/18
    max tabulated row) parse to their numeric value. A bare
    `.split()[0].isdigit()` test rejects the trailing "+", which is how
    simulated case 5's roof "400+ mm" was silently dropped and u_roof
    fell back to the age-J default 0.16 instead of the 300mm+ value
    0.11.

    Args:
        raw: The cell text as read from the PDF, or None when the
            "Insulation Thickness" line is absent.

    Returns:
        The leading integer in millimetres, or None when the cell is
        absent/empty or carries no leading number ("As Built",
        "N None").
    """
    if not raw:
        return None
    # `re.match` anchors at position 0, so digits that appear later in
    # the cell (after a text label) are deliberately NOT picked up.
    leading_digits = re.match(r"\d+", raw.strip())
    if leading_digits is None:
        return None
    return int(leading_digits.group())
|
||||
|
||||
def _roof_details_from_lines(self, lines: List[str]) -> RoofDetails:
|
||||
thickness_raw = self._local_val(lines, "Insulation Thickness")
|
||||
thickness_mm = (
|
||||
int(thickness_raw.split()[0]) if thickness_raw and thickness_raw.split()[0].isdigit() else None
|
||||
)
|
||||
thickness_mm = self._parse_thickness_mm(thickness_raw)
|
||||
insulation = self._local_str(lines, "Insulation")
|
||||
# The Summary PDF omits the "Insulation Thickness" line entirely
|
||||
# when no retrofit insulation is lodged (e.g. "Insulation: N None"
|
||||
|
|
@ -391,11 +397,7 @@ class ElmhurstSiteNotesExtractor:
|
|||
# via the per-thickness column. Mirror of the §8 roof extractor
|
||||
# at `_roof_details_from_lines`.
|
||||
thickness_raw = self._local_val(lines, "Insulation Thickness")
|
||||
thickness_mm = (
|
||||
int(thickness_raw.split()[0])
|
||||
if thickness_raw and thickness_raw.split()[0].isdigit()
|
||||
else None
|
||||
)
|
||||
thickness_mm = self._parse_thickness_mm(thickness_raw)
|
||||
return FloorDetails(
|
||||
location=self._local_str(lines, "Location"),
|
||||
floor_type=self._local_str(lines, "Type"),
|
||||
|
|
|
|||
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case5.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case5.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -2090,6 +2090,10 @@ _ELMHURST_WALL_CODE_TO_SAP10: Dict[str, int] = {
|
|||
"SG": 1, # Stone: granite or whinstone (cert 000565 Ext1) — the
|
||||
# granite-specific Elmhurst variant of "ST"; same SAP10
|
||||
# WALL_STONE_GRANITE=1 cascade entry.
|
||||
"SS": 2, # Stone: sandstone or limestone (simulated case 5 / cert
|
||||
# 0240 archetype) — SAP10 WALL_STONE_SANDSTONE=2. The
|
||||
# sandstone-specific Elmhurst variant; the API path lodges
|
||||
# the same wall as integer wall_construction=2.
|
||||
"SB": 3, # Solid brick (cohort cert lodgement)
|
||||
"SO": 3, # Solid brick (newer Elmhurst PDF variant — same SAP10
|
||||
# mapping; cert 9501 lodges "SO Solid Brick" where the
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,122 @@
|
|||
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
|
||||
"simulated case 5" worksheet — a DETACHED, SANDSTONE-walled cousin of
|
||||
golden cert 0240 (Main + Extension + room-in-roof, age band J).
|
||||
|
||||
Like the other 001431 cases, this fixture does NOT hand-build the
|
||||
EpcPropertyData: it routes the Summary PDF through
|
||||
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result
|
||||
pin grid exercises the WHOLE extractor + mapper + calculator pipeline.
|
||||
|
||||
Purpose: prove the calculator is spec-correct for a DETACHED room-in-roof
|
||||
with one Exposed + one Party gable, validating S0380.196 (API Simplified
|
||||
Type 1 RR gables deduct from the A_RR shell) against a real worksheet.
|
||||
The worksheet prints the exact routing the cascade implements:
|
||||
|
||||
Roof room Main Gable Wall 1 15.68 U=0.35 (29a) ← Exposed → walls @ main-wall U
|
||||
Roof room Main remaining area 61.73 U=0.30 (30) ← A_RR shell − Σ gables (residual)
|
||||
External roof Main 14.52 U=0.11 (30) ← loft residual
|
||||
Roof room Main Gable Wall 2 15.68 U=0.25 (32) ← Party → party @ 0.25
|
||||
|
||||
gable area = 6.40 × 2.45 = 15.68 m² (the §3.9.1 default RR storey height).
|
||||
A_RR remaining = 12.5√(83.2/1.5) − 2×15.68 = 93.09 − 31.36 = 61.73.
|
||||
|
||||
This case surfaced two extractor/mapper gaps fixed in the same slice
|
||||
(S0380.197):
|
||||
- the sandstone wall label "SS Stone: sandstone or limestone" had no
|
||||
`_ELMHURST_WALL_CODE_TO_SAP10` entry (→ WALL_STONE_SANDSTONE=2, matching
|
||||
0240's API `wall_construction=2`);
|
||||
- the roof "Insulation Thickness 400+ mm" was silently dropped by the
|
||||
extractor's `.split()[0].isdigit()` thickness parse (the trailing "+"),
|
||||
so u_roof fell back to the age-J default 0.16 instead of 0.11
|
||||
(`_parse_thickness_mm` now strips to leading digits).
|
||||
|
||||
Cert shape: Detached house, Main + Extension 1, sandstone insulated walls,
|
||||
2 storeys + room-in-roof on the Main (floor 83.2 m², one Exposed + one
|
||||
Party gable, L=6.40 each), oil community/boiler (SAP code 901 combi route,
|
||||
control 2106), no PV, 20 low-energy lighting bulbs.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture
|
||||
debugging/simulated case 5/`. The Summary is mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_case5.pdf`.
|
||||
|
||||
Worksheet pin targets (P960-0001-001431, Block 1 — energy rating):
|
||||
- SAP rating 61 (line 258), ECF 2.7724 (line 257)
|
||||
- Total fuel cost £1586.4549 (line 255)
|
||||
- CO2 8387.6229 kg/year (line 272)
|
||||
- Space heating 12838.6489 kWh/year (Σ monthly (98))
|
||||
- Main 1 fuel 21397.7480 kWh/year (line 211)
|
||||
- Secondary fuel 0.0 (line 215)
|
||||
- Hot water fuel 6498.2518 kWh/year (line 219)
|
||||
- Lighting 381.4601 kWh/year (line 232)
|
||||
- Pumps/fans 141.0 kWh/year (line 231)
|
||||
|
||||
Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation-
|
||||
philosophy]]: pins are abs=1e-4 against the worksheet PDF.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
|
||||
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
|
||||
# [4]=repo root.
|
||||
_SUMMARY_PDF: Final[Path] = (
|
||||
Path(__file__).resolve().parents[4]
|
||||
/ "backend" / "documents_parser" / "tests" / "fixtures"
|
||||
/ "Summary_001431_case5.pdf"
|
||||
)
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (label\\nvalue sequences).

    Mirror of the helper in `test_summary_pdf_mapper_chain.py` /
    `_elmhurst_worksheet_000565.py`.

    The page count is read from `pdfinfo`; each page is then rendered
    on its own with `pdftotext -layout` and flattened to one token per
    line: every run of two or more whitespace characters on a layout
    line is treated as a column break, and a blank layout line is kept
    as an empty token.

    Args:
        pdf_path: Path to the Summary PDF.

    Returns:
        One string per page, in page order, with tokens joined by "\\n".

    Raises:
        RuntimeError: `pdfinfo` output carries no "Pages:" line.
        subprocess.CalledProcessError: `pdfinfo` or `pdftotext` exits
            non-zero (both are run with `check=True`).
    """
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True,
    ).stdout
    m = re.search(r"Pages:\s+(\d+)", info)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    # pdftotext page numbers are 1-based; -f/-l pin first and last page
    # to the same value so each call renders exactly one page.
    for i in range(1, page_count + 1):
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-f", str(i), "-l", str(i),
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            if not line.strip():
                # Preserve blank lines as empty tokens so block
                # boundaries in the page survive the flattening.
                tokens.append("")
                continue
            # Two-or-more spaces separate layout columns; a single
            # space stays inside a token ("Insulation Thickness").
            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
            tokens.extend(parts)
        pages.append("\n".join(tokens))
    return pages
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Route the simulated case-5 Summary through extractor + mapper.

    No hand-built EpcPropertyData — the extractor and mapper are part of
    the test target. Exercises the S0380.196 RR-gable deduction, the
    S0380.197 sandstone-wall-label + "400+ mm" roof-thickness fixes.

    Returns:
        The EpcPropertyData produced by
        `EpcPropertyDataMapper.from_elmhurst_site_notes` from the
        site notes extracted out of `_SUMMARY_PDF`.
    """
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
|
@ -41,6 +41,7 @@ from tests.domain.sap10_calculator.worksheet import (
|
|||
_elmhurst_worksheet_001431_rr as _w001431_rr,
|
||||
_elmhurst_worksheet_001431_rr8 as _w001431_rr8,
|
||||
_elmhurst_worksheet_001431_6035 as _w001431_6035,
|
||||
_elmhurst_worksheet_001431_case5 as _w001431_case5,
|
||||
)
|
||||
from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import (
|
||||
ALL_FIXTURES as _ELMHURST_FIXTURES,
|
||||
|
|
@ -217,6 +218,25 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = {
|
|||
lighting_kwh_per_yr=262.0885,
|
||||
pumps_fans_kwh_per_yr=86.0,
|
||||
),
|
||||
# Mapper-driven cohort entry — Summary_001431_case5.pdf → extractor →
|
||||
# mapper → calculator. DETACHED, SANDSTONE-walled cousin of cert 0240:
|
||||
# Main + Extension + room-in-roof (floor 83.2 m², one Exposed + one
|
||||
# Party gable L=6.40), age J, oil combi (SAP 901), no PV. Validates
|
||||
# S0380.196 (RR gable deduction) against a real worksheet — the
|
||||
# worksheet prints Gable 1 (Exposed) at (29a) U=0.35, Gable 2 (Party)
|
||||
# at (32) U=0.25, remaining area = shell − Σ gables at (30). Also pins
|
||||
# the S0380.197 sandstone "SS" wall label + "400+ mm" roof-thickness
|
||||
# extractor fixes (without the latter, roof U fell to 0.16 not 0.11).
|
||||
"001431_case5": FixtureCascadePins(
|
||||
sap_score=61, sap_score_continuous=61.3255, ecf=2.7724,
|
||||
total_fuel_cost_gbp=1586.4549, co2_kg_per_yr=8387.6229,
|
||||
space_heating_kwh_per_yr=12838.6489,
|
||||
main_heating_fuel_kwh_per_yr=21397.7480,
|
||||
secondary_heating_fuel_kwh_per_yr=0.0,
|
||||
hot_water_kwh_per_yr=6498.2518,
|
||||
lighting_kwh_per_yr=381.4601,
|
||||
pumps_fans_kwh_per_yr=141.0,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -232,6 +252,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = {
|
|||
"001431_rr": _w001431_rr,
|
||||
"001431_rr8": _w001431_rr8,
|
||||
"001431_6035": _w001431_6035,
|
||||
"001431_case5": _w001431_case5,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue