mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
S0380.199: site-notes "Roof of Room" windows → roof windows (cross-mapper parity with S0380.198)
The Elmhurst extractor crashed parsing simulated-case-6's room-in-roof
window rows: the §11 "Location" cell "Roof of Room in Roof" wraps across
the layout prefix/suffix blocks and leaked into the glazing-type phrase
("Double between 2002 Roof of Room and 2021 in Roof" →
UnmappedElmhurstLabel). Fix (`_parse_window_from_anchors`): detect the roof-of-room
location tokens, strip them from the before/after blocks so the glazing
phrase reconstructs cleanly, and set location="Roof of Room".
Mapper: `_is_elmhurst_roof_window` gains a "Roof of Room" location branch
(highest-confidence rooflight signal, above the BP-roof-type / U>3.0
gates); `_ELMHURST_ROOF_WINDOW_U_BY_GLAZING` gains "Double between 2002
and 2021" → 2.30 (case 6 lodges the already-inclined roof-window U, so
the +0.30 inclination adjustment must not double-apply).
This is the site-notes mirror of S0380.198 (API window_wall_type=4):
both paths now route room-in-roof rooflights to (27a) at the inclined U.
Validated against the case-6 P960 worksheet at abs=1e-4:
(27) Windows = 22.7408 (cascade 22.7407)
(27a) Roof Windows = 13.0375 (cascade 13.0375, EXACT)
(31) ext area = 336.13
Case 6 is pinned only on the §3 window line refs (new standalone test,
not added to the section-pin `_FIXTURES`) because its DUAL main heating
(51% rads + 49% underfloor, oil) makes the §10/§12 per-system lines
non-comparable to SapResult's aggregated fields — documented in the
fixture module. Summary mirrored to Summary_001431_case6.pdf.
Suite: 2355 passed, 1 skipped. New code: 0 pyright errors.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
999eced9fb
commit
2b1f90a7de
7 changed files with 167 additions and 0 deletions
|
|
@ -801,6 +801,12 @@ class ElmhurstSiteNotesExtractor:
|
|||
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
||||
})
|
||||
_BP_INLINE_TOKENS = frozenset({"Main"}) # "Extension" only appears as suffix
|
||||
# A room-in-roof window (rooflight) lodges its §11 "Location" cell as
|
||||
# "Roof of Room in Roof", which the layout preprocessor wraps onto two
|
||||
# tokens ("Roof of Room" in the prefix block, "in Roof" in the suffix).
|
||||
# Detected so the window routes to a roof window (worksheet (27a))
|
||||
# and the tokens don't leak into the glazing-type phrase.
|
||||
_ROOF_OF_ROOM_LOCATION_TOKENS = frozenset({"Roof of Room", "in Roof"})
|
||||
# The Elmhurst Summary PDF lodges each window's glazing-type as a
|
||||
# capitalised phrase like "Double between 2002" / "Double with unknown"
|
||||
# / "Single" / "Triple" / "Secondary". The first token of that phrase
|
||||
|
|
@ -1020,6 +1026,18 @@ class ElmhurstSiteNotesExtractor:
|
|||
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
||||
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
|
||||
|
||||
# Room-in-roof windows lodge their location as "Roof of Room in
|
||||
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
||||
# those tokens out so they don't contaminate the glazing-type
|
||||
# phrase, and override the wall-keyed `location` with the roof-of-
|
||||
# room marker the roof-window classifier keys on.
|
||||
if any(
|
||||
t in self._ROOF_OF_ROOM_LOCATION_TOKENS for t in (*before, *after)
|
||||
):
|
||||
location = "Roof of Room"
|
||||
before = [t for t in before if t not in self._ROOF_OF_ROOM_LOCATION_TOKENS]
|
||||
after = [t for t in after if t not in self._ROOF_OF_ROOM_LOCATION_TOKENS]
|
||||
|
||||
glazing_type, building_part, orientation = self._compose_window_descriptors(
|
||||
before=before,
|
||||
after=after,
|
||||
|
|
|
|||
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case6.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_case6.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -3837,6 +3837,11 @@ def _is_elmhurst_roof_window(
|
|||
"""
|
||||
if w.glazing_type.startswith("Single"):
|
||||
return False
|
||||
# Explicit "Roof of Room" location lodging (simulated case 6): the
|
||||
# surveyor placed the window on the room-in-roof, so it's a rooflight
|
||||
# regardless of BP roof type or U-value.
|
||||
if "roof of room" in (w.location or "").lower():
|
||||
return True
|
||||
bp_roof_type = _elmhurst_bp_roof_type(w, survey)
|
||||
if bp_roof_type is not None and bp_roof_type.startswith(
|
||||
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
||||
|
|
@ -3852,6 +3857,12 @@ def _is_elmhurst_roof_window(
|
|||
# worksheet's (27a) line. The cohort exercises only "Double pre 2002".
|
||||
_ELMHURST_ROOF_WINDOW_U_BY_GLAZING: Dict[str, float] = {
|
||||
"Double pre 2002": 3.4,
|
||||
# Simulated case 6 rooflights: the Summary lodges the already-inclined
|
||||
# roof-window U=2.30 for DG-2002-2021 glazing (vs 2.00 vertical for the
|
||||
# same glazing on a wall) — the worksheet bills it on (27a) at U_eff
|
||||
# 2.1062 (= 2.30 with the §3.2 R=0.04 curtain transform). Keyed here so
|
||||
# the inclination adjustment isn't double-applied.
|
||||
"Double between 2002 and 2021": 2.30,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,108 @@
|
|||
"""Mapper-driven cascade fixture for the Elmhurst P960-0001-001431
|
||||
"simulated case 6" worksheet — a DETACHED, dual-oil cousin of golden
|
||||
cert 0240 carrying ROOM-IN-ROOF WINDOWS (rooflights).
|
||||
|
||||
Routes the Summary PDF through ElmhurstSiteNotesExtractor +
|
||||
from_elmhurst_site_notes (no hand-built EpcPropertyData) so the pin
|
||||
exercises the whole extractor + mapper + calculator pipeline.
|
||||
|
||||
Purpose: validate S0380.198/199 ROOF-WINDOW handling against a real
|
||||
worksheet. Case 6 lodges 6 windows on the room-in-roof ("Roof of Room"
|
||||
location); the worksheet bills them on line (27a) Roof Windows at
|
||||
U_eff 2.1062 (= inclined 2.30 with the §3.2 R=0.04 curtain transform),
|
||||
NOT on (27) as vertical glazing. This is the site-notes mirror of
|
||||
0240's API `window_wall_type=4` roof windows (S0380.198).
|
||||
|
||||
This cert surfaced two site-notes gaps fixed in S0380.199:
|
||||
- the extractor mangled the "Roof of Room in Roof" window-location cell
|
||||
into the glazing-type phrase ("Double between 2002 Roof of Room and
|
||||
2021 in Roof" → UnmappedElmhurstLabel); `_parse_window_from_anchors`
|
||||
now detects + strips those tokens and marks the window roof-of-room;
|
||||
- `_is_elmhurst_roof_window` gained a "Roof of Room" location branch,
|
||||
and `_ELMHURST_ROOF_WINDOW_U_BY_GLAZING` an entry for the
|
||||
already-inclined "Double between 2002 and 2021" → 2.30 (so the
|
||||
inclination adjustment isn't double-applied).
|
||||
|
||||
SCOPE: this fixture pins only the §3 heat-transmission WINDOW line refs
|
||||
(27)/(27a)/(31) — NOT the full SapResult. Case 6 has a DUAL main heating
|
||||
system (51% radiators + 49% underfloor, oil), and `SapResult`'s
|
||||
`main_heating_fuel_kwh_per_yr` / `pumps_fans_kwh_per_yr` aggregate the
|
||||
two systems differently from the worksheet's per-system (211)/(231)
|
||||
lines, so a full SapResult pin isn't apples-to-apples. Heating is also
|
||||
SAP code 127 here vs 0240's code 130 condensing combi — so case 6 pins
|
||||
to its OWN worksheet, not 0240's register.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture
|
||||
debugging/simulated case 6/`. Summary mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_case6.pdf`.
|
||||
|
||||
Worksheet §3 window pin targets (P960-0001-001431, Block 1):
|
||||
- (27) Windows = 19.3704 (Main) + 3.3704 (Ext1) = 22.7408 W/K
|
||||
- (27a) Roof Windows = 6.19 m² × 2.1062 = 13.0375 W/K (the 6 rooflights)
|
||||
- (31) Total external element area = 336.13 m²
|
||||
|
||||
Per [[feedback-zero-error-strict]]: pins are abs=1e-4 against the PDF.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
|
||||
# parents[0]=worksheet/, [1]=sap10_calculator/, [2]=domain/, [3]=tests/,
|
||||
# [4]=repo root.
|
||||
_SUMMARY_PDF: Final[Path] = (
|
||||
Path(__file__).resolve().parents[4]
|
||||
/ "backend" / "documents_parser" / "tests" / "fixtures"
|
||||
/ "Summary_001431_case6.pdf"
|
||||
)
|
||||
|
||||
# Worksheet §3 window line refs (Block 1 — energy rating).
|
||||
LINE_27_WINDOWS_W_PER_K: Final[float] = 22.7408
|
||||
LINE_27A_ROOF_WINDOWS_W_PER_K: Final[float] = 13.0375
|
||||
LINE_31_TOTAL_EXTERNAL_AREA_M2: Final[float] = 336.13
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (mirror of the case-5 helper).

    Each page is rendered with ``pdftotext -layout``. Every non-blank
    layout line is split on runs of two or more whitespace characters and
    each piece becomes one token; blank layout lines are kept as empty
    tokens. The tokens of a page are joined with newlines.

    Args:
        pdf_path: Path to the Summary PDF to convert.

    Returns:
        One newline-joined token string per PDF page, in page order.

    Raises:
        RuntimeError: If the ``pdfinfo`` output has no ``Pages:`` line.
        subprocess.CalledProcessError: If ``pdfinfo`` or ``pdftotext``
            exits with a non-zero status.
    """
    # poppler-utils write UTF-8 by default, so decode as UTF-8 explicitly
    # instead of relying on the locale encoding that text=True alone uses.
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True, text=True, encoding="utf-8", check=True,
    ).stdout
    # Anchor to the start of a line: pdfinfo prints free-text metadata
    # (Title, Subject, ...) before the page count, and an unanchored search
    # would match a "Pages: N" substring inside one of those values first.
    m = re.search(r"^Pages:\s+(\d+)", info, flags=re.MULTILINE)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    pages: list[str] = []
    for i in range(1, page_count + 1):
        # -f/-l restrict the conversion to page i; "-" sends text to stdout.
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-f", str(i), "-l", str(i),
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, encoding="utf-8", check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            if not line.strip():
                # Keep blank lines as empty tokens.
                tokens.append("")
                continue
            # Two or more whitespace characters mark a column gap in the
            # layout output; a single space stays inside a token.
            parts = [p for p in re.split(r"\s{2,}", line.strip()) if p]
            tokens.extend(parts)
        pages.append("\n".join(tokens))
    return pages
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Build the simulated case-6 EpcPropertyData from its Summary PDF.

    The tracked Summary PDF is converted to per-page text, parsed by
    ElmhurstSiteNotesExtractor, and the extracted site notes are mapped with
    EpcPropertyDataMapper.from_elmhurst_site_notes.
    """
    extractor = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    )
    return EpcPropertyDataMapper.from_elmhurst_site_notes(extractor.extract())
|
||||
|
|
@ -42,6 +42,7 @@ from tests.domain.sap10_calculator.worksheet import (
|
|||
_elmhurst_worksheet_000487 as _w000487,
|
||||
_elmhurst_worksheet_000490 as _w000490,
|
||||
_elmhurst_worksheet_000516 as _w000516,
|
||||
_elmhurst_worksheet_001431_case6 as _w001431_case6,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -248,6 +249,35 @@ def test_section_3_line_refs_match_pdf(
|
|||
_pin(actual, expected, f"§3 {fixture_attr} {fixture_name}")
|
||||
|
||||
|
||||
def test_section_3_roof_windows_case6_match_pdf() -> None:
    """Pin the §3 window line refs (27), (27a) and (31) for simulated case 6.

    The 6 room-in-roof rooflights (window_wall_type=4 on the API side /
    "Roof of Room" location on the site-notes side) must bill on (27a) at
    U_eff 2.1062, not on (27) as vertical glazing. This validates the
    S0380.198/199 roof-window routing against a real worksheet.

    Case 6 is deliberately kept out of `_FIXTURES` and pinned only on these
    §3 window lines: its dual main heating system makes the §10/§12
    per-system lines non-comparable — see the fixture module docstring.
    """
    # Arrange: build the EPC through the extractor + mapper pipeline.
    fixture = _w001431_case6
    property_data = fixture.build_epc()

    # Act: derive the §3 heat-transmission section.
    section = heat_transmission_section_from_cert(property_data)

    # Assert: each line ref against the worksheet value, in worksheet order.
    _pin(section.windows_w_per_k, fixture.LINE_27_WINDOWS_W_PER_K, "§3 (27) case6")
    _pin(
        section.roof_windows_w_per_k,
        fixture.LINE_27A_ROOF_WINDOWS_W_PER_K,
        "§3 (27a) case6",
    )
    _pin(
        section.total_external_element_area_m2,
        fixture.LINE_31_TOTAL_EXTERNAL_AREA_M2,
        "§3 (31) case6",
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# §4 Water heating — LINE_42..LINE_65 scalar + monthly tuples
|
||||
# ============================================================================
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue