mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice S0380.12: parse 'Alternative wall' window-location in pre-data slice
Cert 2636-0525-2600-0401-2296's Summary §11 Windows block lodges one
alt-wall window (1.19 m², north-facing). The PDF layout for alt-wall
rows puts the "Alternative wall" string in the slot BEFORE the W×H×A
data line — not after frame_factor where regular "External wall"
rows put it. Without this fix the extractor's
`_parse_window_from_anchors` only scanned the post-frame_factor
`middle` slice for wall tokens, defaulted to "External wall" for the
alt-wall row, and the cascade allocated the 1.19 m² opening to the
main wall instead of the alt-wall — under-deducting from main and
leaving the alt-wall gross instead of net.
Fix at `elmhurst_extractor.py:865`: also scan
`lines[before_start:data_idx]` (the pre-data slice) for "wall"
tokens. Search order:
1. `middle` — first preference (normal layout for regular rows)
2. `pre_data` — alt-wall rows (cert 2636)
3. "External wall" default — no wall lodging found
Forcing function: cert 2636 walls_w_per_k moves from 20.5595 to
**20.0240 — EXACT match against worksheet (29a) Main 11.9250 + alt.1
8.0990 = 20.0240**. (Header (29a) sum is now fabric-exact; the
remaining +0.52 SAP residual on cert 2636 is in the ventilation
cascade — HTC 153.97 vs API 159.02 vs worksheet (39) avg 158.85 —
to be investigated in a follow-up slice.)
Added focused unit test
`test_summary_2636_alt_wall_window_parses_alternative_wall_location`
that pins the by-area lookup: 1.19 m² → "Alternative wall"; the
six 2.25 m² windows stay on "External wall". Guards against future
window-location parser regressions.
Pyright: 0 errors on the edited extractor + test files.
Regression suite: 685 pass + 10 fail (handover baseline 669 + 10 +
16 new GREEN tests across S0380.2..S0380.12). Cohort status:
cert Δ vs worksheet spec floor?
0380 +0.0594 ✓
0350 +0.0458 ✓
2225 +0.0441 ✓
2636 +0.5167 ✗ (fabric exact; ventilation residual)
3800 +0.0442 ✓
9285 +0.0502 ✓
9418 +2.5973 ✗ (Daikin)
Spec refs:
- Slice 102f-prep.10 (commit 24a7351f) — API-path equivalent
"Alt-wall opening allocation per window_wall_type".
- SAP 10.2 §3.7 — opening (window + door) deduction from gross
wall area, per-window allocated to the lodged wall type.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
29cfdf6461
commit
03c4ea4921
2 changed files with 37 additions and 1 deletions
|
|
@ -862,7 +862,17 @@ class ElmhurstSiteNotesExtractor:
|
|||
# Variable-order tokens between frame_factor and Manufacturer.
|
||||
middle = [lines[j].strip() for j in range(middle_start, manuf_idx)]
|
||||
glazing_gap = next((t for t in middle if "mm" in t.lower()), None)
|
||||
location = next((t for t in middle if "wall" in t.lower()), "External wall")
|
||||
# Wall-location lodging. Most rows put "External wall" in
|
||||
# `middle`; alt-wall rows (cert 2636 window-4 / cert 9418 alt-
|
||||
# wall window) put "Alternative wall" in the PRE-data slice
|
||||
# (between the previous window's end and W×H×A). Search both
|
||||
# slices so either layout resolves to the correct location.
|
||||
pre_data = [lines[j].strip() for j in range(before_start, data_idx)]
|
||||
location = (
|
||||
next((t for t in middle if "wall" in t.lower()), None)
|
||||
or next((t for t in pre_data if "wall" in t.lower()), None)
|
||||
or "External wall"
|
||||
)
|
||||
bp_inline = next((t for t in middle if t in self._BP_INLINE_TOKENS), None)
|
||||
orient_inline = next(
|
||||
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ _SUMMARY_000903_PDF = _FIXTURES / "Summary_000903.pdf"
|
|||
_SUMMARY_000901_PDF = _FIXTURES / "Summary_000901.pdf" # cert 3800
|
||||
_SUMMARY_000904_PDF = _FIXTURES / "Summary_000904.pdf" # cert 9285
|
||||
_SUMMARY_000900_PDF = _FIXTURES / "Summary_000900.pdf" # cert 2225
|
||||
_SUMMARY_000898_PDF = _FIXTURES / "Summary_000898.pdf" # cert 2636
|
||||
|
||||
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
|
||||
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
|
||||
|
|
@ -714,6 +715,31 @@ def test_summary_0350_full_chain_sap_within_spec_floor_of_worksheet() -> None:
|
|||
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < _ASHP_COHORT_CHAIN_TOLERANCE
|
||||
|
||||
|
||||
def test_summary_2636_alt_wall_window_parses_alternative_wall_location() -> None:
|
||||
# Arrange — cert 2636-0525-2600-0401-2296's §11 Windows block lodges
|
||||
# one alt-wall window (the 1.19 m² north-facing one): the row's
|
||||
# "Alternative wall" string appears BEFORE the W×H×A line, not
|
||||
# after the frame_factor (the normal position for "External wall").
|
||||
# The extractor's `_parse_window_from_anchors` was only scanning
|
||||
# the post-frame_factor `middle` slice for wall-location tokens →
|
||||
# defaulted to "External wall" for the alt-wall row → cascade
|
||||
# allocated the window to the main wall instead of the alt-wall,
|
||||
# leaving Main external walls W/K under-deducted by ~0.54 vs
|
||||
# worksheet (29a). Fix: also scan the PRE-data slice
|
||||
# `lines[before_start:data_idx]` for wall tokens.
|
||||
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000898_PDF)
|
||||
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
||||
|
||||
# Act
|
||||
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
# Assert — the 1.19 m² window is recorded with wall_type =
|
||||
# "Alternative wall"; all other windows stay on "External wall".
|
||||
by_area = {round(w.window_width, 2): w.window_wall_type for w in epc.sap_windows}
|
||||
assert by_area[1.19] == "Alternative wall"
|
||||
assert by_area[2.25] == "External wall" # main-wall windows unchanged
|
||||
|
||||
|
||||
def test_summary_2225_no_showers_lodged_resolves_to_zero_counts() -> None:
|
||||
# Arrange — cert 2225-3062-8205-2856-7204's Summary §1x Baths and
|
||||
# Showers block lodges 0 baths and ZERO showers (no shower rows at
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue