mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
fix(elmhurst-extractor): allocate single-glazed alt-wall windows to the alternative wall
The §11 layout parser keys a window's wall Location on the glazing-prefix /
orientation tokens around its data row. An alt-wall window lodges its
"Alternative wall 1" Location wrapped across the lines bracketing the W×H×A
row. For a DOUBLE-glazed alt window the prefix line also carries the glazing
phrase ("Double between 2002 Alternative wall"), so the partition breaks
there and the location survives into the window's pre-data slice. For a
SINGLE-glazed alt window the "Alternative wall" line stands alone with no
glazing-type word, so _partition_after_manuf scanned past it and swallowed
it into the PREVIOUS window's suffix — the window then defaulted to
"External wall" and its opening was deducted from the wrong wall.
Fix: treat a standalone wall-location line ("Alternative wall" / "External
wall" / "Party wall") as a window boundary in _partition_after_manuf, so it
attaches to the following window's prefix. Surfaced by simulated case 34
(cert 001431 electric-storage flat): 2 of 4 single-glazed alt-wall windows
were mis-allocated, splitting 2.75/10.78 m² instead of the worksheet's
4.63/8.90 corridor/external opening areas.
Elmhurst-extractor only; API gauge unaffected. Regression gate green (3
pre-existing fails unrelated); worksheet harness 47/47 unchanged. Case 34's
alt-wall opening area now matches the worksheet; the corridor wall net area
is correct (the cert's residual is now isolated to the unheated-corridor
door, a separate slice).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
48b36d3d7e
commit
06989d6b0f
3 changed files with 46 additions and 1 deletions
|
|
@ -1007,7 +1007,17 @@ class ElmhurstSiteNotesExtractor:
|
|||
joined to the data line (no separate prefix line exists), so
|
||||
the only signal of window-transition is the orientation tokens
|
||||
rotating: orient_suffix(k) → orient_prefix(k+1). Falls through
|
||||
to `next_data_idx` when neither marker is present."""
|
||||
to `next_data_idx` when neither marker is present.
|
||||
|
||||
(c) A standalone wall-location line ("Alternative wall", "External
|
||||
wall", "Party wall") in the gap belongs to the NEXT window's
|
||||
prefix — it is that window's §11 Location cell, wrapped above its
|
||||
W×H×A data row. When the next window is single-glazed its prefix
|
||||
line carries no glazing-type word (branch a never fires), so
|
||||
without this the "Alternative wall" line is swallowed into the
|
||||
current window's suffix and the next window defaults to "External
|
||||
wall" (simulated case 34: 2 of 4 single-glazed alt-wall windows
|
||||
mis-allocated → wrong corridor-wall net area)."""
|
||||
scan_start = manuf_idx + 4
|
||||
seen_orient = False
|
||||
for j in range(scan_start, next_data_idx):
|
||||
|
|
@ -1015,6 +1025,8 @@ class ElmhurstSiteNotesExtractor:
|
|||
first_word = stripped.split(" ", 1)[0]
|
||||
if first_word in self._GLAZING_TYPE_PREFIX_WORDS:
|
||||
return j
|
||||
if "wall" in stripped.lower():
|
||||
return j
|
||||
if stripped in self._ORIENTATION_TOKENS:
|
||||
if seen_orient:
|
||||
return j
|
||||
|
|
|
|||
BIN
backend/documents_parser/tests/fixtures/Summary_case34_storage_flat.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_case34_storage_flat.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -85,6 +85,11 @@ _SUMMARY_000904_PDF = _FIXTURES / "Summary_000904.pdf" # cert 9285
|
|||
_SUMMARY_000900_PDF = _FIXTURES / "Summary_000900.pdf" # cert 2225
|
||||
_SUMMARY_000898_PDF = _FIXTURES / "Summary_000898.pdf" # cert 2636
|
||||
_SUMMARY_000902_PDF = _FIXTURES / "Summary_000902.pdf" # cert 9418
|
||||
# simulated case 34 (cert 001431 reconfigured as a slimline electric-storage
|
||||
# flat with an unheated corridor / sheltered alternative wall + 4 alt-wall
|
||||
# windows). Regression net for the flat-roof, sheltered-wall, and §11
|
||||
# alt-wall-window-allocation fixes.
|
||||
_SUMMARY_CASE34_PDF = _FIXTURES / "Summary_case34_storage_flat.pdf"
|
||||
_SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cylinder)
|
||||
_SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cylinder)
|
||||
_SUMMARY_000910_PDF = _FIXTURES / "Summary_000910.pdf" # cert 0036 (Flat, party wall U=0)
|
||||
|
|
@ -1547,6 +1552,34 @@ def test_summary_mapper_raises_on_unmapped_cylinder_insulation_label() -> None:
|
|||
assert excinfo.value.value == "Polyester wool"
|
||||
|
||||
|
||||
def test_case34_alt_wall_windows_all_allocated_to_alternative_wall() -> None:
    """All four case-34 alt-wall windows land on the alternative wall.

    Simulated case 34 lodges 4 windows on "Alternative wall 1"
    (0.70 + 1.75 + 1.18 + 1.00 = 4.63 m²) and 6 on the external wall. In
    the §11 layout the wrapped "Alternative wall / 1" Location cell is
    interleaved around each window's data row. For a single-glazed alt
    window that location line carries no glazing-type word, so the
    partition used to swallow it into the previous window's suffix; the
    window then defaulted to "External wall" and its opening was deducted
    from the wrong wall.
    """
    # Arrange — run the fixture PDF through extraction and mapping.
    from domain.sap10_calculator.worksheet.heat_transmission import (
        _window_on_alt_wall,  # pyright: ignore[reportPrivateUsage]
    )

    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_CASE34_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)

    # Act — keep only the windows allocated to the alternative wall, then
    # derive both the count and the summed (per-window rounded) W×H area.
    alt_windows = [
        window for window in (epc.sap_windows or []) if _window_on_alt_wall(window)
    ]
    alt_count = len(alt_windows)
    alt_area = sum(
        round(float(window.window_width) * float(window.window_height), 2)
        for window in alt_windows
    )

    # Assert — all 4 alt-wall windows recovered (worksheet alt openings 4.63).
    assert alt_count == 4
    assert abs(alt_area - 4.63) <= 0.01
|
||||
|
||||
|
||||
def test_map_elmhurst_alternative_wall_carries_sheltered_flag() -> None:
|
||||
# Arrange — Elmhurst Summary §7 lodges "Alternative Wall N Sheltered
|
||||
# Wall: Yes" for a sub-area adjacent to an unheated buffer (e.g. a flat's
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue