diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index fa5dadc0..55dd04d6 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -1007,7 +1007,17 @@ class ElmhurstSiteNotesExtractor: joined to the data line (no separate prefix line exists), so the only signal of window-transition is the orientation tokens rotating: orient_suffix(k) → orient_prefix(k+1). Falls through - to `next_data_idx` when neither marker is present.""" + to `next_data_idx` when neither marker is present. + + (c) A standalone wall-location line ("Alternative wall", "External + wall", "Party wall") in the gap belongs to the NEXT window's + prefix — it is that window's §11 Location cell, wrapped above its + W×H×A data row. When the next window is single-glazed its prefix + line carries no glazing-type word (branch a never fires), so + without this the "Alternative wall" line is swallowed into the + current window's suffix and the next window defaults to "External + wall" (simulated case 34: 2 of 4 single-glazed alt-wall windows + mis-allocated → wrong corridor-wall net area).""" scan_start = manuf_idx + 4 seen_orient = False for j in range(scan_start, next_data_idx): @@ -1015,6 +1025,8 @@ class ElmhurstSiteNotesExtractor: first_word = stripped.split(" ", 1)[0] if first_word in self._GLAZING_TYPE_PREFIX_WORDS: return j + if "wall" in stripped.lower(): + return j if stripped in self._ORIENTATION_TOKENS: if seen_orient: return j diff --git a/backend/documents_parser/tests/fixtures/Summary_case34_storage_flat.pdf b/backend/documents_parser/tests/fixtures/Summary_case34_storage_flat.pdf new file mode 100644 index 00000000..33cf586d Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_case34_storage_flat.pdf differ diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 
def test_case34_alt_wall_windows_all_allocated_to_alternative_wall() -> None:
    # Arrange — the simulated case 34 Summary lodges 4 windows on
    # "Alternative wall 1" (0.70 + 1.75 + 1.18 + 1.00 = 4.63 m²) and 6 on
    # the external wall. In §11 the wrapped "Alternative wall / 1" Location
    # cell is interleaved around each window's data row. For a single-glazed
    # alt-wall window that location line carries no glazing-type word, so
    # the partition used to swallow it into the previous window's suffix;
    # the window then defaulted to "External wall" and its opening was
    # deducted from the wrong wall.
    from domain.sap10_calculator.worksheet.heat_transmission import (
        _window_on_alt_wall,  # pyright: ignore[reportPrivateUsage]
    )

    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_CASE34_PDF)
    extracted_notes = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted_notes)

    # Act — select the alt-wall windows once, then derive count and area
    # from that single selection (area is rounded per window to 2 dp
    # before summing).
    alt_windows = [
        window
        for window in (epc.sap_windows or [])
        if _window_on_alt_wall(window)
    ]
    per_window_areas = [
        round(float(window.window_width) * float(window.window_height), 2)
        for window in alt_windows
    ]
    alt_count = len(alt_windows)
    alt_area = sum(per_window_areas)

    # Assert — all 4 alt-wall windows recovered (worksheet alt openings 4.63).
    assert alt_count == 4
    assert abs(alt_area - 4.63) <= 0.01