mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fix(extractor): capture all 17 openable §11 windows on cert 001431
The Modelling glazing overlay's draught-proofing recompute (RdSAP 10 §8.1 — a count over openable windows + doors) needs every openable window captured with its draught_proofed flag. cert 001431's §11 lodges 17 windows but only 14 surfaced, via two distinct gaps:

1. Extractor (_extract_windows_from_layout): the one "Double glazing, known data" row whose §11 Data-Source cell is "BFRC data" was rejected — it is laid out as a standalone keyword line with the U-value on the next line and lodges no Frame Type/Factor/Gap cells, so it never matched the joined "<source> <U>" Manufacturer-line shape. Now anchored by a standalone data-source form, with the RdSAP 10 §3.7 default frame factor (0.7) for the absent frame cell.

2. Mapper (_is_elmhurst_roof_window): the two "Double pre 2002" rows (U 3.1 / 3.4 > 3.0) were reclassified as roof windows by the U-value backstop even though both are lodged on an "External wall". A window lodged on a wall is vertical by definition; guard the U-value backstop so it only fires when location/BP give no roof signal. The backstop's only pinned cert (000516 W6) hand-builds its sap_roof_windows and so is unaffected.

With both closed: 17 sap_windows, 0 misrouted to sap_roof_windows, 14 draught-proofed — reconstructing Elmhurst's lodged 84% (16/19 = (14 windows + 2 doors) / (17 windows + 2 doors)).

Full calculator + modelling + orchestration suites green (1885 pass); the 2 glazing draught-proofing xfails remain (the overlay recompute is the glazing agent's front).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f7863f986d
commit
f68cea27c9
3 changed files with 125 additions and 17 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import re
|
||||
from datetime import date, datetime
|
||||
from typing import List, Optional
|
||||
from typing import Final, List, Optional
|
||||
|
||||
from datatypes.epc.surveys.elmhurst_site_notes import (
|
||||
AlternativeWall,
|
||||
|
|
@ -811,6 +811,19 @@ class ElmhurstSiteNotesExtractor:
|
|||
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
||||
)
|
||||
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
||||
# "Known data" rows (BFRC / SAP Table) lodge the §11 Data-Source cell on
|
||||
# its own layout line with the U-value following on the next line — and
|
||||
# carry no Frame Type / Frame Factor / Glazing Gap cells. The joined
|
||||
# "<source> <U>" `_MANUFACTURER_RE` shape never matches them, so they are
|
||||
# anchored by this standalone form instead (cert 001431 §11 has one
|
||||
# "BFRC data" window). "Manufacturer"/"Default" are kept here only for
|
||||
# symmetry; in practice they always share one line with the U-value (the joined form).
|
||||
_STANDALONE_DATA_SOURCE_RE = re.compile(
|
||||
r"^(BFRC data|BFRC|SAP Table|Assessor|Manufacturer|Default)$"
|
||||
)
|
||||
# RdSAP 10 §3.7 default window frame factor, used for "known data" rows
|
||||
# that lodge U and g directly and omit the frame-factor cell.
|
||||
_DEFAULT_FRAME_FACTOR: Final[float] = 0.7
|
||||
_ORIENTATION_TOKENS = frozenset({
|
||||
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
||||
})
|
||||
|
|
@ -900,7 +913,10 @@ class ElmhurstSiteNotesExtractor:
|
|||
|
||||
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
||||
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
||||
if self._MANUFACTURER_RE.match(lines[j].strip()):
|
||||
stripped = lines[j].strip()
|
||||
if self._MANUFACTURER_RE.match(stripped) or (
|
||||
self._STANDALONE_DATA_SOURCE_RE.match(stripped)
|
||||
):
|
||||
return j
|
||||
return None
|
||||
|
||||
|
|
@ -985,6 +1001,20 @@ class ElmhurstSiteNotesExtractor:
|
|||
# would-be glazing-prefix scan.
|
||||
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
||||
|
||||
# The data-source line is either the joined "Manufacturer 4.80" shape
|
||||
# (source keyword + U on one line) or a sparse standalone "BFRC data"
|
||||
# / "SAP Table" shape (keyword alone, U on the next line, and no frame
|
||||
# cells lodged). Resolve which up front: a sparse row has no frame
|
||||
# type/factor to parse.
|
||||
data_source_line = lines[manuf_idx].strip()
|
||||
joined_match = self._MANUFACTURER_RE.match(data_source_line)
|
||||
standalone_match = (
|
||||
None if joined_match is not None
|
||||
else self._STANDALONE_DATA_SOURCE_RE.match(data_source_line)
|
||||
)
|
||||
if joined_match is None and standalone_match is None:
|
||||
return None
|
||||
|
||||
# frame_type and frame_factor immediately follow the data line.
|
||||
# Layout-style cell joining sometimes collapses them onto a
|
||||
# single "Wood 0.70" line; treat both shapes uniformly so the
|
||||
|
|
@ -992,9 +1022,15 @@ class ElmhurstSiteNotesExtractor:
|
|||
# field (glazing_gap / bp / location / orient).
|
||||
if data_idx + 1 >= len(lines):
|
||||
return None
|
||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||
lines, data_idx
|
||||
)
|
||||
if standalone_match is not None:
|
||||
# Sparse "known data" row: no frame type/factor/glazing-gap cells;
|
||||
# everything between W×H×A and the data-source is location/orient.
|
||||
frame_type, frame_factor = None, self._DEFAULT_FRAME_FACTOR
|
||||
middle_start = data_idx + 1
|
||||
else:
|
||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||
lines, data_idx
|
||||
)
|
||||
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
||||
return None
|
||||
|
||||
|
|
@ -1017,28 +1053,40 @@ class ElmhurstSiteNotesExtractor:
|
|||
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
||||
)
|
||||
|
||||
# Manufacturer line carries data_source + u_value.
|
||||
manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
|
||||
if manuf_match is None:
|
||||
return None
|
||||
data_source = manuf_match.group(1)
|
||||
u_value = float(manuf_match.group(2))
|
||||
# Data-source line carries the source keyword and U-value: joined on
|
||||
# one line ("Manufacturer 4.80") or, for sparse rows, the keyword alone
|
||||
# with the U-value on the next line ("BFRC data" / "1.00"). `post_idx`
|
||||
# is where g_value / draught / shutters begin in either layout.
|
||||
if joined_match is not None:
|
||||
data_source = joined_match.group(1)
|
||||
u_value = float(joined_match.group(2))
|
||||
post_idx = manuf_idx + 1
|
||||
else:
|
||||
assert standalone_match is not None
|
||||
data_source = standalone_match.group(1)
|
||||
if manuf_idx + 1 >= len(lines):
|
||||
return None
|
||||
try:
|
||||
u_value = float(lines[manuf_idx + 1].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
post_idx = manuf_idx + 2
|
||||
|
||||
# Post-manufacturer: g_value, draught, shutters.
|
||||
if manuf_idx + 3 >= len(lines):
|
||||
# Post-data-source: g_value, draught, shutters.
|
||||
if post_idx + 2 >= len(lines):
|
||||
return None
|
||||
try:
|
||||
g_value = float(lines[manuf_idx + 1].strip())
|
||||
g_value = float(lines[post_idx].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
|
||||
permanent_shutters = lines[manuf_idx + 3].strip()
|
||||
draught_proofed = lines[post_idx + 1].strip().lower() == "yes"
|
||||
permanent_shutters = lines[post_idx + 2].strip()
|
||||
|
||||
# Prefix / suffix tokens (variable count) carry the
|
||||
# glazing-type, building-part, and orientation strings split by
|
||||
# the layout preprocessor.
|
||||
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
||||
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
|
||||
after = [lines[j].strip() for j in range(post_idx + 3, after_end) if lines[j].strip()]
|
||||
|
||||
# Room-in-roof windows lodge their location as "Roof of Room in
|
||||
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
||||
|
|
|
|||
|
|
@ -4116,6 +4116,14 @@ def _is_elmhurst_roof_window(
|
|||
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
||||
):
|
||||
return True
|
||||
# A window lodged on a wall is vertical by definition. The U-value
|
||||
# backstop below only catches skylights whose location/BP gives no
|
||||
# roof signal; without this guard a high-U *wall* window (e.g. an old
|
||||
# "Double pre 2002" unit at U 3.1 / 3.4) is mis-routed to the roof-
|
||||
# window list on U-value alone — cert 001431 §11 lodges two such
|
||||
# External-wall windows that must remain vertical `sap_windows`.
|
||||
if "wall" in (w.location or "").lower():
|
||||
return False
|
||||
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
||||
|
||||
|
||||
|
|
|
|||
52
tests/domain/modelling/test_window_extraction_001431.py
Normal file
52
tests/domain/modelling/test_window_extraction_001431.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""Window-extraction completeness pin for cert 001431.
|
||||
|
||||
The Modelling glazing overlay's draught-proofing recompute (RdSAP 10 §8.1 — a
|
||||
count over openable windows + doors) needs every openable §11 window captured
|
||||
with its `draught_proofed` flag. The Elmhurst Summary §11 block lodges 17
|
||||
openable windows; two extraction gaps previously surfaced only 14:
|
||||
|
||||
1. The extractor rejected the one "Double glazing, known data" row whose
|
||||
data-source cell is "BFRC data" (laid out as its own line, with no frame
|
||||
factor) — it does not fit the `<data-source> <U>` Manufacturer-line shape.
|
||||
2. The mapper's `_is_elmhurst_roof_window` reclassified the two "Double pre
|
||||
2002" rows (U 3.1 / 3.4 > 3.0) as roof windows, even though both are
|
||||
lodged on an "External wall" — a false positive of the U-value backstop.
|
||||
|
||||
With both closed, all 17 windows are `sap_windows` (none mis-routed to
|
||||
`sap_roof_windows`), and 14 carry `draught_proofed=True` — reconstructing
|
||||
Elmhurst's lodged 84% draught-proofing (16/19 = (14 windows + 2 doors) /
|
||||
(17 windows + 2 doors)).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from tests.domain.modelling._elmhurst_recommendation import (
|
||||
parse_recommendation_summary,
|
||||
)
|
||||
|
||||
|
||||
def test_all_17_openable_windows_captured_on_001431() -> None:
|
||||
# Arrange / Act
|
||||
epc: EpcPropertyData = parse_recommendation_summary(
|
||||
"double_glazing_001431_before.pdf"
|
||||
)
|
||||
|
||||
# Assert — every openable §11 window is captured as a vertical window;
|
||||
# none of the wall-lodged rows leak into the roof-window list.
|
||||
assert len(epc.sap_windows) == 17
|
||||
assert not epc.sap_roof_windows # None or empty — no wall window misrouted
|
||||
|
||||
|
||||
def test_draughtproofing_count_reconstructs_lodged_84_percent() -> None:
|
||||
# Arrange / Act
|
||||
epc: EpcPropertyData = parse_recommendation_summary(
|
||||
"double_glazing_001431_before.pdf"
|
||||
)
|
||||
|
||||
# Assert — 14 of the 17 openable windows are draught-proofed, the numerator
|
||||
# behind Elmhurst's lodged 84% (with the 2 lodged draught-proofed doors).
|
||||
draughtproofed: int = sum(
|
||||
1 for window in epc.sap_windows if window.draught_proofed
|
||||
)
|
||||
assert draughtproofed == 14
|
||||
Loading…
Add table
Reference in a new issue