mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fix(extractor): capture all 17 openable §11 windows on cert 001431
The Modelling glazing overlay's draught-proofing recompute (RdSAP 10 §8.1 — a count over openable windows + doors) needs every openable window captured with its draught_proofed flag. cert 001431's §11 lodges 17 windows but only 14 surfaced, via two distinct gaps:

1. Extractor (_extract_windows_from_layout): the one "Double glazing, known data" row whose §11 Data-Source cell is "BFRC data" was rejected — it is laid out as a standalone keyword line with the U-value on the next line and lodges no Frame Type/Factor/Gap cells, so it never matched the joined "<source> <U>" Manufacturer-line shape. Now anchored by a standalone data-source form, with the RdSAP 10 §3.7 default frame factor (0.7) for the absent frame cell.

2. Mapper (_is_elmhurst_roof_window): the two "Double pre 2002" rows (U 3.1 / 3.4 > 3.0) were reclassified as roof windows by the U-value backstop even though both are lodged on an "External wall". A window lodged on a wall is vertical by definition; guard the U-value backstop so it only fires when location/BP give no roof signal. The backstop's only pinned cert (000516 W6) hand-builds its sap_roof_windows and so is unaffected.

With both closed: 17 sap_windows, 0 misrouted to sap_roof_windows, 14 draught-proofed — reconstructing Elmhurst's lodged 84% (16/19 = (14 windows + 2 doors) / (17 windows + 2 doors)).

Full calculator + modelling + orchestration suites green (1885 pass); the 2 glazing draught-proofing xfails remain (the overlay recompute is the glazing agent's front).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
f7863f986d
commit
f68cea27c9
3 changed files with 125 additions and 17 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
import re
|
import re
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from typing import List, Optional
|
from typing import Final, List, Optional
|
||||||
|
|
||||||
from datatypes.epc.surveys.elmhurst_site_notes import (
|
from datatypes.epc.surveys.elmhurst_site_notes import (
|
||||||
AlternativeWall,
|
AlternativeWall,
|
||||||
|
|
@ -811,6 +811,19 @@ class ElmhurstSiteNotesExtractor:
|
||||||
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
||||||
)
|
)
|
||||||
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
||||||
|
# "Known data" rows (BFRC / SAP Table) lodge the §11 Data-Source cell on
|
||||||
|
# its own layout line with the U-value following on the next line — and
|
||||||
|
# carry no Frame Type / Frame Factor / Glazing Gap cells. The joined
|
||||||
|
# "<source> <U>" `_MANUFACTURER_RE` shape never matches them, so they are
|
||||||
|
# anchored by this standalone form instead (cert 001431 §11 has one
|
||||||
|
# "BFRC data" window). "Manufacturer"/"Default" are kept here only for
|
||||||
|
# symmetry; in practice they are always joined with the U-value on the same line.
|
||||||
|
_STANDALONE_DATA_SOURCE_RE = re.compile(
|
||||||
|
r"^(BFRC data|BFRC|SAP Table|Assessor|Manufacturer|Default)$"
|
||||||
|
)
|
||||||
|
# RdSAP 10 §3.7 default window frame factor, used for "known data" rows
|
||||||
|
# that lodge U and g directly and omit the frame-factor cell.
|
||||||
|
_DEFAULT_FRAME_FACTOR: Final[float] = 0.7
|
||||||
_ORIENTATION_TOKENS = frozenset({
|
_ORIENTATION_TOKENS = frozenset({
|
||||||
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
||||||
})
|
})
|
||||||
|
|
@ -900,7 +913,10 @@ class ElmhurstSiteNotesExtractor:
|
||||||
|
|
||||||
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
||||||
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
||||||
if self._MANUFACTURER_RE.match(lines[j].strip()):
|
stripped = lines[j].strip()
|
||||||
|
if self._MANUFACTURER_RE.match(stripped) or (
|
||||||
|
self._STANDALONE_DATA_SOURCE_RE.match(stripped)
|
||||||
|
):
|
||||||
return j
|
return j
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -985,6 +1001,20 @@ class ElmhurstSiteNotesExtractor:
|
||||||
# would-be glazing-prefix scan.
|
# would-be glazing-prefix scan.
|
||||||
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
||||||
|
|
||||||
|
# The data-source line is either the joined "Manufacturer 4.80" shape
|
||||||
|
# (source keyword + U on one line) or a sparse standalone "BFRC data"
|
||||||
|
# / "SAP Table" shape (keyword alone, U on the next line, and no frame
|
||||||
|
# cells lodged). Resolve which up front: a sparse row has no frame
|
||||||
|
# type/factor to parse.
|
||||||
|
data_source_line = lines[manuf_idx].strip()
|
||||||
|
joined_match = self._MANUFACTURER_RE.match(data_source_line)
|
||||||
|
standalone_match = (
|
||||||
|
None if joined_match is not None
|
||||||
|
else self._STANDALONE_DATA_SOURCE_RE.match(data_source_line)
|
||||||
|
)
|
||||||
|
if joined_match is None and standalone_match is None:
|
||||||
|
return None
|
||||||
|
|
||||||
# frame_type and frame_factor immediately follow the data line.
|
# frame_type and frame_factor immediately follow the data line.
|
||||||
# Layout-style cell joining sometimes collapses them onto a
|
# Layout-style cell joining sometimes collapses them onto a
|
||||||
# single "Wood 0.70" line; treat both shapes uniformly so the
|
# single "Wood 0.70" line; treat both shapes uniformly so the
|
||||||
|
|
@ -992,9 +1022,15 @@ class ElmhurstSiteNotesExtractor:
|
||||||
# field (glazing_gap / bp / location / orient).
|
# field (glazing_gap / bp / location / orient).
|
||||||
if data_idx + 1 >= len(lines):
|
if data_idx + 1 >= len(lines):
|
||||||
return None
|
return None
|
||||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
if standalone_match is not None:
|
||||||
lines, data_idx
|
# Sparse "known data" row: no frame type/factor/glazing-gap cells;
|
||||||
)
|
# everything between W×H×A and the data-source is location/orient.
|
||||||
|
frame_type, frame_factor = None, self._DEFAULT_FRAME_FACTOR
|
||||||
|
middle_start = data_idx + 1
|
||||||
|
else:
|
||||||
|
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||||
|
lines, data_idx
|
||||||
|
)
|
||||||
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -1017,28 +1053,40 @@ class ElmhurstSiteNotesExtractor:
|
||||||
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
||||||
)
|
)
|
||||||
|
|
||||||
# Manufacturer line carries data_source + u_value.
|
# Data-source line carries the source keyword and U-value: joined on
|
||||||
manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
|
# one line ("Manufacturer 4.80") or, for sparse rows, the keyword alone
|
||||||
if manuf_match is None:
|
# with the U-value on the next line ("BFRC data" / "1.00"). `post_idx`
|
||||||
return None
|
# is where g_value / draught / shutters begin in either layout.
|
||||||
data_source = manuf_match.group(1)
|
if joined_match is not None:
|
||||||
u_value = float(manuf_match.group(2))
|
data_source = joined_match.group(1)
|
||||||
|
u_value = float(joined_match.group(2))
|
||||||
|
post_idx = manuf_idx + 1
|
||||||
|
else:
|
||||||
|
assert standalone_match is not None
|
||||||
|
data_source = standalone_match.group(1)
|
||||||
|
if manuf_idx + 1 >= len(lines):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
u_value = float(lines[manuf_idx + 1].strip())
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
post_idx = manuf_idx + 2
|
||||||
|
|
||||||
# Post-manufacturer: g_value, draught, shutters.
|
# Post-data-source: g_value, draught, shutters.
|
||||||
if manuf_idx + 3 >= len(lines):
|
if post_idx + 2 >= len(lines):
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
g_value = float(lines[manuf_idx + 1].strip())
|
g_value = float(lines[post_idx].strip())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
|
draught_proofed = lines[post_idx + 1].strip().lower() == "yes"
|
||||||
permanent_shutters = lines[manuf_idx + 3].strip()
|
permanent_shutters = lines[post_idx + 2].strip()
|
||||||
|
|
||||||
# Prefix / suffix tokens (variable count) carry the
|
# Prefix / suffix tokens (variable count) carry the
|
||||||
# glazing-type, building-part, and orientation strings split by
|
# glazing-type, building-part, and orientation strings split by
|
||||||
# the layout preprocessor.
|
# the layout preprocessor.
|
||||||
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
||||||
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
|
after = [lines[j].strip() for j in range(post_idx + 3, after_end) if lines[j].strip()]
|
||||||
|
|
||||||
# Room-in-roof windows lodge their location as "Roof of Room in
|
# Room-in-roof windows lodge their location as "Roof of Room in
|
||||||
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
||||||
|
|
|
||||||
|
|
@ -4116,6 +4116,14 @@ def _is_elmhurst_roof_window(
|
||||||
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
# A window lodged on a wall is vertical by definition. The U-value
|
||||||
|
# backstop below only catches skylights whose location/BP gives no
|
||||||
|
# roof signal; without this guard a high-U *wall* window (e.g. an old
|
||||||
|
# "Double pre 2002" unit at U 3.1 / 3.4) is mis-routed to the roof-
|
||||||
|
# window list on U-value alone — cert 001431 §11 lodges two such
|
||||||
|
# External-wall windows that must remain vertical `sap_windows`.
|
||||||
|
if "wall" in (w.location or "").lower():
|
||||||
|
return False
|
||||||
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
52
tests/domain/modelling/test_window_extraction_001431.py
Normal file
52
tests/domain/modelling/test_window_extraction_001431.py
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
"""Window-extraction completeness pin for cert 001431.
|
||||||
|
|
||||||
|
The Modelling glazing overlay's draught-proofing recompute (RdSAP 10 §8.1 — a
|
||||||
|
count over openable windows + doors) needs every openable §11 window captured
|
||||||
|
with its `draught_proofed` flag. The Elmhurst Summary §11 block lodges 17
|
||||||
|
openable windows; two extraction gaps previously surfaced only 14:
|
||||||
|
|
||||||
|
1. The extractor rejected the one "Double glazing, known data" row whose
|
||||||
|
data-source cell is "BFRC data" (laid out as its own line, with no frame
|
||||||
|
factor) — it does not fit the `<data-source> <U>` Manufacturer-line shape.
|
||||||
|
2. The mapper's `_is_elmhurst_roof_window` reclassified the two "Double pre
|
||||||
|
2002" rows (U 3.1 / 3.4 > 3.0) as roof windows, even though both are
|
||||||
|
lodged on an "External wall" — a false positive of the U-value backstop.
|
||||||
|
|
||||||
|
With both closed, all 17 windows are `sap_windows` (none mis-routed to
|
||||||
|
`sap_roof_windows`), and 14 carry `draught_proofed=True` — reconstructing
|
||||||
|
Elmhurst's lodged 84% draught-proofing (16/19 = (14 windows + 2 doors) /
|
||||||
|
(17 windows + 2 doors)).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
|
from tests.domain.modelling._elmhurst_recommendation import (
|
||||||
|
parse_recommendation_summary,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_17_openable_windows_captured_on_001431() -> None:
|
||||||
|
# Arrange / Act
|
||||||
|
epc: EpcPropertyData = parse_recommendation_summary(
|
||||||
|
"double_glazing_001431_before.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert — every openable §11 window is captured as a vertical window;
|
||||||
|
# none of the wall-lodged rows leak into the roof-window list.
|
||||||
|
assert len(epc.sap_windows) == 17
|
||||||
|
assert not epc.sap_roof_windows # None or empty — no wall window misrouted
|
||||||
|
|
||||||
|
|
||||||
|
def test_draughtproofing_count_reconstructs_lodged_84_percent() -> None:
|
||||||
|
# Arrange / Act
|
||||||
|
epc: EpcPropertyData = parse_recommendation_summary(
|
||||||
|
"double_glazing_001431_before.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert — 14 of the 17 openable windows are draught-proofed, the numerator
|
||||||
|
# behind Elmhurst's lodged 84% (with the 2 lodged draught-proofed doors).
|
||||||
|
draughtproofed: int = sum(
|
||||||
|
1 for window in epc.sap_windows if window.draught_proofed
|
||||||
|
)
|
||||||
|
assert draughtproofed == 14
|
||||||
Loading…
Add table
Reference in a new issue