mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fix(extractor): capture all 17 openable §11 windows on cert 001431
cert 001431's §11 lodges 17 windows but only 14 surfaced, via two distinct gaps:
1. Extractor (_extract_windows_from_layout): the one "Double glazing, known
data" row whose §11 Data-Source cell is "BFRC data" was rejected — it is
laid out as a standalone keyword line with the U-value on the next line
and lodges no Frame Type/Factor/Gap cells, so it never matched the joined
"<source> <U>" Manufacturer-line shape. Now anchored by a standalone
data-source form, with the RdSAP 10 §3.7 default frame factor (0.7) for
the absent frame cell.
2. Mapper (_is_elmhurst_roof_window): the two "Double pre 2002" rows
(U 3.1 / 3.4 > 3.0) were reclassified as roof windows by the U-value
backstop even though both are lodged on an "External wall". A window
lodged on a wall is vertical by definition; guard the U-value backstop so
it only fires when location/BP give no roof signal.
With both closed: 17 sap_windows, 0 misrouted to sap_roof_windows.
Re-homed onto the mapper-validation line from feature/bill-derivation
(orig f68cea27); the modelling-only regression test
(tests/domain/modelling/test_window_extraction_001431.py) stays on
bill-derivation. KNOWN: the mapper guard breaks cert 000516's
test_summary_pdf_mapper_chain pins (W6 U=3.10 routing) — must be resolved
before this is PR'd to main.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d559298de2
commit
97f44b5364
2 changed files with 73 additions and 17 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
import re
|
import re
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
from typing import List, Optional
|
from typing import Final, List, Optional
|
||||||
|
|
||||||
from datatypes.epc.surveys.elmhurst_site_notes import (
|
from datatypes.epc.surveys.elmhurst_site_notes import (
|
||||||
AlternativeWall,
|
AlternativeWall,
|
||||||
|
|
@ -811,6 +811,19 @@ class ElmhurstSiteNotesExtractor:
|
||||||
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
||||||
)
|
)
|
||||||
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
||||||
|
# "Known data" rows (BFRC / SAP Table) lodge the §11 Data-Source cell on
|
||||||
|
# its own layout line with the U-value following on the next line — and
|
||||||
|
# carry no Frame Type / Frame Factor / Glazing Gap cells. The joined
|
||||||
|
# "<source> <U>" `_MANUFACTURER_RE` shape never matches them, so they are
|
||||||
|
# anchored by this standalone form instead (cert 001431 §11 has one
|
||||||
|
# "BFRC data" window). "Manufacturer"/"Default" are kept here only for
|
||||||
|
# symmetry; in practice they always appear joined with the U-value (the `_MANUFACTURER_RE` shape).
|
||||||
|
_STANDALONE_DATA_SOURCE_RE = re.compile(
|
||||||
|
r"^(BFRC data|BFRC|SAP Table|Assessor|Manufacturer|Default)$"
|
||||||
|
)
|
||||||
|
# RdSAP 10 §3.7 default window frame factor, used for "known data" rows
|
||||||
|
# that lodge U and g directly and omit the frame-factor cell.
|
||||||
|
_DEFAULT_FRAME_FACTOR: Final[float] = 0.7
|
||||||
_ORIENTATION_TOKENS = frozenset({
|
_ORIENTATION_TOKENS = frozenset({
|
||||||
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
||||||
})
|
})
|
||||||
|
|
@ -900,7 +913,10 @@ class ElmhurstSiteNotesExtractor:
|
||||||
|
|
||||||
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
||||||
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
||||||
if self._MANUFACTURER_RE.match(lines[j].strip()):
|
stripped = lines[j].strip()
|
||||||
|
if self._MANUFACTURER_RE.match(stripped) or (
|
||||||
|
self._STANDALONE_DATA_SOURCE_RE.match(stripped)
|
||||||
|
):
|
||||||
return j
|
return j
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -985,6 +1001,20 @@ class ElmhurstSiteNotesExtractor:
|
||||||
# would-be glazing-prefix scan.
|
# would-be glazing-prefix scan.
|
||||||
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
||||||
|
|
||||||
|
# The data-source line is either the joined "Manufacturer 4.80" shape
|
||||||
|
# (source keyword + U on one line) or a sparse standalone "BFRC data"
|
||||||
|
# / "SAP Table" shape (keyword alone, U on the next line, and no frame
|
||||||
|
# cells lodged). Resolve which up front: a sparse row has no frame
|
||||||
|
# type/factor to parse.
|
||||||
|
data_source_line = lines[manuf_idx].strip()
|
||||||
|
joined_match = self._MANUFACTURER_RE.match(data_source_line)
|
||||||
|
standalone_match = (
|
||||||
|
None if joined_match is not None
|
||||||
|
else self._STANDALONE_DATA_SOURCE_RE.match(data_source_line)
|
||||||
|
)
|
||||||
|
if joined_match is None and standalone_match is None:
|
||||||
|
return None
|
||||||
|
|
||||||
# frame_type and frame_factor immediately follow the data line.
|
# frame_type and frame_factor immediately follow the data line.
|
||||||
# Layout-style cell joining sometimes collapses them onto a
|
# Layout-style cell joining sometimes collapses them onto a
|
||||||
# single "Wood 0.70" line; treat both shapes uniformly so the
|
# single "Wood 0.70" line; treat both shapes uniformly so the
|
||||||
|
|
@ -992,9 +1022,15 @@ class ElmhurstSiteNotesExtractor:
|
||||||
# field (glazing_gap / bp / location / orient).
|
# field (glazing_gap / bp / location / orient).
|
||||||
if data_idx + 1 >= len(lines):
|
if data_idx + 1 >= len(lines):
|
||||||
return None
|
return None
|
||||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
if standalone_match is not None:
|
||||||
lines, data_idx
|
# Sparse "known data" row: no frame type/factor/glazing-gap cells;
|
||||||
)
|
# everything between W×H×A and the data-source is location/orient.
|
||||||
|
frame_type, frame_factor = None, self._DEFAULT_FRAME_FACTOR
|
||||||
|
middle_start = data_idx + 1
|
||||||
|
else:
|
||||||
|
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||||
|
lines, data_idx
|
||||||
|
)
|
||||||
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -1017,28 +1053,40 @@ class ElmhurstSiteNotesExtractor:
|
||||||
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
||||||
)
|
)
|
||||||
|
|
||||||
# Manufacturer line carries data_source + u_value.
|
# Data-source line carries the source keyword and U-value: joined on
|
||||||
manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
|
# one line ("Manufacturer 4.80") or, for sparse rows, the keyword alone
|
||||||
if manuf_match is None:
|
# with the U-value on the next line ("BFRC data" / "1.00"). `post_idx`
|
||||||
return None
|
# is where g_value / draught / shutters begin in either layout.
|
||||||
data_source = manuf_match.group(1)
|
if joined_match is not None:
|
||||||
u_value = float(manuf_match.group(2))
|
data_source = joined_match.group(1)
|
||||||
|
u_value = float(joined_match.group(2))
|
||||||
|
post_idx = manuf_idx + 1
|
||||||
|
else:
|
||||||
|
assert standalone_match is not None
|
||||||
|
data_source = standalone_match.group(1)
|
||||||
|
if manuf_idx + 1 >= len(lines):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
u_value = float(lines[manuf_idx + 1].strip())
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
post_idx = manuf_idx + 2
|
||||||
|
|
||||||
# Post-manufacturer: g_value, draught, shutters.
|
# Post-data-source: g_value, draught, shutters.
|
||||||
if manuf_idx + 3 >= len(lines):
|
if post_idx + 2 >= len(lines):
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
g_value = float(lines[manuf_idx + 1].strip())
|
g_value = float(lines[post_idx].strip())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
|
draught_proofed = lines[post_idx + 1].strip().lower() == "yes"
|
||||||
permanent_shutters = lines[manuf_idx + 3].strip()
|
permanent_shutters = lines[post_idx + 2].strip()
|
||||||
|
|
||||||
# Prefix / suffix tokens (variable count) carry the
|
# Prefix / suffix tokens (variable count) carry the
|
||||||
# glazing-type, building-part, and orientation strings split by
|
# glazing-type, building-part, and orientation strings split by
|
||||||
# the layout preprocessor.
|
# the layout preprocessor.
|
||||||
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
||||||
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
|
after = [lines[j].strip() for j in range(post_idx + 3, after_end) if lines[j].strip()]
|
||||||
|
|
||||||
# Room-in-roof windows lodge their location as "Roof of Room in
|
# Room-in-roof windows lodge their location as "Roof of Room in
|
||||||
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
||||||
|
|
|
||||||
|
|
@ -4116,6 +4116,14 @@ def _is_elmhurst_roof_window(
|
||||||
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
# A window lodged on a wall is vertical by definition. The U-value
|
||||||
|
# backstop below only catches skylights whose location/BP gives no
|
||||||
|
# roof signal; without this guard a high-U *wall* window (e.g. an old
|
||||||
|
# "Double pre 2002" unit at U 3.1 / 3.4) is mis-routed to the roof-
|
||||||
|
# window list on U-value alone — cert 001431 §11 lodges two such
|
||||||
|
# External-wall windows that must remain vertical `sap_windows`.
|
||||||
|
if "wall" in (w.location or "").lower():
|
||||||
|
return False
|
||||||
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue