mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fix(extractor): capture all 17 openable §11 windows on cert 001431
cert 001431's §11 lodges 17 windows but only 14 surfaced, via two distinct gaps:
1. Extractor (_extract_windows_from_layout): the one "Double glazing, known
data" row whose §11 Data-Source cell is "BFRC data" was rejected — it is
laid out as a standalone keyword line with the U-value on the next line
and lodges no Frame Type/Factor/Gap cells, so it never matched the joined
"<source> <U>" Manufacturer-line shape. Now anchored by a standalone
data-source form, with the RdSAP 10 §3.7 default frame factor (0.7) for
the absent frame cell.
2. Mapper (_is_elmhurst_roof_window): the two "Double pre 2002" rows
(U 3.1 / 3.4 > 3.0) were reclassified as roof windows by the U-value
backstop even though both are lodged on an "External wall". A window
lodged on a wall is vertical by definition; guard the U-value backstop so
it is skipped for wall-lodged windows and only fires when location/BP give no roof signal.
With both closed: 17 sap_windows, 0 misrouted to sap_roof_windows.
Re-homed onto the mapper-validation line from feature/bill-derivation
(orig f68cea27); the modelling-only regression test
(tests/domain/modelling/test_window_extraction_001431.py) stays on
bill-derivation. KNOWN: the mapper guard breaks cert 000516's
test_summary_pdf_mapper_chain pins (W6 U=3.10 routing) — must be resolved
before this is PR'd to main.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d559298de2
commit
97f44b5364
2 changed files with 73 additions and 17 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import re
|
||||
from datetime import date, datetime
|
||||
from typing import List, Optional
|
||||
from typing import Final, List, Optional
|
||||
|
||||
from datatypes.epc.surveys.elmhurst_site_notes import (
|
||||
AlternativeWall,
|
||||
|
|
@ -811,6 +811,19 @@ class ElmhurstSiteNotesExtractor:
|
|||
r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
|
||||
)
|
||||
_MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
|
||||
# "Known data" rows (BFRC / SAP Table) lodge the §11 Data-Source cell on
|
||||
# its own layout line with the U-value following on the next line — and
|
||||
# carry no Frame Type / Frame Factor / Glazing Gap cells. The joined
|
||||
# "<source> <U>" `_MANUFACTURER_RE` shape never matches them, so they are
|
||||
# anchored by this standalone form instead (cert 001431 §11 has one
|
||||
# "BFRC data" window). "Manufacturer"/"Default" are kept here only for
|
||||
# symmetry; in practice they always share one line with the U-value (the joined shape above).
|
||||
_STANDALONE_DATA_SOURCE_RE = re.compile(
|
||||
r"^(BFRC data|BFRC|SAP Table|Assessor|Manufacturer|Default)$"
|
||||
)
|
||||
# RdSAP 10 §3.7 default window frame factor, used for "known data" rows
|
||||
# that lodge U and g directly and omit the frame-factor cell.
|
||||
_DEFAULT_FRAME_FACTOR: Final[float] = 0.7
|
||||
_ORIENTATION_TOKENS = frozenset({
|
||||
"North", "South", "East", "West", "NE", "NW", "SE", "SW",
|
||||
})
|
||||
|
|
@ -900,7 +913,10 @@ class ElmhurstSiteNotesExtractor:
|
|||
|
||||
def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
|
||||
for j in range(data_idx + 1, min(data_idx + 12, len(lines))):
|
||||
if self._MANUFACTURER_RE.match(lines[j].strip()):
|
||||
stripped = lines[j].strip()
|
||||
if self._MANUFACTURER_RE.match(stripped) or (
|
||||
self._STANDALONE_DATA_SOURCE_RE.match(stripped)
|
||||
):
|
||||
return j
|
||||
return None
|
||||
|
||||
|
|
@ -985,6 +1001,20 @@ class ElmhurstSiteNotesExtractor:
|
|||
# would-be glazing-prefix scan.
|
||||
inline_glazing_type = anchor.group(4) if anchor.lastindex and anchor.lastindex >= 4 else None
|
||||
|
||||
# The data-source line is either the joined "Manufacturer 4.80" shape
|
||||
# (source keyword + U on one line) or a sparse standalone "BFRC data"
|
||||
# / "SAP Table" shape (keyword alone, U on the next line, and no frame
|
||||
# cells lodged). Resolve which up front: a sparse row has no frame
|
||||
# type/factor to parse.
|
||||
data_source_line = lines[manuf_idx].strip()
|
||||
joined_match = self._MANUFACTURER_RE.match(data_source_line)
|
||||
standalone_match = (
|
||||
None if joined_match is not None
|
||||
else self._STANDALONE_DATA_SOURCE_RE.match(data_source_line)
|
||||
)
|
||||
if joined_match is None and standalone_match is None:
|
||||
return None
|
||||
|
||||
# frame_type and frame_factor immediately follow the data line.
|
||||
# Layout-style cell joining sometimes collapses them onto a
|
||||
# single "Wood 0.70" line; treat both shapes uniformly so the
|
||||
|
|
@ -992,9 +1022,15 @@ class ElmhurstSiteNotesExtractor:
|
|||
# field (glazing_gap / bp / location / orient).
|
||||
if data_idx + 1 >= len(lines):
|
||||
return None
|
||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||
lines, data_idx
|
||||
)
|
||||
if standalone_match is not None:
|
||||
# Sparse "known data" row: no frame type/factor/glazing-gap cells;
|
||||
# everything between W×H×A and the data-source is location/orient.
|
||||
frame_type, frame_factor = None, self._DEFAULT_FRAME_FACTOR
|
||||
middle_start = data_idx + 1
|
||||
else:
|
||||
frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
|
||||
lines, data_idx
|
||||
)
|
||||
if frame_factor is None or not 0.0 < frame_factor <= 1.0:
|
||||
return None
|
||||
|
||||
|
|
@ -1017,28 +1053,40 @@ class ElmhurstSiteNotesExtractor:
|
|||
(t for t in middle if t in self._ORIENTATION_TOKENS), None
|
||||
)
|
||||
|
||||
# Manufacturer line carries data_source + u_value.
|
||||
manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
|
||||
if manuf_match is None:
|
||||
return None
|
||||
data_source = manuf_match.group(1)
|
||||
u_value = float(manuf_match.group(2))
|
||||
# Data-source line carries the source keyword and U-value: joined on
|
||||
# one line ("Manufacturer 4.80") or, for sparse rows, the keyword alone
|
||||
# with the U-value on the next line ("BFRC data" / "1.00"). `post_idx`
|
||||
# is where g_value / draught / shutters begin in either layout.
|
||||
if joined_match is not None:
|
||||
data_source = joined_match.group(1)
|
||||
u_value = float(joined_match.group(2))
|
||||
post_idx = manuf_idx + 1
|
||||
else:
|
||||
assert standalone_match is not None
|
||||
data_source = standalone_match.group(1)
|
||||
if manuf_idx + 1 >= len(lines):
|
||||
return None
|
||||
try:
|
||||
u_value = float(lines[manuf_idx + 1].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
post_idx = manuf_idx + 2
|
||||
|
||||
# Post-manufacturer: g_value, draught, shutters.
|
||||
if manuf_idx + 3 >= len(lines):
|
||||
# Post-data-source: g_value, draught, shutters.
|
||||
if post_idx + 2 >= len(lines):
|
||||
return None
|
||||
try:
|
||||
g_value = float(lines[manuf_idx + 1].strip())
|
||||
g_value = float(lines[post_idx].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
|
||||
permanent_shutters = lines[manuf_idx + 3].strip()
|
||||
draught_proofed = lines[post_idx + 1].strip().lower() == "yes"
|
||||
permanent_shutters = lines[post_idx + 2].strip()
|
||||
|
||||
# Prefix / suffix tokens (variable count) carry the
|
||||
# glazing-type, building-part, and orientation strings split by
|
||||
# the layout preprocessor.
|
||||
before = [lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()]
|
||||
after = [lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()]
|
||||
after = [lines[j].strip() for j in range(post_idx + 3, after_end) if lines[j].strip()]
|
||||
|
||||
# Room-in-roof windows lodge their location as "Roof of Room in
|
||||
# Roof" (wrapped across the prefix/suffix blocks). Detect it, pull
|
||||
|
|
|
|||
|
|
@ -4116,6 +4116,14 @@ def _is_elmhurst_roof_window(
|
|||
_ELMHURST_BP_ROOF_TYPES_WITH_ROOFLIGHTS
|
||||
):
|
||||
return True
|
||||
# A window lodged on a wall is vertical by definition. The U-value
|
||||
# backstop below only catches skylights whose location/BP gives no
|
||||
# roof signal; without this guard a high-U *wall* window (e.g. an old
|
||||
# "Double pre 2002" unit at U 3.1 / 3.4) is mis-routed to the roof-
|
||||
# window list on U-value alone — cert 001431 §11 lodges two such
|
||||
# External-wall windows that must remain vertical `sap_windows`.
|
||||
if "wall" in (w.location or "").lower():
|
||||
return False
|
||||
return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue