mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
fix(elmhurst-mapper): strip wrapped building-part fragment from glazing label
pdftotext can wrap the §11 building-part column onto the glazing-TYPE
token without an intervening glazing-gap descriptor, e.g. "Double between
2002 and 2021 1st" (the "1st" marks the 1st Extension). The existing
trailing-gap fallback only strips the fragment when preceded by "N mm";
the bare ordinal raised UnmappedElmhurstLabel.
New `_ELMHURST_GLAZING_LABEL_TRAILING_BP_RE` strips a trailing ordinal
("1st"/"2nd"/…) or "Main" and retries the lookup. No glazing-type key
ends in an ordinal or "Main", so it is loss-free. Surfaced by worksheet
`simulated case 33` (direct-acting electric boiler + immersion), which
previously could not be routed through the Summary cascade.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
3cb2711418
commit
020ac6f220
2 changed files with 30 additions and 0 deletions
|
|
@@ -1639,6 +1639,20 @@ def test_elmhurst_glazing_label_full_coverage_per_sap10_table_6b() -> None:
|
|||
)
|
||||
|
||||
|
||||
def test_elmhurst_glazing_label_strips_wrapped_building_part_fragment() -> None:
    # Arrange — pdftotext can glue the §11 building-part column (an ordinal
    # such as "1st" for the 1st Extension) straight onto the glazing-TYPE
    # token, with no glazing-GAP descriptor ("16 mm") in between. The
    # trailing fragment identifies the building part, not the glazing, so
    # each lodged label below must resolve to the code of its base label.
    # First seen on worksheet `simulated case 33` (direct-acting electric
    # boiler + immersion).
    expected_code_by_lodged_label = {
        "Double between 2002 and 2021 1st": 3,
        "Single glazing 2nd": 1,
    }

    # Act / Assert — the wrapped fragment is dropped before the lookup.
    for lodged_label, expected_code in expected_code_by_lodged_label.items():
        assert _elmhurst_glazing_type_code(lodged_label) == expected_code
|
||||
|
||||
|
||||
def test_extension_party_wall_type_read_independently_of_as_main_wall() -> None:
|
||||
# Arrange — RdSAP 10 §3.3: "As Main Wall: Yes" inherits only the
|
||||
# external wall CONSTRUCTION; the party wall type is lodged
|
||||
|
|
|
|||
|
|
@@ -5435,6 +5435,15 @@ _ELMHURST_GLAZING_LABEL_NOISE_SUFFIX_RE: Final[re.Pattern[str]] = re.compile(
|
|||
_ELMHURST_GLAZING_LABEL_TRAILING_GAP_RE: Final[re.Pattern[str]] = re.compile(
|
||||
r"\s+\d+\s*mm\b.*$"
|
||||
)
|
||||
# Fallback only: pdftotext can wrap the §11 building-part column onto the
|
||||
# glazing-TYPE token WITHOUT an intervening glazing-gap descriptor, e.g.
|
||||
# "Double between 2002 and 2021 1st" (the "1st" marks the 1st Extension).
|
||||
# The ordinal / "Main" fragment is a building-part marker, not part of the
|
||||
# glazing type — strip it and retry. No glazing-type key ends in an ordinal
|
||||
# or "Main", so this is loss-free. Surfaced by `simulated case 33`.
|
||||
# Pattern notes: the match is anchored with `$` at the end of the label and
# must be preceded by whitespace; an ordinal is one or more digits followed
# by st/nd/rd/th, and "Main" is matched case-sensitively (re.compile is
# called without flags).
_ELMHURST_GLAZING_LABEL_TRAILING_BP_RE: Final[re.Pattern[str]] = re.compile(
|
||||
r"\s+(?:\d+(?:st|nd|rd|th)|Main)$"
|
||||
)
|
||||
|
||||
|
||||
def _elmhurst_glazing_type_code(label: Optional[str]) -> int:
|
||||
|
|
@@ -5459,6 +5468,13 @@ def _elmhurst_glazing_type_code(label: Optional[str]) -> int:
|
|||
code = _ELMHURST_GLAZING_LABEL_TO_SAP10.get(degapped)
|
||||
if code is not None:
|
||||
return code
|
||||
# Fallback: strip a trailing wrapped building-part fragment (ordinal /
|
||||
# "Main") and retry.
|
||||
debp = _ELMHURST_GLAZING_LABEL_TRAILING_BP_RE.sub("", cleaned).strip()
|
||||
if debp != cleaned:
|
||||
code = _ELMHURST_GLAZING_LABEL_TO_SAP10.get(debp)
|
||||
if code is not None:
|
||||
return code
|
||||
raise UnmappedElmhurstLabel("glazing_type", label)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue