mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice S0380.128: extractor §14.0 closure falls back to "14.1 Community Heating"
Elmhurst Summary §14.0 Main Heating1 normally closes at "14.1 Main
Heating2", but community-heated dwellings and "no system" certs lodge
§14.0 followed directly by "14.1 Community Heating/Heat Network" (no
second main system exists on a community-heated dwelling). Pre-slice
the extractor's `_between("14.0 Main Heating1", "14.1 Main Heating2")`
returned an empty string for these shapes — every §14.0 field
(including `Main Heating SAP Code`) came back None, then the mapper
strict-raised `UnmappedElmhurstLabel` with "§14.0 Main Heating1 has
neither PCDF boiler reference (None) nor SAP code (None)".
The fix adds a `_section_lines_first_end(start, ends)` helper that
accepts a tuple of end-marker candidates and uses whichever appears
first after `start`. `_extract_main_heating` now closes §14.0 at
either "14.1 Main Heating2" or "14.1 Community Heating" — whichever
Summary lodges.
Impact on heating-systems corpus 001431 at `sap worksheets/heating
systems examples/`:
Variant                   Pre-S0380.128  ->  Post-S0380.128
------------------------  -------------      ---------------
community heating 1       mapper-raise   ->  SAP code 301 OK
community heating 2       mapper-raise   ->  SAP code 302 OK
community heating 3       mapper-raise   ->  SAP code 304 OK
community heating 4       mapper-raise   ->  SAP code 302 OK
community heating 6       mapper-raise   ->  SAP code 302 OK
no system                 mapper-raise   ->  SAP code 699 OK
Corpus tally: **35/41 -> 41/41 cascade-OK**. With all populated
variants now executing, the cascade-vs-worksheet residual cluster is
fully visible for the first time. Notably community heating 6 surfaces
the FIRST negative ΔSAP in the corpus (-6.87 — cascade undershooting
the worksheet rather than overshooting), a distinct diagnostic shape
worth investigating next.
The fix is structural (extractor section bracketing) — no spec rule
to cite. RdSAP 10 §17 page 85 row 1.0 ("Main Heating") + §17 row
10-1a ("Community Heat Source") confirm that community-heated certs
have only one main heating system (no Main 2 block).
Extended handover suite at HEAD post-slice: **832 pass, 0 fail**
(previously 831; +1 new Arrange-Act-Assert test).
Pyright net-zero on touched files (13 → 13 — pre-existing errors
unrelated).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
11ecac94dc
commit
729ee29c84
2 changed files with 72 additions and 1 deletions
|
|
@ -128,6 +128,32 @@ class ElmhurstSiteNotesExtractor:
|
|||
text = self._between(start, end)
|
||||
return [l.strip() for l in text.splitlines() if l.strip()]
|
||||
|
||||
def _section_lines_first_end(
    self, start: str, ends: tuple[str, ...],
) -> List[str]:
    """Return the non-blank lines between `start` and the nearest end marker.

    Like `_section_lines` but accepts multiple end-marker candidates and
    closes the section at whichever appears first after `start`. Defends
    against Summary-shape variants where the next-section heading differs
    (e.g. §14.0 Main Heating1 closes at "14.1 Main Heating2" on boiler/HP
    certs but at "14.1 Community Heating" on community-heated certs).

    Args:
        start: Heading text that opens the section; the section body begins
            immediately after its first occurrence in `self._text`.
        ends: Candidate headings that may close the section. Only
            occurrences located after `start` are considered.

    Returns:
        The section's lines, each stripped of surrounding whitespace, with
        blank lines dropped. An empty list when `start` is not present or
        when none of `ends` occurs after it.
    """
    text = self._text
    start_idx = text.find(start)
    if start_idx == -1:
        return []
    body_start = start_idx + len(start)

    # `str.find` yields -1 for a marker that never appears after the start
    # heading; keep only the markers that are actually present.
    candidates = [
        pos
        for pos in (text.find(end, body_start) for end in ends)
        if pos != -1
    ]
    if not candidates:
        return []

    # Close the section at the closest marker so a later, unrelated
    # occurrence of another candidate cannot over-extend the block.
    section = text[body_start:min(candidates)]
    return [
        stripped
        for line in section.splitlines()
        if (stripped := line.strip())
    ]
|
||||
|
||||
def _local_val(self, lines: List[str], label: str) -> Optional[str]:
|
||||
lb = label.rstrip(":")
|
||||
lc = lb + ":"
|
||||
|
|
@ -1171,7 +1197,18 @@ class ElmhurstSiteNotesExtractor:
|
|||
)
|
||||
|
||||
def _extract_main_heating(self) -> MainHeating:
|
||||
lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
|
||||
# Community-heated dwellings (e.g. SAP code 301 "Community heating
|
||||
# scheme" per SAP10.2 Table 4a category 6) and "no system" certs
|
||||
# (SAP code 699 "Electric heaters assumed where no system lodged")
|
||||
# lodge §14.0 Main Heating1 directly followed by §14.1 Community
|
||||
# Heating/Heat Network rather than §14.1 Main Heating2 — there is
|
||||
# no second main system on a community-heated dwelling. Close the
|
||||
# §14.0 block at whichever §14.1 form appears first so every
|
||||
# Summary shape surfaces the SAP code.
|
||||
lines = self._section_lines_first_end(
|
||||
"14.0 Main Heating1",
|
||||
("14.1 Main Heating2", "14.1 Community Heating"),
|
||||
)
|
||||
pct_raw = self._local_val(lines, "Percentage of Heat")
|
||||
pct = int(pct_raw.split()[0]) if pct_raw else 0
|
||||
# §14.0 "Main Heating SAP Code" identifies Main 1 by SAP 10.2
|
||||
|
|
|
|||
|
|
@ -236,6 +236,40 @@ def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None:
|
|||
assert len(epc.sap_building_parts) == 3
|
||||
|
||||
|
||||
def test_summary_001431_community_heating_1_main_heating_sap_code_extracted_when_no_main_heating_2_block() -> None:
    """§14.0 SAP code is extracted when no §14.1 Main Heating2 block exists.

    Heating-systems corpus fixture 001431 / "community heating 1" lodges
    §14.0 Main Heating1 directly followed by §14.1 Community Heating/Heat
    Network (no §14.1 Main Heating2 block, since community-heated dwellings
    don't have a second main system to lodge). The §14.0 block carries
    `Main Heating SAP Code: 301` (Community heating per SAP10.2 Table 4a
    category 6 — "Heat networks").

    Pre-slice the extractor's `_section_lines("14.0 Main Heating1",
    "14.1 Main Heating2")` returned an empty list because the end marker
    was missing, so every §14.0 field (incl. `Main Heating SAP Code`) came
    back as None. The mapper then raised `UnmappedElmhurstLabel` with
    "§14.0 Main Heating1 has neither PCDF boiler reference (None) nor SAP
    code (None)" — blocking all 6 community-heated + "no system" corpus
    variants from cascade execution.

    The fix closes the §14.0 block at whichever §14.1 marker appears first
    ("14.1 Main Heating2" or "14.1 Community Heating"), so the SAP code
    surfaces correctly on every Summary shape.
    """
    # Arrange
    base_dir = Path(__file__).parents[3]
    fixture_pdf = base_dir / "sap worksheets/heating systems examples/community heating 1/Summary_001431.pdf"
    extracted_notes = ElmhurstSiteNotesExtractor(
        _summary_pdf_to_textract_style_pages(fixture_pdf)
    ).extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(extracted_notes)

    # Assert
    first_main = mapped.sap_heating.main_heating_details[0]
    assert first_main.sap_main_heating_code == 301
|
||||
|
||||
|
||||
def test_summary_001431_pcdb_1_inaccessible_cylinder_resolves_to_normal_per_rdsap_10_table_28() -> None:
|
||||
# Arrange — Heating-systems corpus fixture 001431 / "pcdb 1" lodges
|
||||
# §15.1 "Cylinder Size: No Access" (the Elmhurst inaccessible-cylinder
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue