mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
S0380.195: pin sim case 4 (6035 floor geometry) e2e at 1e-4 — 6035 +19 PE is lodged divergence
Adds the user-simulated case-4 worksheet as e2e fixture `001431_6035` — reproduces golden cert 6035's full floor geometry (Main ground-floor HLP 15.99 + first-floor HLP 8.32, the asymmetric upper storey) and 8 windows. All 11 Block-1 line refs pin at abs=1e-4 against the worksheet (SAP 68, ECF 2.2802, cost 937.2341, CO2 4682.3494, space 15745.3260, main fuel 18744.4357). This is the 4th independent 1e-4 confirmation across the 6035 archetype (sim cases 1-4). Case 4 matches 6035 on floors + window areas; the residual ~50 kWh / £11 cascade delta vs 6035 is two lodged inputs only (largest window orientation N vs S; meter type "Dual" vs API 2), not calculator behaviour. Conclusion: the cascade reproduces the spec engine exactly for 6035's geometry, so 6035's +19 PE vs the lodged register is lodged-register divergence (the gov.uk register's rounded value vs the spec-exact worksheet), NOT a calculator gap. 6035 is a "pin-forever" lodged-only cert. Bugs surfaced + fixed along the way: S0380.192 (Simplified-RR remaining area) and S0380.193 (suspended-floor sealed rule). 2341 passed (+11), 0 failed; pyright net-zero. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
e7a0c9885e
commit
4a21717de6
5 changed files with 131 additions and 0 deletions
BIN
backend/documents_parser/tests/fixtures/Summary_001431_6035.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001431_6035.pdf
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,114 @@
|
|||
"""Mapper-driven cascade pin against the Elmhurst P960-0001-001431
|
||||
"simulated case 4" worksheet — a near-exact replica of golden cert
|
||||
6035 (Main + Extension + Simplified room-in-roof, 8 windows).
|
||||
|
||||
Like 000565 / sim case 1 / sim case 2, this fixture does NOT hand-build
|
||||
the EpcPropertyData: it routes the Summary PDF through
|
||||
ElmhurstSiteNotesExtractor + from_elmhurst_site_notes so the SAP-result
|
||||
pin grid exercises the WHOLE extractor + mapper + calculator pipeline.
|
||||
|
||||
Purpose: prove the calculator is spec-correct for the 6035 archetype
|
||||
(after S0380.192 Simplified-RR + S0380.193 suspended-floor fixes). This
|
||||
cert reproduces 6035's full floor geometry — Main ground-floor HLP
|
||||
15.99 m AND first-floor HLP 8.32 m (the asymmetric upper-storey
|
||||
perimeter) — plus 6035's 8 windows (≈14.15 m²). Two minor inputs still
|
||||
differ from 6035 (the largest window's orientation is N here vs S in
|
||||
6035; meter type "Dual" vs API code 2), accounting for a residual ~50
|
||||
kWh / £11 cascade delta — both are lodged inputs, not calculator
|
||||
behaviour. All 11 Block-1 line refs pin at abs=1e-4 against this cert's
|
||||
OWN worksheet, confirming the cascade reproduces the spec engine exactly
|
||||
for 6035's geometry — so 6035's residual +19 PE vs the lodged register
|
||||
is lodged-register divergence, not a cascade gap.
|
||||
|
||||
Cert shape: Main + Extension 1, both solid brick WITH internal
|
||||
insulation (Main) / as-built (Ext1), 3 storeys, Simplified room-in-roof
|
||||
on the Main (floor 29.75 m², exposed + party gables), suspended
|
||||
uninsulated ground floors (Main ground HLP 15.99 / first 8.32),
|
||||
gas-combi SAP code 104, 8 windows, no PV.
|
||||
|
||||
Source: user-simulated PDFs at `sap worksheets/golden fixture
|
||||
debugging/simulated case 4/`. The Summary is mirrored into the tracked
|
||||
`backend/documents_parser/tests/fixtures/Summary_001431_6035.pdf`
|
||||
(distinct name — the corpus reuses cert 001431).
|
||||
|
||||
Worksheet pin targets (P960-0001-001431, Block 1 — energy rating):
|
||||
- SAP rating 68 (line 258; continuous 68.1906), ECF 2.2802 (line 257)
|
||||
- Total fuel cost £937.2341 (line 255)
|
||||
- CO2 4682.3494 kg/year (line 272)
|
||||
- Space heating 15745.3260 kWh/year (Σ monthly (98))
|
||||
- Main 1 fuel 18744.4357 kWh/year (line 211)
|
||||
- Secondary fuel 0.0 (line 215)
|
||||
- Hot water fuel 3307.8383 kWh/year (line 219)
|
||||
- Lighting 262.0885 kWh/year (line 232)
|
||||
- Pumps/fans 86.0 kWh/year (line 231)
|
||||
|
||||
Per [[feedback-zero-error-strict]] + [[feedback-e2e-validation-philosophy]]:
pins are abs=1e-4 against the worksheet PDF.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
|
||||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
|
||||
# Walk up from this module to the repository root: the resolved file's
# parents are worksheet/ (0), sap10_calculator/ (1), domain/ (2),
# tests/ (3) and the repo root (4). The tracked Summary fixture lives
# under the documents_parser test fixtures, relative to that root.
_SUMMARY_PDF: Final[Path] = Path(__file__).resolve().parents[4].joinpath(
    "backend",
    "documents_parser",
    "tests",
    "fixtures",
    "Summary_001431_6035.pdf",
)
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
    """Convert a Summary PDF into the per-page text format the
    ElmhurstSiteNotesExtractor expects (label\\nvalue sequences).

    Modelled on the helper in `test_summary_pdf_mapper_chain.py` /
    `_elmhurst_worksheet_000565.py`.

    The page count is read from `pdfinfo`; each page is then rendered
    on its own with `pdftotext -layout`. Every non-blank layout line is
    split on runs of two or more whitespace characters and the pieces
    become one token per line; a blank layout line becomes an empty
    token. The tokens of a page are joined with newlines.

    Args:
        pdf_path: Path to the Summary PDF to convert.

    Returns:
        One string per PDF page, in page order.

    Raises:
        RuntimeError: if `pdfinfo` output carries no "Pages:" count.
        subprocess.CalledProcessError: if `pdfinfo` or `pdftotext`
            exits non-zero (both run with ``check=True``).
    """
    # Both poppler tools write UTF-8 by default, so decode explicitly
    # instead of relying on the locale's preferred encoding.
    info = subprocess.run(
        ["pdfinfo", str(pdf_path)],
        capture_output=True, text=True, encoding="utf-8", check=True,
    ).stdout
    m = re.search(r"Pages:\s+(\d+)", info)
    if m is None:
        raise RuntimeError(f"Could not parse page count from {pdf_path}")
    page_count = int(m.group(1))

    # A gap of 2+ whitespace characters separates columns in -layout mode.
    column_gap = re.compile(r"\s{2,}")

    pages: list[str] = []
    for page_no in range(1, page_count + 1):
        # -f / -l restrict the render to exactly this one page.
        layout = subprocess.run(
            [
                "pdftotext", "-layout", "-f", str(page_no), "-l", str(page_no),
                str(pdf_path), "-",
            ],
            capture_output=True, text=True, encoding="utf-8", check=True,
        ).stdout
        tokens: list[str] = []
        for line in layout.splitlines():
            stripped = line.strip()
            if not stripped:
                # Keep blank lines as empty tokens.
                tokens.append("")
                continue
            tokens.extend(p for p in column_gap.split(stripped) if p)
        pages.append("\n".join(tokens))
    return pages
|
||||
|
||||
|
||||
def build_epc() -> EpcPropertyData:
    """Route the simulated case-4 Summary through extractor + mapper.

    No hand-built EpcPropertyData — the extractor and mapper are part of
    the test target. Exercises the S0380.192 Simplified-RR fix and the
    S0380.193 suspended-floor sealed-rule fix.

    Returns:
        The EpcPropertyData produced by
        `EpcPropertyDataMapper.from_elmhurst_site_notes` from the site
        notes extracted out of `Summary_001431_6035.pdf`.
    """
    # PDF -> per-page label/value text -> site notes -> domain object.
    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_PDF)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
|
@ -40,6 +40,7 @@ from tests.domain.sap10_calculator.worksheet import (
|
|||
_elmhurst_worksheet_001431 as _w001431,
|
||||
_elmhurst_worksheet_001431_rr as _w001431_rr,
|
||||
_elmhurst_worksheet_001431_rr8 as _w001431_rr8,
|
||||
_elmhurst_worksheet_001431_6035 as _w001431_6035,
|
||||
)
|
||||
from tests.domain.sap10_calculator.worksheet._elmhurst_fixtures import (
|
||||
ALL_FIXTURES as _ELMHURST_FIXTURES,
|
||||
|
|
@ -201,6 +202,21 @@ _FIXTURE_PINS: Final[dict[str, FixtureCascadePins]] = {
|
|||
lighting_kwh_per_yr=262.0885,
|
||||
pumps_fans_kwh_per_yr=86.0,
|
||||
),
|
||||
# Mapper-driven cohort entry — Summary_001431_6035.pdf → extractor →
|
||||
# mapper → calculator. Reproduces 6035's full floor geometry (Main
|
||||
# ground HLP 15.99 + first 8.32, asymmetric) and 8 windows. Residual
|
||||
# vs 6035 is two lodged inputs only (largest window orientation,
|
||||
# meter type). Pins at 1e-4 → 6035's +19 PE is lodged divergence.
|
||||
"001431_6035": FixtureCascadePins(
|
||||
sap_score=68, sap_score_continuous=68.1906, ecf=2.2802,
|
||||
total_fuel_cost_gbp=937.2341, co2_kg_per_yr=4682.3494,
|
||||
space_heating_kwh_per_yr=15745.3260,
|
||||
main_heating_fuel_kwh_per_yr=18744.4357,
|
||||
secondary_heating_fuel_kwh_per_yr=0.0,
|
||||
hot_water_kwh_per_yr=3307.8383,
|
||||
lighting_kwh_per_yr=262.0885,
|
||||
pumps_fans_kwh_per_yr=86.0,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -215,6 +231,7 @@ _FIXTURE_MODULES: Final[dict[str, ModuleType]] = {
|
|||
"001431": _w001431,
|
||||
"001431_rr": _w001431_rr,
|
||||
"001431_rr8": _w001431_rr8,
|
||||
"001431_6035": _w001431_6035,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue