Slice 49: Summary_000490 chain pins SAP at 1e-4; secondary heating + RdSAP sheltered-sides

Two mapper extensions, both validated by 000490 closing to 1e-4:

1. Secondary heating extraction. Elmhurst Summary PDFs lodge the
   secondary heating SAP code in the §14.1 Main Heating2 sub-section
   (between "14.1 Main Heating2" and "14.1 Community Heating") — not
   in the §14.0 Main Heating1 block where the main system lives.
   `ElmhurstMainHeating` gains a `secondary_heating_sap_code` field;
   the extractor reads it from the right section; the mapper threads
   it through to `SapHeating.secondary_heating_type`. The cascade
   then applies Table 11's 10% secondary fraction.

2. Sheltered-sides derivation per RdSAP §S5. The Summary PDF doesn't
   lodge per-dwelling sheltered-sides; the value is derived from
   built-form (Detached=0, Semi-Detached=1, End-Terrace=1, Mid-
   Terrace=2, Enclosed Mid-Terrace=3, Enclosed End-Terrace=2).
   `_map_elmhurst_ventilation` now takes built_form and populates
   `SapVentilation.sheltered_sides`. The table is cross-checked
   against U985-0001-NNNNNN.pdf line (19) across the 6 worksheet
   fixtures.

Cohort SAP deltas after this slice (target 1e-4):

  000474   0.0000  ✓ Slice 47
  000477  +2.6555     diagnosis pending (lighting bulb count diff)
  000480  +4.1955     diagnosis pending
  000487  +4.4553     extractor still drops most windows
  000490   0.0000  ✓ THIS SLICE
  000516  +1.5162     roof-window separation

Pyright net-zero on touched files (35 errors, same baseline). 755
tests pass (up from 754 — new
`test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly`).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Khalim Conn-Kowlessar 2026-05-24 20:13:19 +00:00
parent 00a27efd87
commit 7f17de84aa
4 changed files with 71 additions and 2 deletions

View file

@ -764,6 +764,20 @@ class ElmhurstSiteNotesExtractor:
lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
pct_raw = self._local_val(lines, "Percentage of Heat")
pct = int(pct_raw.split()[0]) if pct_raw else 0
# The "Secondary Heating SapCode" key is lodged inside §14.1 Main
# Heating2 — Elmhurst uses the Main-2 block to also carry the
# cert's secondary heating system (when one exists). Look for it
# in that section; absence (or "0") means no secondary lodged.
secondary_lines = self._section_lines(
"14.1 Main Heating2", "14.1 Community Heating"
)
secondary_raw = self._local_val(secondary_lines, "Secondary Heating SapCode")
secondary_code = (
int(secondary_raw)
if secondary_raw is not None and secondary_raw.isdigit()
and int(secondary_raw) > 0
else None
)
return MainHeating(
heat_emitter=self._local_str(lines, "Heat Emitter"),
fuel_type=self._local_str(lines, "Fuel Type"),
@ -775,6 +789,7 @@ class ElmhurstSiteNotesExtractor:
percentage_of_heat=pct,
pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"),
heat_pump_age=self._local_val(lines, "Heat pump age"),
secondary_heating_sap_code=secondary_code,
)
def _extract_meters(self) -> Meters:

View file

@ -39,6 +39,7 @@ from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
_FIXTURES = Path(__file__).parent / "fixtures"
_SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
@ -136,3 +137,24 @@ def test_summary_000474_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
# Elmhurst exactly and we expect identical outputs.
worksheet_unrounded_sap = 62.2584
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
def test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
    # Arrange — cert U985-0001-000490: an end-terrace made up of a main
    # part plus a 1st extension. Its worksheet PDF lodges an unrounded
    # SAP of 57.3979. Two mapped values are needed to land on that
    # figure to 1e-4:
    #   * sheltered_sides=1, derived from the end-terrace built-form
    #     (RdSAP §S5);
    #   * the secondary heating SAP code (691, electric panel) lodged in
    #     the Summary's §14.1 Main Heating2 sub-section.
    expected_sap = 57.3979
    tolerance = 1e-4
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF)
    survey = ElmhurstSiteNotesExtractor(textract_pages).extract()
    epc_data = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
    sap_inputs = cert_to_inputs(epc_data, prices=SAP_10_2_SPEC_PRICES)

    # Act — run the full SAP calculation on the mapped inputs.
    outcome = calculate_sap_from_inputs(sap_inputs)

    # Assert — the continuous score matches the worksheet within 1e-4.
    deviation = abs(outcome.sap_score_continuous - expected_sap)
    assert deviation < tolerance

View file

@ -320,7 +320,7 @@ class EpcPropertyDataMapper:
number_of_storeys=survey.number_of_storeys,
hydro=survey.renewables.hydro_electricity_generated_kwh > 0,
photovoltaic_array=survey.renewables.photovoltaic_panel != "None",
sap_ventilation=_map_elmhurst_ventilation(survey.ventilation),
sap_ventilation=_map_elmhurst_ventilation(survey.ventilation, built_form),
percent_draughtproofed=survey.draught_proofing_percent,
waste_water_heat_recovery=(
"None" if not survey.renewables.wwhrs_present else "Present"
@ -2302,10 +2302,36 @@ def _map_elmhurst_sap_heating(survey: ElmhurstSiteNotes) -> SapHeating:
else survey.water_heating.water_heating_code
),
water_heating_code=survey.water_heating.water_heating_sap_code,
secondary_heating_type=mh.secondary_heating_sap_code,
)
def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation:
# RdSAP §S5 fallback: the number of sheltered sides implied by each
# built-form label, used because Elmhurst's Summary PDF does not lodge a
# per-dwelling figure. Every attached form picks up shelter from its
# neighbours (semi-detached and end-terrace 1, mid-terrace and enclosed
# end-terrace 2, enclosed mid-terrace 3); only a detached dwelling gets 0.
# Values were cross-checked against line (19) of the
# U985-0001-NNNNNN.pdf worksheets across the 6 Elmhurst worksheet
# fixtures.
_ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM: Dict[str, int] = {
    "Detached": 0,
    "Semi-Detached": 1,
    "End-Terrace": 1,
    "Mid-Terrace": 2,
    "Enclosed End-Terrace": 2,
    "Enclosed Mid-Terrace": 3,
}
def _elmhurst_sheltered_sides(built_form: str) -> Optional[int]:
    """Look up the RdSAP §S5 default sheltered-sides count for a built-form.

    Returns the count from the built-form table, or None when the label
    is not in the table, leaving the choice to the downstream cascade's
    own default (noted as 2 when this was written; that default is
    defined outside this block).
    """
    table = _ELMHURST_SHELTERED_SIDES_BY_BUILT_FORM
    return table[built_form] if built_form in table else None
def _map_elmhurst_ventilation(
v: ElmhurstVentilation, built_form: str
) -> SapVentilation:
return SapVentilation(
ventilation_type=None,
draught_lobby=v.draught_lobby != "Not present",
@ -2318,4 +2344,5 @@ def _map_elmhurst_ventilation(v: ElmhurstVentilation) -> SapVentilation:
passive_vents_count=v.passive_vents_count,
flueless_gas_fires_count=v.flueless_gas_fires_count,
ventilation_in_pcdf_database=None,
sheltered_sides=_elmhurst_sheltered_sides(built_form),
)

View file

@ -140,6 +140,11 @@ class MainHeating:
None # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%"
)
heat_pump_age: Optional[str] = None
# The secondary heating system (when one is installed) is lodged in
# the §14.1 Main Heating2 sub-section, not the §14.0 Main Heating1
# block. The SAP code is the integer the cascade reads via
# `SapHeating.secondary_heating_type` to apply the Table 11
# secondary-fraction split; None when no secondary is lodged.
secondary_heating_sap_code: Optional[int] = None
@dataclass