mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice S0380.16: add 'Normal' → cylinder_size=2 (110 L) for cohort 2
Unblocks two 38-cert-cohort certs that previously raised
`UnmappedElmhurstLabel("cylinder_size", 'Normal')` at extraction:
cert 2536-2525-0600-0788-2292 ws SAP=79.7264
cert 9421-3045-3205-1646-6200 ws SAP=87.4495
Both Summary §15.1 lodgements read "Cylinder Size: Normal"; both dr87
worksheets lodge line ref (47) "Store volume = 110.0000" L (extracted
from `Hot Water Cylinder → Cylinder Volume 110.00`). RdSAP 10 §10.5
Table 28 documents the "Normal (90-130 litres)" descriptor whose
midpoint is 110 L — the canonical Elmhurst label string in
`datatypes/epc/surveys/elmhurst_site_notes.py` is "Normal (90-130
litres)", and the worksheet's exact 110 L matches the midpoint.
Two-line fix:
+ "Normal": 2, in `_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10`
+ 2: 110.0, in `_CYLINDER_SIZE_CODE_TO_LITRES`
The cascade enum 2 is consistent with the existing
`cert_to_inputs.py` docstring's documented (but not-yet-observed)
code 2 → Normal slot, alongside code 3 (Medium / 160 L) and code 4
(Large / 210 L) added in earlier slices.
The slice stays tight: two mapping unit tests pin `cylinder_size == 2`
for both certs at extraction. Post-fix, the first-attempt cascade
deltas vs the worksheet are:
cert 2536 Δ +0.0244 (was: RAISES)
cert 9421 Δ +0.0296 (was: RAISES)
Both deltas now sit in the same systematic +0.02..+0.07 small-gap
band as ~12 other first-attempt certs in cohort 2. Adding a chain test
with a ±0.07 pin would just paper over a known systematic residual that
the user has explicitly asked to drive towards 1e-4, not towards ±0.07.
A following slice will investigate the shared systematic offset and
close certs 2536 / 9421 along with the rest of the +0.04 band on
the chain.
Pyright net-zero per file:
- datatypes/epc/domain/mapper.py: 32 (baseline 32)
- domain/sap10_calculator/rdsap/cert_to_inputs.py: 35 (baseline 35)
- backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0
Regression baseline: 691 pass + 10 fail (= prior 689 pass + 10 fail, plus 2 new GREEN).
Spec refs:
- RdSAP 10 §10.5 Table 28 — "Cylinder Volume" Normal band 90-130 L,
midpoint 110 L (also the canonical Elmhurst label suffix).
- Cert 2536 worksheet `dr87-0001-000889.pdf` line ref (47) = 110.0000.
- Cert 9421 worksheet `dr87-0001-000884.pdf` line ref (47) = 110.0000.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
c732c21836
commit
69668ec634
5 changed files with 49 additions and 8 deletions
BIN
backend/documents_parser/tests/fixtures/Summary_000884.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_000884.pdf
vendored
Normal file
Binary file not shown.
BIN
backend/documents_parser/tests/fixtures/Summary_000889.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_000889.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -70,6 +70,8 @@ _SUMMARY_000904_PDF = _FIXTURES / "Summary_000904.pdf" # cert 9285
|
|||
_SUMMARY_000900_PDF = _FIXTURES / "Summary_000900.pdf" # cert 2225
|
||||
_SUMMARY_000898_PDF = _FIXTURES / "Summary_000898.pdf" # cert 2636
|
||||
_SUMMARY_000902_PDF = _FIXTURES / "Summary_000902.pdf" # cert 9418
|
||||
_SUMMARY_000889_PDF = _FIXTURES / "Summary_000889.pdf" # cert 2536 (Normal cylinder)
|
||||
_SUMMARY_000884_PDF = _FIXTURES / "Summary_000884.pdf" # cert 9421 (Normal cylinder)
|
||||
|
||||
# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
|
||||
# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
|
||||
|
|
@ -876,6 +878,39 @@ def test_all_seven_ashp_cohort_certs_extract_without_unmapped_label_raise() -> N
|
|||
EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
|
||||
def test_summary_2536_normal_cylinder_routes_to_code_2() -> None:
|
||||
# Arrange — cert 2536-2525-0600-0788-2292's Summary §15.1 lodges
|
||||
# "Cylinder Size: Normal". The dr87 worksheet lodges "Cylinder
|
||||
# Volume 110.00" L on line ref (47); the cascade lookup
|
||||
# `_CYLINDER_SIZE_CODE_TO_LITRES` now maps code 2 → 110 L per
|
||||
# RdSAP 10 §10.5 Table 28's Normal (90-130 L) band midpoint.
|
||||
# First cohort cert to exercise the "Normal" cylinder lodging.
|
||||
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000889_PDF)
|
||||
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
||||
|
||||
# Act
|
||||
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
# Assert
|
||||
assert epc.sap_heating.cylinder_size == 2
|
||||
|
||||
|
||||
def test_summary_9421_normal_cylinder_routes_to_code_2() -> None:
|
||||
# Arrange — cert 9421-3045-3205-1646-6200's Summary §15.1 also
|
||||
# lodges "Cylinder Size: Normal" (same 110 L cylinder as cert
|
||||
# 2536). Second cohort cert exercising the "Normal" mapping —
|
||||
# pinned to guard against silent regression of either the mapper
|
||||
# dict entry OR the cascade volume default.
|
||||
pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000884_PDF)
|
||||
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
||||
|
||||
# Act
|
||||
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||||
|
||||
# Assert
|
||||
assert epc.sap_heating.cylinder_size == 2
|
||||
|
||||
|
||||
def test_summary_9418_large_cylinder_routes_to_code_4() -> None:
|
||||
# Arrange — cert 9418-3062-8205-3566-7200's Summary §15.1 lodges
|
||||
# "Cylinder Size: Large". The dr87 worksheet lodges "Cylinder
|
||||
|
|
|
|||
|
|
@ -3401,12 +3401,13 @@ class UnmappedElmhurstLabel(ValueError):
|
|||
|
||||
# Elmhurst Summary §15.1 "Cylinder Size" labels mapped to the SAP10
|
||||
# cascade enum that `domain/sap10_calculator/rdsap/cert_to_inputs.py`
|
||||
# `_CYLINDER_SIZE_CODE_TO_LITRES` keys ({3: 160.0, 4: 210.0}). Exercised
|
||||
# by the cohort: "Medium" (cert 0380 et al — 160 L) and "Large" (cert
|
||||
# 9418 — 210 L). "Small" and "Very Large" labels are deferred until a
|
||||
# fixture exercises them — when encountered they raise
|
||||
# `UnmappedElmhurstLabel` rather than silently returning None.
|
||||
# `_CYLINDER_SIZE_CODE_TO_LITRES` keys. Exercised by the cohort:
|
||||
# "Normal" (certs 2536, 9421 — 110 L), "Medium" (cert 0380 et al —
|
||||
# 160 L) and "Large" (cert 9418 — 210 L). "Small" and "Very Large"
|
||||
# labels are deferred until a fixture exercises them — when encountered
|
||||
# they raise `UnmappedElmhurstLabel` rather than silently returning None.
|
||||
_ELMHURST_CYLINDER_SIZE_LABEL_TO_SAP10: Dict[str, int] = {
|
||||
"Normal": 2,
|
||||
"Medium": 3,
|
||||
"Large": 4,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1869,13 +1869,18 @@ _TABLE_3A_COMBI_LOSS_MAIN_HEATING_CATEGORIES: Final[frozenset[int]] = frozenset(
|
|||
# RdSAP 10 §10.5 Table 28: lodged "Cylinder size" descriptors → SAP
|
||||
# calculation litres. The Open EPC API encodes the descriptor as an
|
||||
# integer per the cohort below (ground-truthed against worksheet (47)
|
||||
# line refs in /sap worksheets/Additional data with api/<cert>/dr87-*.pdf):
|
||||
# line refs in /sap worksheets/Additional data with api/<cert>/dr87-*.pdf
|
||||
# and /sap worksheets/additional with api 2/<cert>/dr87-*.pdf):
|
||||
# code 1 → no cylinder (gated via `has_hot_water_cylinder`)
|
||||
# code 2 → Normal (110 litres) (certs 2536, 9421 — worksheet (47)
|
||||
# lodges 110.0)
|
||||
# code 3 → Medium (160 litres) (certs 0350, 0380, 2225, 2636,
|
||||
# 3800, 9285)
|
||||
# code 4 → Large (210 litres) (cert 9418)
|
||||
# Codes 2 / 5 / 6 (Normal / Inaccessible / Exact) not yet observed.
|
||||
_CYLINDER_SIZE_CODE_TO_LITRES: Final[dict[int, float]] = {3: 160.0, 4: 210.0}
|
||||
# Codes 5 / 6 (Inaccessible / Exact) not yet observed.
|
||||
_CYLINDER_SIZE_CODE_TO_LITRES: Final[dict[int, float]] = {
|
||||
2: 110.0, 3: 160.0, 4: 210.0
|
||||
}
|
||||
|
||||
# RdSAP 10 §10.5 code 7-11: cylinder insulation type. Empirical mapping
|
||||
# from the ASHP cohort (all 7 certs lodge code 1, worksheet shows
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue