mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Slice 54: Elmhurst mapper sets extensions_count from len(survey.extensions)
`from_elmhurst_site_notes` hard-coded `extensions_count=0` regardless of how many extensions the survey lodged. The 6 cohort certs from Slices 47-53 all happened to have 0-2 extensions, but nothing load-bearing read the count, so this latent bug was invisible. Cert 001479 (Summary_001479.pdf, GOV.UK EPB cert 0535-9020-6509-0821-6222) has Main + Extension 1 + Extension 2 and is the first cohort cert with a real API counterpart — an accurate `extensions_count` becomes load-bearing the moment the cross-mapper parity assertion compares the API and Elmhurst EpcPropertyData side by side. There is no SAP-cascade impact (the cascade iterates `sap_building_parts`, not `extensions_count`), but this is a real data-integrity bug surfaced by the cross-mapper diff. Adds Summary_001479.pdf as a new chain-test fixture and a `_SUMMARY_001479_PDF` constant for follow-up slices that will land per-building-part (per-bp) ages, exposed floors, secondary-heating SAP codes, etc. All 9 chain tests green; 321 mapper/site-notes/rdsap tests green; pyright net-zero (35-error baseline preserved on mapper.py). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
a756114aed
commit
4427b58a44
3 changed files with 20 additions and 1 deletion
BIN
backend/documents_parser/tests/fixtures/Summary_001479.pdf
vendored
Normal file
BIN
backend/documents_parser/tests/fixtures/Summary_001479.pdf
vendored
Normal file
Binary file not shown.
|
|
@ -44,6 +44,7 @@ _SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf"
|
|||
_SUMMARY_000487_PDF = _FIXTURES / "Summary_000487.pdf"
|
||||
_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
|
||||
_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
|
||||
_SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf"
|
||||
|
||||
|
||||
def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
|
||||
|
|
@ -249,3 +250,21 @@ def test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
|
|||
# Assert
|
||||
worksheet_unrounded_sap = 57.3979
|
||||
assert abs(result.sap_score_continuous - worksheet_unrounded_sap) < 1e-4
|
||||
|
||||
|
||||
def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None:
    # Arrange — Summary_001479 (cert 0535-9020-6509-0821-6222) is the
    # first cohort cert that also has a GOV.UK API counterpart. Its
    # worksheet PDF lodges Main + Extension 1 + Extension 2, i.e. 3
    # building parts of which 2 are extensions. Before this slice the
    # Elmhurst mapper hard-coded `extensions_count=0` regardless of
    # survey.extensions; this test pins that the count flows through.
    textract_pages = _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
    extractor = ElmhurstSiteNotesExtractor(textract_pages)
    survey = extractor.extract()

    # Act
    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)

    # Assert — extensions are counted, and every building part
    # (main + both extensions) reaches the SAP building-part list.
    assert mapped.extensions_count == 2
    assert len(mapped.sap_building_parts) == 3
|
||||
|
|
|
|||
|
|
@ -309,7 +309,7 @@ class EpcPropertyDataMapper:
|
|||
has_hot_water_cylinder=survey.water_heating.hot_water_cylinder_present,
|
||||
has_fixed_air_conditioning=survey.ventilation.fixed_space_cooling,
|
||||
wet_rooms_count=0,
|
||||
extensions_count=0,
|
||||
extensions_count=len(survey.extensions),
|
||||
heated_rooms_count=survey.heated_habitable_rooms,
|
||||
open_chimneys_count=survey.ventilation.open_chimneys_count,
|
||||
habitable_rooms_count=survey.habitable_rooms,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue