Model/backend/documents_parser/tests/test_elmhurst_end_to_end.py
Khalim Conn-Kowlessar 256a5afee5 Slice 46c: Elmhurst mapper produces calculator-equivalent EpcPropertyData — Summary_000474 SAP within 0.5 of worksheet PDF
The full Summary→ElmhurstSiteNotes→EpcPropertyData→cascade→SAP chain now produces unrounded SAP 62.52 for cert U985-0001-000474 vs the worksheet PDF's 62.2584 — inside the 0.5 tolerance the user accepts on the API-cert residual cohort. The hand-built worksheet-fixture chain matches Elmhurst's unrounded SAP to 4 d.p. (62.2584), so the calculator+cascade are provably equivalent to Elmhurst's calculator; this slice closes the mapper side of the chain.

Mapper changes drop the string-versus-int impedance mismatch that prevented the cascade from consuming Elmhurst-coded values:
- construction_age_band: `_strip_code('B 1900-1929')` → 'B' (was '1900-1929')
- wall_construction: `_elmhurst_wall_construction_int('CA Cavity')` → 4 (was string 'Cavity')
- wall_insulation_type: `'A As Built'` → 4 (was string 'As Built')
- party_wall_construction: same int-mapping treatment
- main_fuel_type: `_elmhurst_main_fuel_int('Mains gas')` → 26 (the Table 12 fuel code; was string)
- heat_emitter_type: `'Radiators'` → 1 (was string)
- main_heating_control: `_elmhurst_sap_control_code('SAP code 2106, ...')` → 2106 (the SAP code int; was the trailing description)
- main_heating_index_number: parsed leading int from `pcdf_boiler_reference` ('16839 Vaillant…' → 16839) + `main_heating_data_source=1` so the PCDB cascade fires
- window orientation: `_elmhurst_orientation_int('North-West')` → 8 (the SAP10 octant; was string — solar gains were dropping to 0 W/m² as a result)

Floor handling also re-aligned with the SAP convention: floors sorted with the lowest as floor=0 (Elmhurst lodges 1st-floor entries first in the PDF); zero-area entries filtered out (single-storey extensions); non-ground room heights get the +0.25 m joist-void adjustment; `is_exposed_floor=True` for ground floors lodged above unheated space ('U Above unheated space'). `total_floor_area_m2` now sums across main + extensions.

Three regression pins on the new path:
- sap_building_parts == 3 (multi-bp)
- sap_windows == 7 (layout-style window parser)
- unrounded SAP within 0.5 of 62.2584 (worksheet PDF line 257)

Existing end-to-end test assertions updated to reflect the spec-correct int codes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 18:32:20 +00:00

368 lines
15 KiB
Python

import json
import os
from datetime import date
import pytest
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier, EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
# Directory holding the extracted-text JSON fixtures, resolved relative to
# this test module so the tests do not depend on the working directory.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# Page text for the two Elmhurst site-notes documents exercised below.
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
@pytest.fixture(scope="module")
def result() -> EpcPropertyData:
    """Return the ``EpcPropertyData`` mapped from the first site-notes fixture.

    The JSON at ``FIXTURE_PATH`` is loaded, passed through
    ``ElmhurstSiteNotesExtractor`` and then through
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. The fixture is
    module-scoped, so this chain runs once and the same object is shared by
    every test in this module that requests ``result``.
    """
    # Pin the encoding: without it ``open`` falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII text in the fixture.
    with open(FIXTURE_PATH, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
@pytest.fixture(scope="module")
def result2() -> EpcPropertyData:
    """Return the ``EpcPropertyData`` mapped from the second site-notes fixture.

    The JSON at ``FIXTURE_PATH_2`` is loaded, passed through
    ``ElmhurstSiteNotesExtractor`` and then through
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. The fixture is
    module-scoped, so this chain runs once and the same object is shared by
    every test in this module that requests ``result2``.
    """
    # Pin the encoding: without it ``open`` falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII text in the fixture.
    with open(FIXTURE_PATH_2, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
class TestAddress:
    """Address fields mapped from the first site-notes fixture."""

    def test_address_line_1(self, result: EpcPropertyData) -> None:
        first_line = result.address_line_1
        assert first_line == "19, Queens Road"

    def test_post_town(self, result: EpcPropertyData) -> None:
        town = result.post_town
        assert town == "BURNLEY"

    def test_postcode(self, result: EpcPropertyData) -> None:
        code = result.postcode
        assert code == "BB10 1XX"
class TestInspectionInfo:
    """Inspection metadata mapped from the first site-notes fixture."""

    def test_inspection_date(self, result: EpcPropertyData) -> None:
        expected = date(2026, 3, 6)
        assert result.inspection_date == expected

    def test_tenure(self, result: EpcPropertyData) -> None:
        tenure = result.tenure
        assert tenure == "Rented (social)"

    def test_transaction_type(self, result: EpcPropertyData) -> None:
        transaction = result.transaction_type
        assert transaction == "Grant scheme"

    def test_report_reference(self, result: EpcPropertyData) -> None:
        reference = result.report_reference
        assert reference == "P960-0001-001573"
class TestPropertyDescription:
    """Dwelling description fields mapped from the first site-notes fixture."""

    def test_property_type(self, result: EpcPropertyData) -> None:
        kind = result.property_type
        assert kind == "Bungalow"

    def test_built_form(self, result: EpcPropertyData) -> None:
        form = result.built_form
        assert form == "End-Terrace"

    def test_dwelling_type(self, result: EpcPropertyData) -> None:
        dwelling = result.dwelling_type
        assert dwelling == "End-Terrace bungalow"

    def test_number_of_storeys(self, result: EpcPropertyData) -> None:
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_has_conservatory(self, result: EpcPropertyData) -> None:
        conservatory = result.has_conservatory
        assert conservatory is False

    def test_total_floor_area(self, result: EpcPropertyData) -> None:
        area_m2 = result.total_floor_area_m2
        assert area_m2 == 44.89
class TestCounts:
    """Room, door and chimney counts mapped from the first site-notes fixture."""

    def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
        habitable = result.habitable_rooms_count
        assert habitable == 2

    def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
        heated = result.heated_rooms_count
        assert heated == 2

    def test_door_count(self, result: EpcPropertyData) -> None:
        doors = result.door_count
        assert doors == 0

    def test_insulated_door_count(self, result: EpcPropertyData) -> None:
        insulated_doors = result.insulated_door_count
        assert insulated_doors == 0

    def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
        open_chimneys = result.open_chimneys_count
        assert open_chimneys == 0

    def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
        blocked_chimneys = result.blocked_chimneys_count
        assert blocked_chimneys == 0
class TestLighting:
    """Fixed-lighting bulb counts mapped from the first site-notes fixture."""

    def test_led_count(self, result: EpcPropertyData) -> None:
        led_bulbs = result.led_fixed_lighting_bulbs_count
        assert led_bulbs == 4

    def test_cfl_count(self, result: EpcPropertyData) -> None:
        cfl_bulbs = result.cfl_fixed_lighting_bulbs_count
        assert cfl_bulbs == 4

    def test_incandescent_count(self, result: EpcPropertyData) -> None:
        incandescent_bulbs = result.incandescent_fixed_lighting_bulbs_count
        assert incandescent_bulbs == 0
class TestFlags:
    """Top-level boolean flags mapped from the first site-notes fixture.

    Each check uses ``is False`` so that a missing (``None``) value fails.
    """

    def test_solar_water_heating(self, result: EpcPropertyData) -> None:
        flag = result.solar_water_heating
        assert flag is False

    def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
        flag = result.has_hot_water_cylinder
        assert flag is False

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        flag = result.has_fixed_air_conditioning
        assert flag is False

    def test_hydro(self, result: EpcPropertyData) -> None:
        flag = result.hydro
        assert flag is False

    def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
        flag = result.photovoltaic_array
        assert flag is False
class TestBuildingPart:
    """Fabric and floor-dimension fields of the building part from fixture 1."""

    def test_single_building_part(self, result: EpcPropertyData) -> None:
        parts = result.sap_building_parts
        assert len(parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.identifier is BuildingPartIdentifier.MAIN

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        # The expected value is the RdSAP10 Table 1 age-band letter, not the
        # year-range description: the cascade keys its U-value lookups off
        # the letter code.
        main_part = result.sap_building_parts[0]
        assert main_part.construction_age_band == "D"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
        # Integer SAP10 wall_construction code; 4 means Cavity
        # (domain.ml.rdsap_uvalues.WALL_CAVITY).
        main_part = result.sap_building_parts[0]
        assert main_part.wall_construction == 4

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
        # Integer SAP10 wall_insulation_type code; 2 means Filled cavity
        # (domain.ml.rdsap_uvalues.WALL_INSULATION_FILLED_CAVITY).
        main_part = result.sap_building_parts[0]
        assert main_part.wall_insulation_type == 2

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_measured is True

    def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_mm == 300

    def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.roof_insulation_location == "Joists"

    def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.roof_insulation_thickness == 270

    def test_floor_type(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_type == "Ground floor"

    def test_floor_construction_type(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_construction_type == "Suspended, not timber"

    def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_insulation_type_str == "As built"

    def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_u_value_known is False

    def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
        floors = result.sap_building_parts[0].sap_floor_dimensions
        assert len(floors) == 1

    def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.total_floor_area_m2 == 44.89

    def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.room_height_m == 2.24

    def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.heat_loss_perimeter_m == 20.10

    def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.party_wall_length_m == 6.70
class TestWindows:
    """Window entries mapped from the first site-notes fixture."""

    def test_window_count(self, result: EpcPropertyData) -> None:
        windows = result.sap_windows
        assert len(windows) == 4

    def test_first_window_width(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.window_width == 1.30

    def test_first_window_height(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.window_height == 1.10

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
        # Orientation is the integer SAP10 octant code (1 = North); the
        # solar-gains cascade keys off the integer rather than the
        # cardinal-direction string.
        first = result.sap_windows[0]
        assert first.orientation == 1

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.glazing_type == "Double post or during 2022"

    def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
        # Integer SAP10 octant code: 5 = South.
        third = result.sap_windows[2]
        assert third.orientation == 5

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.frame_factor == 0.7

    def test_transmission_u_value(self, result: EpcPropertyData) -> None:
        transmission = result.sap_windows[0].window_transmission_details
        assert transmission is not None
        assert transmission.u_value == 1.4

    def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
        transmission = result.sap_windows[0].window_transmission_details
        assert transmission is not None
        assert transmission.solar_transmittance == 0.72

    def test_transmission_data_source(self, result: EpcPropertyData) -> None:
        transmission = result.sap_windows[0].window_transmission_details
        assert transmission is not None
        assert transmission.data_source == "Manufacturer"
class TestHeating:
    """Heating-system fields mapped from the first site-notes fixture."""

    def test_single_heating_detail(self, result: EpcPropertyData) -> None:
        details = result.sap_heating.main_heating_details
        assert len(details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
        # SAP10.2 Table 12 fuel code 26 is mains gas (not community). Only
        # the integer code is consumed by the cascade; a string would drop
        # the standing-charge / PE-factor / CO2-factor lookups.
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.main_fuel_type == 26

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
        # SAP10.2 heat-emitter code: 1 = Radiators.
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.heat_emitter_type == 1

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.emitter_temperature == "Unknown"

    def test_fan_flue_present(self, result: EpcPropertyData) -> None:
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.fan_flue_present is True

    def test_has_fghrs(self, result: EpcPropertyData) -> None:
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
        # The integer is the SAP10.2 main_heating_control code pulled out of
        # the Elmhurst "SAP code 2106, Programmer, room thermostat and TRVs"
        # string; the cascade keys efficiency adjustments off the integer.
        main_heating = result.sap_heating.main_heating_details[0]
        assert main_heating.main_heating_control == 2106

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        outlets = result.sap_heating.shower_outlets
        assert outlets is not None
        assert outlets.shower_outlet.shower_outlet_type == "Electric shower"

    def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.cylinder_size is None

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.has_fixed_air_conditioning is False

    def test_water_heating_code(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.water_heating_code == 901
class TestEnergySource:
    """Energy-source fields mapped from the first site-notes fixture."""

    def test_mains_gas(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.mains_gas is True

    def test_meter_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.meter_type == "Single"

    def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.electricity_smart_meter_present is False

    def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.gas_smart_meter_present is False

    def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_count == 0

    def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_terrain_type == "Suburban"

    def test_pv_battery_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.pv_battery_count == 0
class TestVentilation:
    """Ventilation fields mapped from the first site-notes fixture.

    Every test first checks that ``sap_ventilation`` is populated before
    reading a field from it.
    """

    def test_draught_lobby(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.draught_lobby is False

    def test_pressure_test(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.pressure_test == "Not available"

    def test_extract_fans_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.extract_fans_count == 2

    def test_open_flues_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.open_flues_count == 0
class TestDraughtproofingAndWater:
    """Draughtproofing, waste-water and unheated-room fields from fixture 1."""

    def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
        percent = result.percent_draughtproofed
        assert percent == 100

    def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
        # NOTE: the expected value is the literal string "None", not the
        # ``None`` singleton.
        recovery = result.waste_water_heat_recovery
        assert recovery == "None"

    def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
        unheated = result.any_unheated_rooms
        assert unheated is False
class TestEnergyPerformance:
    """Energy-rating and emissions figures mapped from the first fixture."""

    def test_energy_rating_current(self, result: EpcPropertyData) -> None:
        current = result.energy_rating_current
        assert current == 69

    def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
        potential = result.energy_rating_potential
        assert potential == 77

    def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
        current = result.environmental_impact_current
        assert current == 76

    def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
        potential = result.environmental_impact_potential
        assert potential == 81

    def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
        emissions = result.co2_emissions_current
        assert emissions == 1.683
class TestWindowFrameMaterial:
    """Frame and glazing-gap fields of the first window in the second fixture."""

    def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.frame_material == "PVC"

    def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.glazing_gap == "16 mm or more"
class TestLowEnergyLighting:
    """Low-energy fixed-lighting count mapped from the second fixture."""

    def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
        low_energy_bulbs = result2.low_energy_fixed_lighting_bulbs_count
        assert low_energy_bulbs == 5