mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Closes a systematic +0.02..+0.07 SAP over-prediction on every triple-
glazed cert in cohort 2 (13 of 38) and removes a silent-default
failure mode flagged via cert 3336-2825-9400-0512-8292 (+0.0674 Δ).
Root cause: `_map_elmhurst_window` (datatypes/epc/domain/mapper.py)
was passing the Elmhurst-lodged glazing-type string verbatim into
`SapWindow.glazing_type` (declared `Union[int, str]`). The §5 (66)..
(67) daylight-factor cascade at
`domain/sap10_calculator/worksheet/internal_gains.py:512` requires
`isinstance(w.glazing_type, int)` to look up Table 6b col light g_L —
string lodgings silently fell through to the `_G_LIGHT_DEFAULT = 0.80`
(double-glazed) branch. Cert 3336 (Triple glazed, worksheet "Window,
Triple glazed") got g_L = 0.80 instead of the correct 0.70, lowering
C_daylight from the correct 1.072 to 1.041 → lighting kWh under-predicted by
−4.53 kWh/yr → total fuel cost under by −1.17 GBP → ECF Δ −0.0049 →
SAP continuous over by +0.0674.
Fix: `_ELMHURST_GLAZING_LABEL_TO_SAP10` dict + `_elmhurst_glazing_
type_code` helper translate the Elmhurst Summary §11 lodged strings
to the SAP 10.2 Table U2 integer codes the cascade keys on:
"Single" → 1
"Double pre 2002" → 2
"Double between 2002 and 2021" → 3
"Double with unknown install date" → 3
"Double with unknown 16 mm or install date more" → 3
"Double post or during 2022" → 5
"Triple post or during 2022" → 6
"Triple post or during" → 6 (year-trunc.)
"Secondary" → 7
Two regex passes strip the layout noise the extractor sometimes folds
into the glazing-type token: a `(?:Part )?value value Proofed Shutters`
prefix (from adjacent column headers) and a ` Summary Information` /
` Alternative wall…` suffix. Verified against the union of cohort-1
(7 certs) + cohort-2 (38 certs) + test-fixture (9 PDFs) glazing
labels: 18 distinct surface forms, all closed by the dict + noise
patterns; one window in cert 2636's Summary_000898.pdf lodged the
year-truncated "Triple post or during" — added as an alias for code 6
per worksheet "Triple glazed" lodging.
Strict-enum gate: `_elmhurst_glazing_type_code` raises
`UnmappedElmhurstLabel("glazing_type", label)` (Slice S0380.15
pattern, extended to the new helper) when the label is None or not
in the dict — surfaces mapper-coverage gaps at extraction time rather
than masking them as a SAP precision floor.
Cohort-2 Summary-path delta progression (38 certs):
bucket before slice 2 after slice 2
exact (<1e-4) 11 11
<0.005 0 5 ← 9421 +0.0012, 2536 +0.0016, 9370 +0.0017, 0100 +0.0028, 2800 +0.0044
0.005-0.07 15 10 ← all triple-glazed
0.07-0.5 5 5
0.5-1 4 4
1-5 1 1
5+ 2 2
RAISES 0 0
3336 (user's flag) closes from +0.0674 → +0.0400 — the residual is
the remaining systematic offset the next slice will investigate.
Tests added (3):
- `test_summary_3336_triple_glazed_windows_route_to_code_6` — pins
the mapper output for the user's flagged cert.
- `test_summary_000474_double_glazed_windows_route_to_code_3` —
exercises the DG branch + the year-unknown alias mapping.
- `test_summary_mapper_raises_on_unmapped_glazing_type_label` —
strict-enum coverage gate via mutated site notes.
Tests updated (1):
- `test_first_window_glazing_type` (test_elmhurst_end_to_end.py):
asserts int code 5 (DG low-E argon — "Double post or during 2022")
not the string verbatim. The string-passthrough behaviour was
always a latent bug; this test was the only direct pin on it.
Pyright net-zero per file:
- datatypes/epc/domain/mapper.py: 32 (baseline 32)
- backend/documents_parser/tests/test_summary_pdf_mapper_chain.py: 0
- backend/documents_parser/tests/test_elmhurst_end_to_end.py: 0
Regression baseline: 694 pass + 10 fail (= prior 691 pass + 10 fail, plus 3 new passing tests).
Triple-glazed original-cohort certs are now closer to worksheet too;
the ±0.07 chain tests on the original cohort still hold, and a future
slice tightens them once the next-largest residual is closed.
Spec refs:
- SAP 10.2 Table U2 — glazing-type integer enum.
- SAP 10.2 Table 6b col light — light-transmission g_L by glazing
type (triple 0.70, double-glazed variants 0.80, single 0.90).
- RdSAP 10 §11 Windows — Summary lodging of glazing type as a
type+install-date phrase.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
388 lines
16 KiB
Python
388 lines
16 KiB
Python
import json
|
||
import os
|
||
from datetime import date
|
||
|
||
import pytest
|
||
|
||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||
from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier, EpcPropertyData
|
||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||
|
||
# Directory (next to this test module) holding the extracted page-text JSON
# fixtures consumed by the module-scoped fixtures below.
_FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

# Page-text dumps of two Elmhurst site-notes documents.
FIXTURE_PATH = os.path.join(_FIXTURE_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURE_DIR, "elmhurst_site_notes_2_text.json")
|
||
|
||
@pytest.fixture(scope="module")
def result() -> EpcPropertyData:
    """Map the site-notes fixture at ``FIXTURE_PATH`` to ``EpcPropertyData``.

    Loads the page-text JSON, runs it through ``ElmhurstSiteNotesExtractor``
    and maps the extracted site notes with
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. Module-scoped, so the
    extract + map chain runs once for every test that requests it.

    Returns:
        The mapped ``EpcPropertyData`` for the first fixture document.
    """
    # JSON is UTF-8 by specification; pin the encoding so the fixture decodes
    # the same way regardless of the platform's locale default.
    with open(FIXTURE_PATH, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||
|
||
@pytest.fixture(scope="module")
def result2() -> EpcPropertyData:
    """Map the site-notes fixture at ``FIXTURE_PATH_2`` to ``EpcPropertyData``.

    Loads the page-text JSON, runs it through ``ElmhurstSiteNotesExtractor``
    and maps the extracted site notes with
    ``EpcPropertyDataMapper.from_elmhurst_site_notes``. Module-scoped, so the
    extract + map chain runs once for every test that requests it.

    Returns:
        The mapped ``EpcPropertyData`` for the second fixture document.
    """
    # JSON is UTF-8 by specification; pin the encoding so the fixture decodes
    # the same way regardless of the platform's locale default.
    with open(FIXTURE_PATH_2, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
||
|
||
class TestAddress:
    """Address fields on the mapped ``result`` fixture."""

    def test_address_line_1(self, result: EpcPropertyData) -> None:
        """First address line is carried through the mapper."""
        line_1 = result.address_line_1
        assert line_1 == "19, Queens Road"

    def test_post_town(self, result: EpcPropertyData) -> None:
        """Post town is carried through the mapper."""
        town = result.post_town
        assert town == "BURNLEY"

    def test_postcode(self, result: EpcPropertyData) -> None:
        """Postcode is carried through the mapper."""
        postcode = result.postcode
        assert postcode == "BB10 1XX"
|
||
|
||
class TestInspectionInfo:
    """Inspection metadata on the mapped ``result`` fixture."""

    def test_inspection_date(self, result: EpcPropertyData) -> None:
        """Inspection date is exposed as a ``date`` value."""
        expected = date(2026, 3, 6)
        assert result.inspection_date == expected

    def test_tenure(self, result: EpcPropertyData) -> None:
        """Tenure string is carried through the mapper."""
        tenure = result.tenure
        assert tenure == "Rented (social)"

    def test_transaction_type(self, result: EpcPropertyData) -> None:
        """Transaction type string is carried through the mapper."""
        transaction = result.transaction_type
        assert transaction == "Grant scheme"

    def test_report_reference(self, result: EpcPropertyData) -> None:
        """Report reference is carried through the mapper."""
        reference = result.report_reference
        assert reference == "P960-0001-001573"
|
||
|
||
class TestPropertyDescription:
    """Property-description fields on the mapped ``result`` fixture."""

    def test_property_type(self, result: EpcPropertyData) -> None:
        """Property type string."""
        property_type = result.property_type
        assert property_type == "Bungalow"

    def test_built_form(self, result: EpcPropertyData) -> None:
        """Built-form string."""
        built_form = result.built_form
        assert built_form == "End-Terrace"

    def test_dwelling_type(self, result: EpcPropertyData) -> None:
        """Combined dwelling-type string."""
        dwelling_type = result.dwelling_type
        assert dwelling_type == "End-Terrace bungalow"

    def test_number_of_storeys(self, result: EpcPropertyData) -> None:
        """Storey count."""
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_has_conservatory(self, result: EpcPropertyData) -> None:
        """Conservatory flag is exactly ``False``."""
        conservatory = result.has_conservatory
        assert conservatory is False

    def test_total_floor_area(self, result: EpcPropertyData) -> None:
        """Total floor area in m²."""
        area_m2 = result.total_floor_area_m2
        assert area_m2 == 44.89
|
||
|
||
class TestCounts:
    """Room, door and chimney counts on the mapped ``result`` fixture."""

    def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
        """Habitable room count."""
        habitable = result.habitable_rooms_count
        assert habitable == 2

    def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
        """Heated room count."""
        heated = result.heated_rooms_count
        assert heated == 2

    def test_door_count(self, result: EpcPropertyData) -> None:
        """Door count."""
        doors = result.door_count
        assert doors == 0

    def test_insulated_door_count(self, result: EpcPropertyData) -> None:
        """Insulated door count."""
        insulated_doors = result.insulated_door_count
        assert insulated_doors == 0

    def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
        """Open chimney count."""
        open_chimneys = result.open_chimneys_count
        assert open_chimneys == 0

    def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
        """Blocked chimney count."""
        blocked_chimneys = result.blocked_chimneys_count
        assert blocked_chimneys == 0
|
||
|
||
class TestLighting:
    """Fixed-lighting bulb counts on the mapped ``result`` fixture."""

    def test_led_count(self, result: EpcPropertyData) -> None:
        """LED bulb count."""
        led = result.led_fixed_lighting_bulbs_count
        assert led == 4

    def test_cfl_count(self, result: EpcPropertyData) -> None:
        """CFL bulb count."""
        cfl = result.cfl_fixed_lighting_bulbs_count
        assert cfl == 4

    def test_incandescent_count(self, result: EpcPropertyData) -> None:
        """Incandescent bulb count."""
        incandescent = result.incandescent_fixed_lighting_bulbs_count
        assert incandescent == 0
|
||
|
||
class TestFlags:
    """Top-level boolean flags on the mapped ``result`` fixture.

    Every check uses identity against ``False`` so a falsy non-bool value
    (``None``, ``0``) would fail the test.
    """

    def test_solar_water_heating(self, result: EpcPropertyData) -> None:
        """Solar water heating flag."""
        flag = result.solar_water_heating
        assert flag is False

    def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
        """Hot-water cylinder flag."""
        flag = result.has_hot_water_cylinder
        assert flag is False

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        """Fixed air-conditioning flag."""
        flag = result.has_fixed_air_conditioning
        assert flag is False

    def test_hydro(self, result: EpcPropertyData) -> None:
        """Hydro flag."""
        flag = result.hydro
        assert flag is False

    def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
        """Photovoltaic array flag."""
        flag = result.photovoltaic_array
        assert flag is False
|
||
|
||
class TestBuildingPart:
    """Fabric fields of the first building part on the ``result`` fixture."""

    def test_single_building_part(self, result: EpcPropertyData) -> None:
        """Exactly one building part is mapped."""
        parts = result.sap_building_parts
        assert len(parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
        """The building part is identified as the MAIN part."""
        part = result.sap_building_parts[0]
        assert part.identifier is BuildingPartIdentifier.MAIN

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        """Age band is the spec letter code, not the year-range text."""
        # RdSAP10 Table 1 age-band letter; the cascade uses the letter for
        # its U-value lookups rather than the year-range description.
        part = result.sap_building_parts[0]
        assert part.construction_age_band == "D"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
        """Wall construction is the SAP10 integer code."""
        # 4 = Cavity (domain.sap10_ml.rdsap_uvalues.WALL_CAVITY).
        part = result.sap_building_parts[0]
        assert part.wall_construction == 4

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
        """Wall insulation type is the SAP10 integer code."""
        # 2 = Filled cavity
        # (domain.sap10_ml.rdsap_uvalues.WALL_INSULATION_FILLED_CAVITY).
        part = result.sap_building_parts[0]
        assert part.wall_insulation_type == 2

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        """Wall-thickness-measured flag is exactly ``True``."""
        part = result.sap_building_parts[0]
        assert part.wall_thickness_measured is True

    def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
        """Wall thickness in millimetres."""
        part = result.sap_building_parts[0]
        assert part.wall_thickness_mm == 300

    def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
        """Roof insulation location string."""
        part = result.sap_building_parts[0]
        assert part.roof_insulation_location == "Joists"

    def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
        """Roof insulation thickness."""
        part = result.sap_building_parts[0]
        assert part.roof_insulation_thickness == 270

    def test_floor_type(self, result: EpcPropertyData) -> None:
        """Floor type string."""
        part = result.sap_building_parts[0]
        assert part.floor_type == "Ground floor"

    def test_floor_construction_type(self, result: EpcPropertyData) -> None:
        """Floor construction type string."""
        part = result.sap_building_parts[0]
        assert part.floor_construction_type == "Suspended, not timber"

    def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
        """Floor insulation type string."""
        part = result.sap_building_parts[0]
        assert part.floor_insulation_type_str == "As built"

    def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
        """Floor-U-value-known flag is exactly ``False``."""
        part = result.sap_building_parts[0]
        assert part.floor_u_value_known is False

    def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
        """Exactly one floor dimension is mapped for the building part."""
        dimensions = result.sap_building_parts[0].sap_floor_dimensions
        assert len(dimensions) == 1

    def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
        """Floor area of the first floor dimension, in m²."""
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.total_floor_area_m2 == 44.89

    def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
        """Room height of the first floor dimension, in metres."""
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.room_height_m == 2.24

    def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
        """Heat-loss perimeter of the first floor dimension, in metres."""
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.heat_loss_perimeter_m == 20.10

    def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
        """Party-wall length of the first floor dimension, in metres."""
        dimension = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert dimension.party_wall_length_m == 6.70
|
||
|
||
class TestWindows:
    """Window geometry, coding and transmission details on ``result``."""

    def test_window_count(self, result: EpcPropertyData) -> None:
        """Four windows are mapped."""
        windows = result.sap_windows
        assert len(windows) == 4

    def test_first_window_area(self, result: EpcPropertyData) -> None:
        """Width × height of the first window reproduces the lodged area."""
        # The Elmhurst mapper stores the Summary PDF's precomputed Area
        # (1.30 × 1.10 = 1.43 m²) as `window_width × 1.0`, which avoids the
        # 2-d.p. round-trip drift that multiplying W × H would reintroduce.
        # Only the product is read by the cascade, so (area, 1.0) behaves
        # like (1.30, 1.10) apart from precision.
        window = result.sap_windows[0]
        area = window.window_width * window.window_height
        assert area == 1.43

    def test_first_window_height(self, result: EpcPropertyData) -> None:
        """Height of the first window is normalised to 1.0."""
        # Companion to `test_first_window_area`: height is fixed at 1.0 so
        # the lodged Area is the canonical geometry with no re-multiplying.
        window = result.sap_windows[0]
        assert window.window_height == 1.0

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
        """Orientation of the first window is the SAP10 octant integer."""
        # 1 = North. The solar-gains cascade keys off the integer rather
        # than the cardinal-direction string.
        window = result.sap_windows[0]
        assert window.orientation == 1

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        """Glazing type of the first window is the Table U2 integer code."""
        # SAP 10.2 Table U2 code 5 = double glazed (low-E argon). The
        # Elmhurst Summary label "Double post or during 2022" is mapped to 5
        # by `_ELMHURST_GLAZING_LABEL_TO_SAP10`; the §5 daylight factor and
        # §6 solar gains key off the integer, not the string.
        window = result.sap_windows[0]
        assert window.glazing_type == 5

    def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
        """Draught-proofed flag of the first window is exactly ``True``."""
        window = result.sap_windows[0]
        assert window.draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
        """Orientation of the third window is the SAP10 octant integer."""
        # 5 = South.
        window = result.sap_windows[2]
        assert window.orientation == 5

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        """Frame factor of the first window."""
        window = result.sap_windows[0]
        assert window.frame_factor == 0.7

    def test_transmission_u_value(self, result: EpcPropertyData) -> None:
        """Transmission details are present and carry the U-value."""
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.u_value == 1.4

    def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
        """Transmission details are present and carry solar transmittance."""
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.solar_transmittance == 0.72

    def test_transmission_data_source(self, result: EpcPropertyData) -> None:
        """Transmission details are present and carry the data source."""
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.data_source == "Manufacturer"
|
||
|
||
class TestHeating:
    """Main heating and hot-water fields on the mapped ``result`` fixture."""

    def test_single_heating_detail(self, result: EpcPropertyData) -> None:
        """Exactly one main-heating detail is mapped."""
        details = result.sap_heating.main_heating_details
        assert len(details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
        """Main fuel type is the SAP10.2 Table 12 integer code."""
        # 26 = mains gas (not community). The cascade consumes only the int
        # code; a string drops the standing-charge / PE-factor / CO2-factor
        # lookups.
        main = result.sap_heating.main_heating_details[0]
        assert main.main_fuel_type == 26

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
        """Heat emitter type is the SAP10.2 integer code."""
        # 1 = Radiators.
        main = result.sap_heating.main_heating_details[0]
        assert main.heat_emitter_type == 1

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        """Emitter temperature is the SAP10.2 Table 4d integer code."""
        # This cert's Elmhurst Summary §14 lodges "Design flow temperature:
        # Unknown". `_elmhurst_emitter_temperature_int` (mapper.py) turns
        # that into Table 4d code 1 (high-temp / ≥45 °C — the worst-case
        # assumption for an unmeasured gas boiler). The int encoding mirrors
        # the API mapper's `MainHeatingDetail.emitter_temperature` for
        # cross-mapper field parity; surfacing the raw "Unknown" string was
        # the older behaviour, replaced when the int conversion landed.
        main = result.sap_heating.main_heating_details[0]
        assert main.emitter_temperature == 1

    def test_fan_flue_present(self, result: EpcPropertyData) -> None:
        """Fan-flue flag is exactly ``True``."""
        main = result.sap_heating.main_heating_details[0]
        assert main.fan_flue_present is True

    def test_has_fghrs(self, result: EpcPropertyData) -> None:
        """FGHRS flag is exactly ``False``."""
        main = result.sap_heating.main_heating_details[0]
        assert main.has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
        """Main heating control is the SAP10.2 integer code."""
        # Extracted from the Elmhurst string "SAP code 2106, Programmer,
        # room thermostat and TRVs"; the cascade keys efficiency adjustments
        # off the integer.
        main = result.sap_heating.main_heating_details[0]
        assert main.main_heating_control == 2106

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        """Shower outlets are present and carry the outlet type."""
        assert result.sap_heating.shower_outlets is not None
        outlet = result.sap_heating.shower_outlets.shower_outlet
        assert outlet.shower_outlet_type == "Electric shower"

    def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
        """Cylinder size is ``None``."""
        heating = result.sap_heating
        assert heating.cylinder_size is None

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        """Heating-level air-conditioning flag is exactly ``False``."""
        heating = result.sap_heating
        assert heating.has_fixed_air_conditioning is False

    def test_water_heating_code(self, result: EpcPropertyData) -> None:
        """Water heating code."""
        heating = result.sap_heating
        assert heating.water_heating_code == 901
|
||
|
||
class TestEnergySource:
    """Energy-source fields on the mapped ``result`` fixture."""

    def test_mains_gas(self, result: EpcPropertyData) -> None:
        """Mains-gas flag is exactly ``True``."""
        source = result.sap_energy_source
        assert source.mains_gas is True

    def test_meter_type(self, result: EpcPropertyData) -> None:
        """Electricity meter type string."""
        source = result.sap_energy_source
        assert source.meter_type == "Single"

    def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
        """Electricity smart-meter flag is exactly ``False``."""
        source = result.sap_energy_source
        assert source.electricity_smart_meter_present is False

    def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
        """Gas smart-meter flag is exactly ``False``."""
        source = result.sap_energy_source
        assert source.gas_smart_meter_present is False

    def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
        """Wind turbine count."""
        source = result.sap_energy_source
        assert source.wind_turbines_count == 0

    def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
        """Wind turbine terrain type string."""
        source = result.sap_energy_source
        assert source.wind_turbines_terrain_type == "Suburban"

    def test_pv_battery_count(self, result: EpcPropertyData) -> None:
        """PV battery count."""
        source = result.sap_energy_source
        assert source.pv_battery_count == 0
|
||
|
||
class TestVentilation:
    """Ventilation fields on the mapped ``result`` fixture.

    Each test first checks that the ventilation section is present, then
    checks one field on it.
    """

    def test_draught_lobby(self, result: EpcPropertyData) -> None:
        """Draught-lobby flag is exactly ``False``."""
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.draught_lobby is False

    def test_pressure_test(self, result: EpcPropertyData) -> None:
        """Pressure-test string."""
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.pressure_test == "Not available"

    def test_extract_fans_count(self, result: EpcPropertyData) -> None:
        """Extract fan count."""
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.extract_fans_count == 2

    def test_open_flues_count(self, result: EpcPropertyData) -> None:
        """Open flue count."""
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.open_flues_count == 0
|
||
|
||
class TestDraughtproofingAndWater:
    """Draught-proofing, WWHR and unheated-room fields on ``result``."""

    def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
        """Percentage draught-proofed."""
        percent = result.percent_draughtproofed
        assert percent == 100

    def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
        """Waste-water heat recovery is the string ``"None"``."""
        # The expected value is the literal string "None", not the None object.
        wwhr = result.waste_water_heat_recovery
        assert wwhr == "None"

    def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
        """Unheated-rooms flag is exactly ``False``."""
        unheated = result.any_unheated_rooms
        assert unheated is False
|
||
|
||
class TestEnergyPerformance:
    """Lodged rating and emissions figures on the mapped ``result`` fixture."""

    def test_energy_rating_current(self, result: EpcPropertyData) -> None:
        """Current energy rating."""
        rating = result.energy_rating_current
        assert rating == 69

    def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
        """Potential energy rating."""
        rating = result.energy_rating_potential
        assert rating == 77

    def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
        """Current environmental-impact rating."""
        impact = result.environmental_impact_current
        assert impact == 76

    def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
        """Potential environmental-impact rating."""
        impact = result.environmental_impact_potential
        assert impact == 81

    def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
        """Current CO2 emissions figure."""
        emissions = result.co2_emissions_current
        assert emissions == 1.683
|
||
|
||
class TestWindowFrameMaterial:
    """Frame material and glazing gap of the first window on ``result2``."""

    def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
        """Frame material string of the first window."""
        window = result2.sap_windows[0]
        assert window.frame_material == "PVC"

    def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
        """Glazing gap string of the first window."""
        window = result2.sap_windows[0]
        assert window.glazing_gap == "16 mm or more"
|
||
|
||
class TestLowEnergyLighting:
    """Low-energy lighting count on the mapped ``result2`` fixture."""

    def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
        """Low-energy fixed-lighting bulb count."""
        low_energy = result2.low_energy_fixed_lighting_bulbs_count
        assert low_energy == 5