mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Three orthogonal issues surfaced by the full project test sweep: 1. Dockerfile.test: install poppler-utils alongside postgresql. The 20× `pdfinfo: No such file or directory` failures in test_summary_pdf_mapper_chain.py were traced to the CI test image missing the poppler-utils system package (pdfinfo + pdftotext). `_summary_pdf_to_textract_style_pages` shells out to these for layout-preserving PDF text extraction. Pure-Python alternatives (pymupdf, pypdf) don't reproduce pdftotext -layout's row-major table cell ordering, which the Elmhurst Summary extractor depends on. So system poppler is the right fix; added to apt-get install with an explanatory comment. 2. test_from_rdsap_schema.py::test_total_floor_area: expected 55.0, got 45.82. Slice 95 (commit f502db8c) changed the API mapper to compute total_floor_area_m2 from the precise sum of per-bp sap_floor_dimensions[*].total_floor_area rather than the lodged scalar. The synthetic 21_0_1.json fixture has lodged total_floor_area=55 + a single fd of 45.82 (per-bp sum doesn't match lodged). Updated the expected to 45.82 with a comment explaining the Slice 95 per-bp-sum precedence. 3. test_elmhurst_end_to_end.py::test_emitter_temperature: expected "Unknown", got int 1. Pre-existing failure (confirmed by checking out commit 985a59e1 and reproducing). `_elmhurst_emitter_temperature_int` in datatypes/epc/domain/mapper.py converts the Elmhurst Summary §14 "Design flow temperature: Unknown" to SAP10.2 Table 4d code 1 (high-temp / ≥45 °C, worst-case for unmeasured boilers). The int encoding mirrors the API mapper's MainHeatingDetail.emitter_temperature for cross-mapper field parity. Test updated to expect 1 (with comment) since the conversion is the correct production behaviour. Verified: - Layer 4 1e-4 gate (test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly) still GREEN. - Wider domain sweep (domain/sap10_calculator + domain/sap10_ml): 1654 passed / 20 failed, exact pre-fix baseline. 
- All three originally-failing tests now PASS. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
383 lines
16 KiB
Python
383 lines
16 KiB
Python
import json
|
||
import os
|
||
from datetime import date
|
||
|
||
import pytest
|
||
|
||
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
||
from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier, EpcPropertyData
|
||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||
|
||
# Paths to the JSON fixtures, resolved relative to this test module so the
# suite does not depend on the current working directory.
_FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
FIXTURE_PATH = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_1_text.json")
FIXTURE_PATH_2 = os.path.join(_FIXTURES_DIR, "elmhurst_site_notes_2_text.json")
|
||
|
||
|
||
def _load_epc_property_data(fixture_path: str) -> EpcPropertyData:
    """Run one site-notes JSON fixture through the extractor and the mapper.

    Args:
        fixture_path: Path to a JSON file whose decoded content is handed to
            ``ElmhurstSiteNotesExtractor``.

    Returns:
        The ``EpcPropertyData`` built by
        ``EpcPropertyDataMapper.from_elmhurst_site_notes`` from the extracted
        site notes.
    """
    # JSON is UTF-8 by specification; naming the encoding keeps the load
    # independent of the platform's locale default.
    with open(fixture_path, encoding="utf-8") as f:
        pages = json.load(f)
    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)


@pytest.fixture(scope="module")
def result() -> EpcPropertyData:
    """Mapped ``EpcPropertyData`` for ``FIXTURE_PATH``, built once per module."""
    return _load_epc_property_data(FIXTURE_PATH)


@pytest.fixture(scope="module")
def result2() -> EpcPropertyData:
    """Mapped ``EpcPropertyData`` for ``FIXTURE_PATH_2``, built once per module."""
    return _load_epc_property_data(FIXTURE_PATH_2)
|
||
|
||
|
||
class TestAddress:
    """Address fields of the first site-notes fixture after mapping."""

    def test_address_line_1(self, result: EpcPropertyData) -> None:
        expected = "19, Queens Road"
        assert result.address_line_1 == expected

    def test_post_town(self, result: EpcPropertyData) -> None:
        expected = "BURNLEY"
        assert result.post_town == expected

    def test_postcode(self, result: EpcPropertyData) -> None:
        expected = "BB10 1XX"
        assert result.postcode == expected
|
||
|
||
|
||
class TestInspectionInfo:
    """Inspection metadata of the first site-notes fixture after mapping."""

    def test_inspection_date(self, result: EpcPropertyData) -> None:
        expected = date(2026, 3, 6)
        assert result.inspection_date == expected

    def test_tenure(self, result: EpcPropertyData) -> None:
        expected = "Rented (social)"
        assert result.tenure == expected

    def test_transaction_type(self, result: EpcPropertyData) -> None:
        expected = "Grant scheme"
        assert result.transaction_type == expected

    def test_report_reference(self, result: EpcPropertyData) -> None:
        expected = "P960-0001-001573"
        assert result.report_reference == expected
|
||
|
||
|
||
class TestPropertyDescription:
    """Top-level dwelling description of the first site-notes fixture."""

    def test_property_type(self, result: EpcPropertyData) -> None:
        expected = "Bungalow"
        assert result.property_type == expected

    def test_built_form(self, result: EpcPropertyData) -> None:
        expected = "End-Terrace"
        assert result.built_form == expected

    def test_dwelling_type(self, result: EpcPropertyData) -> None:
        expected = "End-Terrace bungalow"
        assert result.dwelling_type == expected

    def test_number_of_storeys(self, result: EpcPropertyData) -> None:
        storeys = result.number_of_storeys
        assert storeys == 1

    def test_has_conservatory(self, result: EpcPropertyData) -> None:
        conservatory = result.has_conservatory
        assert conservatory is False

    def test_total_floor_area(self, result: EpcPropertyData) -> None:
        area_m2 = result.total_floor_area_m2
        assert area_m2 == 44.89
|
||
|
||
|
||
class TestCounts:
    """Room, door and chimney counts of the first site-notes fixture."""

    def test_habitable_rooms_count(self, result: EpcPropertyData) -> None:
        count = result.habitable_rooms_count
        assert count == 2

    def test_heated_rooms_count(self, result: EpcPropertyData) -> None:
        count = result.heated_rooms_count
        assert count == 2

    def test_door_count(self, result: EpcPropertyData) -> None:
        count = result.door_count
        assert count == 0

    def test_insulated_door_count(self, result: EpcPropertyData) -> None:
        count = result.insulated_door_count
        assert count == 0

    def test_open_chimneys_count(self, result: EpcPropertyData) -> None:
        count = result.open_chimneys_count
        assert count == 0

    def test_blocked_chimneys_count(self, result: EpcPropertyData) -> None:
        count = result.blocked_chimneys_count
        assert count == 0
|
||
|
||
|
||
class TestLighting:
    """Fixed-lighting bulb counts of the first site-notes fixture."""

    def test_led_count(self, result: EpcPropertyData) -> None:
        bulbs = result.led_fixed_lighting_bulbs_count
        assert bulbs == 4

    def test_cfl_count(self, result: EpcPropertyData) -> None:
        bulbs = result.cfl_fixed_lighting_bulbs_count
        assert bulbs == 4

    def test_incandescent_count(self, result: EpcPropertyData) -> None:
        bulbs = result.incandescent_fixed_lighting_bulbs_count
        assert bulbs == 0
|
||
|
||
|
||
class TestFlags:
    """Boolean feature flags of the first site-notes fixture (all False)."""

    def test_solar_water_heating(self, result: EpcPropertyData) -> None:
        flag = result.solar_water_heating
        assert flag is False

    def test_has_hot_water_cylinder(self, result: EpcPropertyData) -> None:
        flag = result.has_hot_water_cylinder
        assert flag is False

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        flag = result.has_fixed_air_conditioning
        assert flag is False

    def test_hydro(self, result: EpcPropertyData) -> None:
        flag = result.hydro
        assert flag is False

    def test_photovoltaic_array(self, result: EpcPropertyData) -> None:
        flag = result.photovoltaic_array
        assert flag is False
|
||
|
||
|
||
class TestBuildingPart:
    """The single building part, and its single floor dimension, of fixture 1."""

    def test_single_building_part(self, result: EpcPropertyData) -> None:
        parts = result.sap_building_parts
        assert len(parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.identifier is BuildingPartIdentifier.MAIN

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        # The age band is carried as the RdSAP10 Table 1 letter code, not
        # the year-range description: the cascade looks U-values up by the
        # code letter.
        main_part = result.sap_building_parts[0]
        assert main_part.construction_age_band == "D"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
        # SAP10 wall_construction integer 4 means Cavity
        # (domain.sap10_ml.rdsap_uvalues.WALL_CAVITY).
        main_part = result.sap_building_parts[0]
        assert main_part.wall_construction == 4

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
        # SAP10 wall_insulation_type integer 2 means Filled cavity
        # (domain.sap10_ml.rdsap_uvalues.WALL_INSULATION_FILLED_CAVITY).
        main_part = result.sap_building_parts[0]
        assert main_part.wall_insulation_type == 2

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_measured is True

    def test_wall_thickness_mm(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.wall_thickness_mm == 300

    def test_roof_insulation_location(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.roof_insulation_location == "Joists"

    def test_roof_insulation_thickness(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.roof_insulation_thickness == 270

    def test_floor_type(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_type == "Ground floor"

    def test_floor_construction_type(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_construction_type == "Suspended, not timber"

    def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_insulation_type_str == "As built"

    def test_floor_u_value_known(self, result: EpcPropertyData) -> None:
        main_part = result.sap_building_parts[0]
        assert main_part.floor_u_value_known is False

    def test_single_floor_dimension(self, result: EpcPropertyData) -> None:
        floor_dimensions = result.sap_building_parts[0].sap_floor_dimensions
        assert len(floor_dimensions) == 1

    def test_floor_dimension_area(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.total_floor_area_m2 == 44.89

    def test_floor_dimension_room_height(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.room_height_m == 2.24

    def test_floor_dimension_heat_loss_perimeter(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.heat_loss_perimeter_m == 20.10

    def test_floor_dimension_party_wall_length(self, result: EpcPropertyData) -> None:
        floor = result.sap_building_parts[0].sap_floor_dimensions[0]
        assert floor.party_wall_length_m == 6.70
|
||
|
||
|
||
class TestWindows:
    """Windows mapped from the first site-notes fixture."""

    def test_window_count(self, result: EpcPropertyData) -> None:
        windows = result.sap_windows
        assert len(windows) == 4

    def test_first_window_area(self, result: EpcPropertyData) -> None:
        # The Elmhurst mapper stores the Summary PDF's precomputed Area
        # (1.30 × 1.10 = 1.43 m²) as `window_width × 1.0`, avoiding the
        # 2-d.p. round-trip drift that W × H would reintroduce. Only the
        # product is read by the cascade, so (area, 1.0) is behaviourally
        # equivalent to (1.30, 1.10) modulo precision.
        first = result.sap_windows[0]
        area = first.window_width * first.window_height
        assert area == 1.43

    def test_first_window_height(self, result: EpcPropertyData) -> None:
        # As explained in `test_first_window_area`, height is normalised
        # to 1.0 so the lodged Area is carried as the canonical geometry
        # without re-multiplying.
        first = result.sap_windows[0]
        assert first.window_height == 1.0

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
        # SAP10 octant code 1 is North; the solar-gains cascade keys off
        # the integer rather than the cardinal-direction string.
        first = result.sap_windows[0]
        assert first.orientation == 1

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.glazing_type == "Double post or during 2022"

    def test_first_window_draught_proofed(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
        # SAP10 octant code 5 is South.
        third = result.sap_windows[2]
        assert third.orientation == 5

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        first = result.sap_windows[0]
        assert first.frame_factor == 0.7

    def test_transmission_u_value(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.u_value == 1.4

    def test_transmission_solar_transmittance(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.solar_transmittance == 0.72

    def test_transmission_data_source(self, result: EpcPropertyData) -> None:
        details = result.sap_windows[0].window_transmission_details
        assert details is not None
        assert details.data_source == "Manufacturer"
|
||
|
||
|
||
class TestHeating:
    """Heating and hot-water fields of the first site-notes fixture."""

    def test_single_heating_detail(self, result: EpcPropertyData) -> None:
        details = result.sap_heating.main_heating_details
        assert len(details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
        # SAP10.2 Table 12 fuel code 26 is mains gas (not community). The
        # cascade consumes only the int code; a string would drop the
        # standing-charge / PE-factor / CO2-factor lookups.
        detail = result.sap_heating.main_heating_details[0]
        assert detail.main_fuel_type == 26

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
        # SAP10.2 heat-emitter code 1 is Radiators.
        detail = result.sap_heating.main_heating_details[0]
        assert detail.heat_emitter_type == 1

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        # This cert's Elmhurst Summary §14 lodges "Design flow temperature:
        # Unknown". `_elmhurst_emitter_temperature_int` (mapper.py) turns
        # that into SAP10.2 Table 4d code 1 (high-temp / ≥45 °C — the
        # worst-case assumption for an unmeasured gas boiler). The int
        # encoding mirrors the API mapper's
        # `MainHeatingDetail.emitter_temperature` for cross-mapper field
        # parity; the older behaviour of surfacing the raw "Unknown" string
        # was replaced when the int conversion landed.
        detail = result.sap_heating.main_heating_details[0]
        assert detail.emitter_temperature == 1

    def test_fan_flue_present(self, result: EpcPropertyData) -> None:
        detail = result.sap_heating.main_heating_details[0]
        assert detail.fan_flue_present is True

    def test_has_fghrs(self, result: EpcPropertyData) -> None:
        detail = result.sap_heating.main_heating_details[0]
        assert detail.has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
        # SAP10.2 main_heating_control code taken from the Elmhurst string
        # "SAP code 2106, Programmer, room thermostat and TRVs"; the
        # cascade keys efficiency adjustments off the integer.
        detail = result.sap_heating.main_heating_details[0]
        assert detail.main_heating_control == 2106

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        outlets = result.sap_heating.shower_outlets
        assert outlets is not None
        assert outlets.shower_outlet.shower_outlet_type == "Electric shower"

    def test_no_hot_water_cylinder_size(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.cylinder_size is None

    def test_has_fixed_air_conditioning(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.has_fixed_air_conditioning is False

    def test_water_heating_code(self, result: EpcPropertyData) -> None:
        heating = result.sap_heating
        assert heating.water_heating_code == 901
|
||
|
||
|
||
class TestEnergySource:
    """Energy-source section of the first site-notes fixture."""

    def test_mains_gas(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.mains_gas is True

    def test_meter_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.meter_type == "Single"

    def test_electricity_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.electricity_smart_meter_present is False

    def test_gas_smart_meter(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.gas_smart_meter_present is False

    def test_wind_turbines_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_count == 0

    def test_wind_turbines_terrain_type(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.wind_turbines_terrain_type == "Suburban"

    def test_pv_battery_count(self, result: EpcPropertyData) -> None:
        source = result.sap_energy_source
        assert source.pv_battery_count == 0
|
||
|
||
|
||
class TestVentilation:
    """Ventilation section of the first site-notes fixture.

    Each test first checks that the optional section is present before
    reading a field from it.
    """

    def test_draught_lobby(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.draught_lobby is False

    def test_pressure_test(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.pressure_test == "Not available"

    def test_extract_fans_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.extract_fans_count == 2

    def test_open_flues_count(self, result: EpcPropertyData) -> None:
        ventilation = result.sap_ventilation
        assert ventilation is not None
        assert ventilation.open_flues_count == 0
|
||
|
||
|
||
class TestDraughtproofingAndWater:
    """Draught-proofing, waste-water heat recovery and unheated-room fields."""

    def test_percent_draughtproofed(self, result: EpcPropertyData) -> None:
        percent = result.percent_draughtproofed
        assert percent == 100

    def test_waste_water_heat_recovery_absent(self, result: EpcPropertyData) -> None:
        # Absence is expected as the literal string "None", not the None object.
        recovery = result.waste_water_heat_recovery
        assert recovery == "None"

    def test_any_unheated_rooms_false(self, result: EpcPropertyData) -> None:
        unheated = result.any_unheated_rooms
        assert unheated is False
|
||
|
||
|
||
class TestEnergyPerformance:
    """Energy-rating, environmental-impact and CO2 figures of fixture 1."""

    def test_energy_rating_current(self, result: EpcPropertyData) -> None:
        rating = result.energy_rating_current
        assert rating == 69

    def test_energy_rating_potential(self, result: EpcPropertyData) -> None:
        rating = result.energy_rating_potential
        assert rating == 77

    def test_environmental_impact_current(self, result: EpcPropertyData) -> None:
        impact = result.environmental_impact_current
        assert impact == 76

    def test_environmental_impact_potential(self, result: EpcPropertyData) -> None:
        impact = result.environmental_impact_potential
        assert impact == 81

    def test_co2_emissions_current(self, result: EpcPropertyData) -> None:
        emissions = result.co2_emissions_current
        assert emissions == 1.683
|
||
|
||
|
||
class TestWindowFrameMaterial:
    """Frame material and glazing gap of the first window in the second fixture."""

    def test_frame_material_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.frame_material == "PVC"

    def test_glazing_gap_from_elmhurst(self, result2: EpcPropertyData) -> None:
        first = result2.sap_windows[0]
        assert first.glazing_gap == "16 mm or more"
|
||
|
||
|
||
class TestLowEnergyLighting:
    """Low-energy fixed-lighting bulb count of the second site-notes fixture."""

    def test_low_energy_fixed_lighting_bulbs_count(self, result2: EpcPropertyData) -> None:
        bulbs = result2.low_energy_fixed_lighting_bulbs_count
        assert bulbs == 5
|