Model/backend/documents_parser/tests/test_extractor.py
2026-04-21 11:13:04 +00:00

616 lines
23 KiB
Python

import json
import os
from datetime import date

import pytest

from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from datatypes.epc.surveys.pashub_rdsap_site_notes import (
    BuildingConstruction,
    BuildingMeasurements,
    Conservatories,
    CustomerResponse,
    ExtensionConstruction,
    ExtensionMeasurements,
    ExtensionRoofSpace,
    FloorConstruction,
    FloorMeasurement,
    General,
    HeatingAndHotWater,
    InspectionMetadata,
    MainBuildingConstruction,
    MainBuildingMeasurements,
    MainHeating,
    Renewables,
    RoomCountElements,
    RoofSpace,
    RoofSpaceDetail,
    SecondaryHeating,
    Shower,
    SurveyAddendum,
    Ventilation,
    WaterHeating,
    WaterUse,
    Window,
)
# Fixture JSON files live in a "fixtures" directory next to this test module.
_TESTS_DIR = os.path.dirname(__file__)
FIXTURES = os.path.join(_TESTS_DIR, "fixtures")
def load_text_fixture() -> list[str]:
    """Load the first example site-notes text fixture.

    Returns:
        The parsed contents of ``site_notes_example_text.json`` located in
        the ``FIXTURES`` directory (annotated as a list of strings).
    """
    fixture_path = os.path.join(FIXTURES, "site_notes_example_text.json")
    # Decode explicitly as UTF-8 so the loaded text does not depend on the
    # platform's default locale encoding.
    with open(fixture_path, encoding="utf-8") as f:
        return json.load(f)
def load_text_fixture_2() -> list[str]:
    """Load the second example site-notes text fixture.

    Returns:
        The parsed contents of ``site_notes_example_2_text.json`` located in
        the ``FIXTURES`` directory (annotated as a list of strings).
    """
    fixture_path = os.path.join(FIXTURES, "site_notes_example_2_text.json")
    # Decode explicitly as UTF-8 so the loaded text does not depend on the
    # platform's default locale encoding.
    with open(fixture_path, encoding="utf-8") as f:
        return json.load(f)
class TestInspectionMetadata:
    """Checks ``extract_inspection_metadata`` against the first example fixture."""

    def test_full_inspection_metadata(self) -> None:
        """The extracted metadata equals the complete expected record."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_inspection_metadata()

        expected = InspectionMetadata(
            inspection_surveyor="Benjamin Burke",
            email_address="ben@mbsolutionsgroup.co.uk",
            report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD",
            created_on="2025-11-10",
            date_of_inspection=date(2025, 9, 25),
            property_address="40, Abbey Place, Crewe, Cheshire, CW1 4JR",
            property_photo=True,
        )
        assert actual == expected
class TestGeneral:
    """Checks ``extract_general`` against the first example fixture."""

    @pytest.fixture
    def general(self) -> General:
        """Provide the ``General`` section extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_general()

    def test_epc_checked_before_assessment(self, general: General) -> None:
        """The EPC-checked flag is extracted as ``True``."""
        assert general.epc_checked_before_assessment is True

    def test_epc_exists_at_point_of_assessment(self, general: General) -> None:
        """The EPC-exists flag is extracted as ``False``."""
        assert general.epc_exists_at_point_of_assessment is False

    def test_inspection_date(self, general: General) -> None:
        """The inspection date is extracted as a ``date`` object."""
        expected_date = date(2025, 9, 25)
        assert general.inspection_date == expected_date

    def test_transaction_type(self, general: General) -> None:
        """The transaction type string is extracted verbatim."""
        assert general.transaction_type == "Grant-Scheme (ECO, RHI, etc.)"

    def test_tenure(self, general: General) -> None:
        """The tenure string is extracted verbatim."""
        assert general.tenure == "Rented Social"

    def test_property_type(self, general: General) -> None:
        """The property type string is extracted verbatim."""
        assert general.property_type == "House"

    def test_detachment_type(self, general: General) -> None:
        """The detachment type string is extracted verbatim."""
        assert general.detachment_type == "Mid-terrace"

    def test_number_of_storeys(self, general: General) -> None:
        """The storey count equals 2."""
        assert general.number_of_storeys == 2

    def test_number_of_extensions(self, general: General) -> None:
        """The extension count equals 1 for this fixture."""
        assert general.number_of_extensions == 1

    def test_electricity_smart_meter(self, general: General) -> None:
        """The electricity smart meter flag is extracted as ``True``."""
        assert general.electricity_smart_meter is True

    def test_mains_gas_available(self, general: General) -> None:
        """The mains-gas-available flag is extracted as ``True``."""
        assert general.mains_gas_available is True

    def test_measurements_location(self, general: General) -> None:
        """The measurements location string is extracted verbatim."""
        assert general.measurements_location == "Internal"

    def test_full_general(self, general: General) -> None:
        """The whole section equals the complete expected record."""
        expected = General(
            epc_checked_before_assessment=True,
            epc_exists_at_point_of_assessment=False,
            inspection_date=date(2025, 9, 25),
            transaction_type="Grant-Scheme (ECO, RHI, etc.)",
            tenure="Rented Social",
            property_type="House",
            detachment_type="Mid-terrace",
            number_of_storeys=2,
            terrain_type="Suburban",
            number_of_extensions=1,
            electricity_smart_meter=True,
            electric_meter_type="Single",
            dwelling_export_capable=True,
            mains_gas_available=True,
            gas_smart_meter=True,
            gas_meter_accessible=True,
            measurements_location="Internal",
        )
        assert general == expected
class TestGeneralNoExtensions:
    """Checks ``extract_general`` against the second example fixture."""

    @pytest.fixture
    def general(self) -> General:
        """Provide the ``General`` section extracted from the second fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture_2())
        return extractor.extract_general()

    def test_number_of_extensions_when_no_extensions(self, general: General) -> None:
        """The extension count equals 0 for the second fixture."""
        extension_count = general.number_of_extensions
        assert extension_count == 0
class TestBuildingConstruction:
    """Checks ``extract_building_construction`` against the first example fixture."""

    @pytest.fixture
    def construction(self) -> BuildingConstruction:
        """Provide the building-construction section from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_building_construction()

    def test_main_building_wall_u_value_known_is_false(
        self, construction: BuildingConstruction
    ) -> None:
        """The main building's wall-U-value-known flag is ``False``."""
        main_building = construction.main_building
        assert main_building.wall_u_value_known is False

    def test_main_building_wall_thickness_mm(
        self, construction: BuildingConstruction
    ) -> None:
        """The main building's wall thickness equals 310."""
        main_building = construction.main_building
        assert main_building.wall_thickness_mm == 310

    def test_main_building_filled_cavity_indicators_present(
        self, construction: BuildingConstruction
    ) -> None:
        """The main building carries the filled-cavity indicator text."""
        indicators = construction.main_building.filled_cavity_indicators
        assert indicators == "evidence of cavity fill drill holes"

    def test_extension_filled_cavity_indicators_absent(
        self, construction: BuildingConstruction
    ) -> None:
        """The first extension has no filled-cavity indicator."""
        assert construction.extensions is not None
        first_extension = construction.extensions[0]
        assert first_extension.filled_cavity_indicators is None

    def test_one_extension(self, construction: BuildingConstruction) -> None:
        """Exactly one extension is extracted."""
        assert construction.extensions is not None
        extension_count = len(construction.extensions)
        assert extension_count == 1

    def test_extension_id(self, construction: BuildingConstruction) -> None:
        """The first extension has id 1."""
        assert construction.extensions is not None
        first_extension = construction.extensions[0]
        assert first_extension.id == 1

    def test_full_building_construction(
        self, construction: BuildingConstruction
    ) -> None:
        """The whole section equals the complete expected record."""
        expected_main_building = MainBuildingConstruction(
            age_range="1950-1966",
            age_indicators="local knowledge, enquiries of owner",
            walls_construction_type="Cavity",
            cavity_construction_indicators="wall thickness over 270 mm",
            walls_insulation_type="Filled Cavity",
            filled_cavity_indicators="evidence of cavity fill drill holes",
            thermal_conductivity_of_wall_insulation="Unknown",
            wall_u_value_known=False,
            wall_thickness_mm=310,
            party_wall_construction_type="Cavity Masonry, Filled",
        )
        expected_floor = FloorConstruction(
            floor_type="Ground Floor",
            floor_construction="Solid",
            floor_insulation_type="As Built",
            floor_u_value_known=False,
        )
        expected_extension = ExtensionConstruction(
            id=1,
            age_range="2003-2006",
            age_indicators="local knowledge, enquiries of owner",
            walls_construction_type="Cavity",
            cavity_construction_indicators="wall thickness over 270 mm",
            walls_insulation_type="As built",
            thermal_conductivity_of_wall_insulation="Unknown",
            wall_u_value_known=False,
            wall_thickness_mm=310,
            party_wall_construction_type="Cavity Masonry, Filled",
            filled_cavity_indicators=None,
        )
        expected = BuildingConstruction(
            main_building=expected_main_building,
            floor=expected_floor,
            extensions=[expected_extension],
        )
        assert construction == expected
class TestBuildingMeasurements:
    """Checks ``extract_building_measurements`` against the first example fixture."""

    @pytest.fixture
    def measurements(self) -> BuildingMeasurements:
        """Provide the building-measurements section from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_building_measurements()

    def test_main_building_has_two_floors(
        self, measurements: BuildingMeasurements
    ) -> None:
        """Two floors are extracted for the main building."""
        main_floors = measurements.main_building.floors
        assert len(main_floors) == 2

    def test_main_building_floor_area(
        self, measurements: BuildingMeasurements
    ) -> None:
        """The first main-building floor has an area of 35.68."""
        first_floor_entry = measurements.main_building.floors[0]
        assert first_floor_entry.area_m2 == 35.68

    def test_integer_token_parses_to_float(
        self, measurements: BuildingMeasurements
    ) -> None:
        """A perimeter written without a decimal point compares equal to 11.0."""
        # "11" in the PDF (no decimal) should parse to 11.0
        second_floor_entry = measurements.main_building.floors[1]
        assert second_floor_entry.heat_loss_perimeter_m == 11.0

    def test_extension_measurements_present(
        self, measurements: BuildingMeasurements
    ) -> None:
        """Exactly one set of extension measurements is extracted."""
        assert measurements.extensions is not None
        extension_count = len(measurements.extensions)
        assert extension_count == 1

    def test_extension_id(self, measurements: BuildingMeasurements) -> None:
        """The first extension has id 1."""
        assert measurements.extensions is not None
        first_extension = measurements.extensions[0]
        assert first_extension.id == 1

    def test_full_building_measurements(
        self, measurements: BuildingMeasurements
    ) -> None:
        """The whole section equals the complete expected record."""
        expected_main_floors = [
            FloorMeasurement(
                name="Floor 1",
                area_m2=35.68,
                height_m=2.19,
                heat_loss_perimeter_m=13.44,
                pwl_m=10.62,
            ),
            FloorMeasurement(
                name="Floor 0",
                area_m2=35.68,
                height_m=2.17,
                heat_loss_perimeter_m=11.0,
                pwl_m=10.62,
            ),
        ]
        expected_extension = ExtensionMeasurements(
            id=1,
            floors=[
                FloorMeasurement(
                    name="Floor 0",
                    area_m2=3.8,
                    height_m=2.0,
                    heat_loss_perimeter_m=5.7,
                    pwl_m=0.0,
                )
            ],
        )
        expected = BuildingMeasurements(
            main_building=MainBuildingMeasurements(floors=expected_main_floors),
            extensions=[expected_extension],
        )
        assert measurements == expected
class TestRoofSpace:
    """Checks ``extract_roof_space`` against the first example fixture."""

    @pytest.fixture
    def roof_space(self) -> RoofSpace:
        """Provide the roof-space section extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_roof_space()

    def test_main_building_insulation_thickness_mm(
        self, roof_space: RoofSpace
    ) -> None:
        """The main building's numeric insulation thickness equals 100."""
        main_building = roof_space.main_building
        assert main_building.insulation_thickness_mm == 100

    def test_main_building_insulation_thickness_string_absent(
        self, roof_space: RoofSpace
    ) -> None:
        """The main building has no string-valued insulation thickness."""
        main_building = roof_space.main_building
        assert main_building.insulation_thickness is None

    def test_main_building_rooms_in_roof(self, roof_space: RoofSpace) -> None:
        """The main building's rooms-in-roof flag is ``False``."""
        main_building = roof_space.main_building
        assert main_building.rooms_in_roof is False

    def test_main_building_roof_u_value_known(self, roof_space: RoofSpace) -> None:
        """The main building's roof-U-value-known flag is ``False``."""
        main_building = roof_space.main_building
        assert main_building.roof_u_value_known is False

    def test_extension_uses_string_thickness(self, roof_space: RoofSpace) -> None:
        """The first extension has a string thickness and no numeric one."""
        assert roof_space.extensions is not None
        assert roof_space.extensions[0].insulation_thickness == "As built"
        assert roof_space.extensions[0].insulation_thickness_mm is None

    def test_full_roof_space(self, roof_space: RoofSpace) -> None:
        """The whole section equals the complete expected record."""
        expected_main_building = RoofSpaceDetail(
            construction_type="Pitched roof (Slates or tiles), Access to loft",
            insulation_at="Joists",
            roof_u_value_known=False,
            cavity_wall_construction_indicators="cavity visible in roof space",
            rooms_in_roof=False,
            insulation_thickness_mm=100,
            insulation_thickness=None,
        )
        expected_extension = ExtensionRoofSpace(
            id=1,
            construction_type="Pitched roof, Sloping ceiling",
            insulation_at="Sloping ceiling insulation",
            roof_u_value_known=False,
            cavity_wall_construction_indicators="No indicator of construction visible",
            rooms_in_roof=False,
            insulation_thickness_mm=None,
            insulation_thickness="As built",
        )
        expected = RoofSpace(
            main_building=expected_main_building,
            extensions=[expected_extension],
        )
        assert roof_space == expected
class TestWindows:
    """Checks ``extract_windows`` against the first example fixture.

    ``Window`` is taken from the module-level imports rather than imported
    inside a test body, matching how the other datatypes in this file are
    brought into scope.
    """

    @pytest.fixture
    def windows(self) -> list[Window]:
        """Provide the windows extracted from the first fixture.

        NOTE(review): the ``list[Window]`` annotation is inferred from the
        indexing and ``Window`` equality checks in the tests below; confirm
        against the ``extract_windows`` signature.
        """
        return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_windows()

    def test_window_count(self, windows: list[Window]) -> None:
        """Eight windows are extracted."""
        assert len(windows) == 8

    def test_ids_are_sequential(self, windows: list[Window]) -> None:
        """Window ids run from 1 to 8 in extraction order."""
        assert [w.id for w in windows] == list(range(1, 9))

    def test_first_window_location(self, windows: list[Window]) -> None:
        """The first window is located in the main building."""
        assert windows[0].location == "Main Building"

    def test_extension_window_location(self, windows: list[Window]) -> None:
        """The fourth window is located in extension 1."""
        assert windows[3].location == "Extension 1"

    def test_height_parses_to_float(self, windows: list[Window]) -> None:
        """The first window's height equals 1.2."""
        assert windows[0].height_m == 1.2

    def test_draught_proofed_true(self, windows: list[Window]) -> None:
        """The first window's draught-proofed flag is ``True``."""
        assert windows[0].draught_proofed is True

    def test_permanent_shutters_false(self, windows: list[Window]) -> None:
        """The first window's permanent-shutters flag is ``False``."""
        assert windows[0].permanent_shutters is False

    def test_first_window_full(self, windows: list[Window]) -> None:
        """The first window equals the complete expected record."""
        assert windows[0] == Window(
            id=1,
            location="Main Building",
            wall_type="External wall",
            glazing_type="Double glazing, Unknown install date",
            window_type="Window",
            frame_type="Wooden or PVC",
            glazing_gap="16 mm or more",
            draught_proofed=True,
            permanent_shutters=False,
            height_m=1.2,
            width_m=2.3,
            orientation="North West",
        )
class TestWaterHeatingCylinderThickness:
    """Checks cylinder-related water-heating fields across both example fixtures."""

    @pytest.fixture
    def hhw(self) -> HeatingAndHotWater:
        """Provide heating and hot water extracted from the second fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture_2())
        return extractor.extract_heating_and_hot_water()

    @pytest.fixture
    def hhw_no_cylinder(self) -> HeatingAndHotWater:
        """Provide heating and hot water extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_heating_and_hot_water()

    def test_cylinder_insulation_thickness_mm(self, hhw: HeatingAndHotWater) -> None:
        """The second fixture yields an insulation thickness of 38."""
        water_heating = hhw.water_heating
        assert water_heating.insulation_thickness_mm == 38

    def test_cylinder_insulation_thickness_mm_absent(
        self, hhw_no_cylinder: HeatingAndHotWater
    ) -> None:
        """The first fixture yields no insulation thickness."""
        water_heating = hhw_no_cylinder.water_heating
        assert water_heating.insulation_thickness_mm is None

    def test_cylinder_size(self, hhw: HeatingAndHotWater) -> None:
        """The second fixture yields the cylinder size text verbatim."""
        water_heating = hhw.water_heating
        assert water_heating.cylinder_size == "Normal (90-130 litres)"
class TestHeatingAndHotWater:
    """Checks ``extract_heating_and_hot_water`` against the first example fixture."""

    @pytest.fixture
    def hhw(self) -> HeatingAndHotWater:
        """Provide heating and hot water extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_heating_and_hot_water()

    def test_product_id_parses_to_int(self, hhw: HeatingAndHotWater) -> None:
        """The main heating product id equals 16839."""
        main_heating = hhw.main_heating
        assert main_heating.product_id == 16839

    def test_summer_efficiency_parses_to_float(self, hhw: HeatingAndHotWater) -> None:
        """The main heating summer efficiency equals 0.0."""
        main_heating = hhw.main_heating
        assert main_heating.summer_efficiency == 0.0

    def test_condensing_true(self, hhw: HeatingAndHotWater) -> None:
        """The main heating condensing flag is ``True``."""
        main_heating = hhw.main_heating
        assert main_heating.condensing is True

    def test_fghrs_false(self, hhw: HeatingAndHotWater) -> None:
        """The flue-gas-heat-recovery flag is ``False``."""
        # multi-line label
        main_heating = hhw.main_heating
        assert main_heating.flue_gas_heat_recovery_system is False

    def test_secondary_fuel(self, hhw: HeatingAndHotWater) -> None:
        """The secondary fuel text is extracted verbatim."""
        secondary = hhw.secondary_heating
        assert secondary.secondary_fuel == "No Secondary Heating"

    def test_water_heating_no_cylinder(self, hhw: HeatingAndHotWater) -> None:
        """With no cylinder, insulation type and thermostat are ``None``."""
        assert hhw.water_heating.cylinder_size == "No Cylinder"
        assert hhw.water_heating.insulation_type is None
        assert hhw.water_heating.has_thermostat is None

    def test_full_heating_and_hot_water(self, hhw: HeatingAndHotWater) -> None:
        """The whole section equals the complete expected record."""
        expected_main_heating = MainHeating(
            selection_method="PCDF Search",
            system_type="Boiler with radiators or underfloor heating",
            product_id=16839,
            manufacturer="Vaillant",
            model="ecoTEC pro 28",
            orig_manufacturer="Vaillant",
            fuel="Mains gas",
            summer_efficiency=0.0,
            type="Combi",
            condensing=True,
            year="2005 - 2015",
            mount="Wall",
            open_flue="Room-sealed",
            fan_assist=True,
            status="Normal status for an actual product",
            central_heating_pump_age="Unknown",
            controls="Programmer, room thermostat and TRVs",
            flue_gas_heat_recovery_system=False,
            weather_compensator=False,
            emitter="Radiators",
            emitter_temperature="Unknown",
        )
        expected_secondary_heating = SecondaryHeating(
            secondary_fuel="No Secondary Heating",
        )
        expected_water_heating = WaterHeating(
            type="Regular",
            system="From main heating 1",
            cylinder_size="No Cylinder",
            cylinder_measured_heat_loss=None,
            insulation_type=None,
            insulation_thickness_mm=None,
            has_thermostat=None,
        )
        expected = HeatingAndHotWater(
            main_heating=expected_main_heating,
            secondary_heating=expected_secondary_heating,
            water_heating=expected_water_heating,
        )
        assert hhw == expected
class TestVentilation:
    """Checks ``extract_ventilation`` against the first example fixture."""

    @pytest.fixture
    def ventilation(self) -> Ventilation:
        """Provide the ventilation section extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_ventilation()

    def test_ventilation_type(self, ventilation: Ventilation) -> None:
        """The ventilation type text is extracted verbatim."""
        expected_type = "Mechanical Extract - Decentralised"
        assert ventilation.ventilation_type == expected_type

    def test_number_of_open_flues(self, ventilation: Ventilation) -> None:
        """The open flue count equals 0."""
        open_flues = ventilation.number_of_open_flues
        assert open_flues == 0

    def test_ventilation_in_pcdf_database(self, ventilation: Ventilation) -> None:
        """The in-PCDF-database flag is ``False``."""
        in_database = ventilation.ventilation_in_pcdf_database
        assert in_database is False

    def test_full_ventilation(self, ventilation: Ventilation) -> None:
        """The whole section equals the complete expected record."""
        expected = Ventilation(
            ventilation_type="Mechanical Extract - Decentralised",
            has_fixed_air_conditioning=False,
            number_of_open_flues=0,
            number_of_closed_flues=0,
            number_of_boiler_flues=0,
            number_of_other_flues=0,
            number_of_extract_fans=0,
            number_of_passive_vents=0,
            number_of_flueless_gas_fires=0,
            pressure_test="No test",
            draught_lobby=False,
            ventilation_in_pcdf_database=False,
        )
        assert ventilation == expected
class TestConservatories:
    """Checks ``extract_conservatories`` against the first example fixture."""

    def test_full_conservatories(self) -> None:
        """The section reports that there is no conservatory."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_conservatories()
        expected = Conservatories(has_conservatory=False)
        assert actual == expected
class TestRenewables:
    """Checks ``extract_renewables`` against the first example fixture."""

    def test_number_of_pv_batteries_none_string_becomes_zero(self) -> None:
        """The PV battery count is extracted as 0."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        renewables = extractor.extract_renewables()
        assert renewables.number_of_pv_batteries == 0

    def test_full_renewables(self) -> None:
        """The whole section equals the complete expected record."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_renewables()
        expected = Renewables(
            wind_turbines=False,
            solar_hot_water=False,
            photovoltaic_array=False,
            number_of_pv_batteries=0,
            hydro=False,
        )
        assert actual == expected
class TestRoomCountElements:
    """Checks ``extract_room_count_elements`` against the first example fixture."""

    @pytest.fixture
    def rce(self) -> RoomCountElements:
        """Provide the room-count section extracted from the first fixture."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        return extractor.extract_room_count_elements()

    def test_habitable_rooms(self, rce: RoomCountElements) -> None:
        """The habitable room count equals 3."""
        habitable_rooms = rce.number_of_habitable_rooms
        assert habitable_rooms == 3

    def test_heated_rooms_null(self, rce: RoomCountElements) -> None:
        """The heated room count is ``None``."""
        heated_rooms = rce.number_of_heated_rooms
        assert heated_rooms is None

    def test_full_room_count_elements(self, rce: RoomCountElements) -> None:
        """The whole section equals the complete expected record."""
        expected = RoomCountElements(
            number_of_habitable_rooms=3,
            any_unheated_rooms=False,
            number_of_heated_rooms=None,
            number_of_external_doors=2,
            number_of_insulated_external_doors=0,
            number_of_draughtproofed_external_doors=2,
            number_of_open_chimneys=0,
            number_of_blocked_chimneys=0,
            number_of_fixed_incandescent_bulbs=4,
            exact_led_cfl_count_known=True,
            number_of_fixed_led_bulbs=0,
            number_of_fixed_cfl_bulbs=1,
            waste_water_heat_recovery="None",
        )
        assert rce == expected
class TestWaterUse:
    """Checks ``extract_water_use`` against the first example fixture."""

    def test_full_water_use(self) -> None:
        """The whole section equals the complete expected record."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_water_use()
        expected_shower = Shower(id=1, outlet_type="Non-Electric Shower")
        expected = WaterUse(
            number_of_baths=1,
            number_of_special_features=0,
            showers=[expected_shower],
        )
        assert actual == expected
class TestCustomerResponse:
    """Checks ``extract_customer_response`` against the first example fixture."""

    def test_full_customer_response(self) -> None:
        """The whole section equals the complete expected record."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_customer_response()
        expected = CustomerResponse(
            customer_present=True,
            willing_to_answer_satisfaction_survey=False,
        )
        assert actual == expected
class TestExtract:
    """Spot-checks one field per section of the combined ``extract`` result."""

    def test_full_extract(self) -> None:
        """Each section of the combined result carries its expected sample value."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        report = extractor.extract()

        # Metadata and general details.
        assert report.inspection_metadata.inspection_surveyor == "Benjamin Burke"
        assert report.general.inspection_date == date(2025, 9, 25)

        # Building fabric.
        assert report.building_construction.main_building.wall_thickness_mm == 310
        assert report.building_measurements.main_building.floors[0].area_m2 == 35.68
        assert report.roof_space.main_building.insulation_thickness_mm == 100
        assert len(report.windows) == 8

        # Services.
        assert report.heating_and_hot_water.main_heating.product_id == 16839
        assert report.ventilation.ventilation_type == "Mechanical Extract - Decentralised"
        assert report.conservatories.has_conservatory is False
        assert report.renewables.number_of_pv_batteries == 0

        # Remaining sections.
        assert report.room_count_elements.number_of_habitable_rooms == 3
        assert report.water_use.number_of_baths == 1
        assert report.customer_response.customer_present is True
        assert report.addendum.addendum == "None"
class TestSurveyAddendum:
    """Checks ``extract_addendum`` against the first example fixture."""

    def test_hard_to_treat_flags(self) -> None:
        """All three hard-to-treat cavity flags are ``False``."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        addendum = extractor.extract_addendum()
        assert addendum.hard_to_treat_cavity_access_issues is False
        assert addendum.hard_to_treat_cavity_high_exposure is False
        assert addendum.hard_to_treat_cavity_narrow_cavities is False

    def test_full_addendum(self) -> None:
        """The whole section equals the complete expected record."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_addendum()
        expected = SurveyAddendum(
            addendum="None",
            related_party_disclosure="No related party",
            hard_to_treat_cavity_access_issues=False,
            hard_to_treat_cavity_high_exposure=False,
            hard_to_treat_cavity_narrow_cavities=False,
        )
        assert actual == expected