From 1a53a8d83ee90beae1772775b18a3c785dbfed2c Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Fri, 24 Apr 2026 13:13:24 +0000
Subject: [PATCH] =?UTF-8?q?Extract=20Elmhurst=20site=20notes=20to=20datacl?=
 =?UTF-8?q?ass=20=F0=9F=9F=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tests/test_elmhurst_extractor.py | 473 ++++++++++++++++++
 1 file changed, 473 insertions(+)
 create mode 100644 backend/documents_parser/tests/test_elmhurst_extractor.py

diff --git a/backend/documents_parser/tests/test_elmhurst_extractor.py b/backend/documents_parser/tests/test_elmhurst_extractor.py
new file mode 100644
index 00000000..99670ee8
--- /dev/null
+++ b/backend/documents_parser/tests/test_elmhurst_extractor.py
@@ -0,0 +1,473 @@
+"""Tests for ElmhurstSiteNotesExtractor run against a stored JSON fixture.
+
+The fixture is extracted once per module and the tests assert fields of the
+resulting ElmhurstSiteNotes. Float-valued fields are compared with
+pytest.approx so the checks do not depend on exact binary representation.
+"""
+
+import json
+import os
+from datetime import date
+
+import pytest
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.surveys.elmhurst_site_notes import (
+    BathsAndShowers,
+    BuildingPartDimensions,
+    ElmhurstSiteNotes,
+    FloorDetails,
+    FloorDimension,
+    Lighting,
+    MainHeating,
+    Meters,
+    PropertyDetails,
+    Renewables,
+    RoofDetails,
+    Shower,
+    SurveyorInfo,
+    VentilationAndCooling,
+    WallDetails,
+    WaterHeating,
+    Window,
+)
+
+FIXTURE_PATH = os.path.join(
+    os.path.dirname(__file__), "fixtures", "elmhurst_site_notes_1_text.json"
+)
+
+
+@pytest.fixture(scope="module")
+def result() -> ElmhurstSiteNotes:
+    """Extract the fixture once per module and share the result across tests."""
+    with open(FIXTURE_PATH, encoding="utf-8") as f:
+        pages = json.load(f)
+    return ElmhurstSiteNotesExtractor(pages).extract()
+
+
+class TestSurveyorInfo:
+    """Checks on ``result.surveyor_info``."""
+
+    def test_surveyor_code(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.surveyor_code == "P960-0001"
+
+    def test_name(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.name == "Richard Matthew Ratcliff"
+
+    def test_title(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.title == "Mr."
+
+    def test_tel_number(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.tel_number == "07760 443 469"
+
+    def test_survey_reference(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.survey_reference == "001573"
+
+    def test_my_reference_none(self, result: ElmhurstSiteNotes) -> None:
+        assert result.surveyor_info.my_reference is None
+
+
+class TestPropertyDetails:
+    """Checks on ``result.property_details``."""
+
+    def test_rdsap_version(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.rdsap_version == "RdSAP10"
+
+    def test_reference_number(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.reference_number == "P960-0001-001573"
+
+    def test_lodgement_required(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.lodgement_required is False
+
+    def test_regs_region(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.regs_region == "England"
+
+    def test_epc_language(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.epc_language == "English"
+
+    def test_uprn_none(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.uprn is None
+
+    def test_postcode(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.postcode == "BB10 1XX"
+
+    def test_region(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.region == "West Pennines"
+
+    def test_house_name_none(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.house_name is None
+
+    def test_house_number(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.house_number == "19"
+
+    def test_street(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.street == "Queens Road"
+
+    def test_locality_none(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.locality is None
+
+    def test_town(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.town == "BURNLEY"
+
+    def test_county_none(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.county is None
+
+    def test_tenure(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.tenure == "Rented (social)"
+
+    def test_transaction_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.transaction_type == "Grant scheme"
+
+    def test_inspection_date(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.inspection_date == date(2026, 3, 6)
+
+    def test_process_date(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.process_date == date(2026, 3, 6)
+
+    def test_epc_exists(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_details.epc_exists is False
+
+
+class TestPropertyDescription:
+    """Checks on the top-level property description fields of the result."""
+
+    def test_property_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.property_type == "B Bungalow"
+
+    def test_attachment(self, result: ElmhurstSiteNotes) -> None:
+        assert result.attachment == "E End-Terrace"
+
+    def test_number_of_storeys(self, result: ElmhurstSiteNotes) -> None:
+        assert result.number_of_storeys == 1
+
+    def test_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
+        assert result.habitable_rooms == 2
+
+    def test_heated_habitable_rooms(self, result: ElmhurstSiteNotes) -> None:
+        assert result.heated_habitable_rooms == 2
+
+    def test_construction_age_band(self, result: ElmhurstSiteNotes) -> None:
+        assert result.construction_age_band == "D 1950-1966"
+
+    def test_has_conservatory(self, result: ElmhurstSiteNotes) -> None:
+        assert result.has_conservatory is False
+
+
+class TestDimensions:
+    """Checks on ``result.dimensions`` and its first floor entry."""
+
+    def test_dimension_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.dimension_type == "Internal"
+
+    def test_floor_count(self, result: ElmhurstSiteNotes) -> None:
+        assert len(result.dimensions.floors) == 1
+
+    def test_floor_name(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.floors[0].name == "Lowest Floor"
+
+    def test_floor_area(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.floors[0].area_m2 == pytest.approx(44.89)
+
+    def test_floor_room_height(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.floors[0].room_height_m == pytest.approx(2.24)
+
+    def test_floor_heat_loss_perimeter(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.floors[0].heat_loss_perimeter_m == pytest.approx(20.10)
+
+    def test_floor_party_wall_length(self, result: ElmhurstSiteNotes) -> None:
+        assert result.dimensions.floors[0].party_wall_length_m == pytest.approx(6.70)
+
+
+class TestWalls:
+    """Checks on ``result.walls``."""
+
+    def test_wall_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.wall_type == "CA Cavity"
+
+    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.insulation == "F Filled Cavity"
+
+    def test_thickness_unknown(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.thickness_unknown is False
+
+    def test_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.thickness_mm == 300
+
+    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.u_value_known is False
+
+    def test_party_wall_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.walls.party_wall_type == "U Unable to determine"
+
+
+class TestRoof:
+    """Checks on ``result.roof``."""
+
+    def test_roof_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.roof.roof_type == "PA Pitched (slates/tiles), access to loft"
+
+    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+        assert result.roof.insulation == "J Joists"
+
+    def test_insulation_thickness_mm(self, result: ElmhurstSiteNotes) -> None:
+        assert result.roof.insulation_thickness_mm == 270
+
+    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+        assert result.roof.u_value_known is False
+
+
+class TestFloor:
+    """Checks on ``result.floor``."""
+
+    def test_location(self, result: ElmhurstSiteNotes) -> None:
+        assert result.floor.location == "G Ground floor"
+
+    def test_floor_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.floor.floor_type == "N Suspended, not timber"
+
+    def test_insulation(self, result: ElmhurstSiteNotes) -> None:
+        assert result.floor.insulation == "A As built"
+
+    def test_default_u_value(self, result: ElmhurstSiteNotes) -> None:
+        assert result.floor.default_u_value == pytest.approx(0.69)
+
+    def test_u_value_known(self, result: ElmhurstSiteNotes) -> None:
+        assert result.floor.u_value_known is False
+
+
+class TestDoors:
+    """Checks on the top-level door counts of the result."""
+
+    def test_door_count(self, result: ElmhurstSiteNotes) -> None:
+        assert result.door_count == 0
+
+    def test_insulated_door_count(self, result: ElmhurstSiteNotes) -> None:
+        assert result.insulated_door_count == 0
+
+
+class TestWindows:
+    """Checks on ``result.windows`` and ``result.draught_proofing_percent``."""
+
+    def test_window_count(self, result: ElmhurstSiteNotes) -> None:
+        assert len(result.windows) == 4
+
+    def test_draught_proofing_percent(self, result: ElmhurstSiteNotes) -> None:
+        assert result.draught_proofing_percent == 100
+
+    def test_first_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
+        w = result.windows[0]
+        assert w.width_m == pytest.approx(1.30)
+        assert w.height_m == pytest.approx(1.10)
+        assert w.area_m2 == pytest.approx(1.43)
+
+    def test_first_window_glazing(self, result: ElmhurstSiteNotes) -> None:
+        w = result.windows[0]
+        assert w.glazing_type == "Double post or during 2022"
+        assert w.frame_factor == pytest.approx(0.70)
+
+    def test_first_window_location(self, result: ElmhurstSiteNotes) -> None:
+        w = result.windows[0]
+        assert w.building_part == "Main"
+        assert w.location == "External wall"
+        assert w.orientation == "North"
+
+    def test_first_window_performance(self, result: ElmhurstSiteNotes) -> None:
+        w = result.windows[0]
+        assert w.data_source == "Manufacturer"
+        assert w.u_value == pytest.approx(1.40)
+        assert w.g_value == pytest.approx(0.72)
+        assert w.draught_proofed is True
+        assert w.permanent_shutters == "None"
+
+    def test_third_window_orientation(self, result: ElmhurstSiteNotes) -> None:
+        assert result.windows[2].orientation == "South"
+
+    def test_fourth_window_dimensions(self, result: ElmhurstSiteNotes) -> None:
+        w = result.windows[3]
+        assert w.width_m == pytest.approx(0.70)
+        assert w.height_m == pytest.approx(1.30)
+        assert w.area_m2 == pytest.approx(0.91)
+
+
+class TestVentilation:
+    """Checks on ``result.ventilation``."""
+
+    def test_open_chimneys(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.open_chimneys_count == 0
+
+    def test_open_flues(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.open_flues_count == 0
+
+    def test_open_chimneys_closed_fire(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.open_chimneys_closed_fire_count == 0
+
+    def test_solid_fuel_boiler_flues(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.solid_fuel_boiler_flues_count == 0
+
+    def test_other_heater_flues(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.other_heater_flues_count == 0
+
+    def test_blocked_chimneys(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.blocked_chimneys_count == 0
+
+    def test_extract_fans(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.extract_fans_count == 2
+
+    def test_passive_vents(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.passive_vents_count == 0
+
+    def test_flueless_gas_fires(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.flueless_gas_fires_count == 0
+
+    def test_fixed_space_cooling(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.fixed_space_cooling is False
+
+    def test_draught_lobby(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.draught_lobby == "Not present"
+
+    def test_mechanical_ventilation(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.mechanical_ventilation is False
+
+    def test_pressure_test_method(self, result: ElmhurstSiteNotes) -> None:
+        assert result.ventilation.pressure_test_method == "Not available"
+
+
+class TestLighting:
+    """Checks on ``result.lighting``."""
+
+    def test_total_bulbs(self, result: ElmhurstSiteNotes) -> None:
+        assert result.lighting.total_bulbs == 8
+
+    def test_led_cfl_count_known(self, result: ElmhurstSiteNotes) -> None:
+        assert result.lighting.led_cfl_count_known is True
+
+    def test_led_count(self, result: ElmhurstSiteNotes) -> None:
+        assert result.lighting.led_count == 4
+
+    def test_cfl_count(self, result: ElmhurstSiteNotes) -> None:
+        assert result.lighting.cfl_count == 4
+
+    def test_incandescent_count(self, result: ElmhurstSiteNotes) -> None:
+        assert result.lighting.incandescent_count == 0
+
+
+class TestMainHeating:
+    """Checks on ``result.main_heating``."""
+
+    def test_pcdf_boiler_reference(self, result: ElmhurstSiteNotes) -> None:
+        assert (
+            result.main_heating.pcdf_boiler_reference
+            == "17742 Potterton, Promax 33 Combi ErP, 88.30%"
+        )
+
+    def test_heat_emitter(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.heat_emitter == "Radiators"
+
+    def test_heat_pump_age(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.heat_pump_age == "Unknown"
+
+    def test_fuel_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.fuel_type == "Mains gas"
+
+    def test_flue_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.flue_type == "Balanced"
+
+    def test_fan_assisted_flue(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.fan_assisted_flue is True
+
+    def test_design_flow_temperature(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.design_flow_temperature == "Unknown"
+
+    def test_heating_controls_ees(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.heating_controls_ees == "CBE"
+
+    def test_heating_controls_sap(self, result: ElmhurstSiteNotes) -> None:
+        assert (
+            result.main_heating.heating_controls_sap
+            == "SAP code 2106, Programmer, room thermostat and TRVs"
+        )
+
+    def test_percentage_of_heat(self, result: ElmhurstSiteNotes) -> None:
+        assert result.main_heating.percentage_of_heat == 100
+
+
+class TestMeters:
+    """Checks on ``result.meters``."""
+
+    def test_electricity_meter_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.meters.electricity_meter_type == "Single"
+
+    def test_main_gas(self, result: ElmhurstSiteNotes) -> None:
+        assert result.meters.main_gas is True
+
+    def test_electricity_smart_meter(self, result: ElmhurstSiteNotes) -> None:
+        assert result.meters.electricity_smart_meter is False
+
+    def test_gas_smart_meter(self, result: ElmhurstSiteNotes) -> None:
+        assert result.meters.gas_smart_meter is False
+
+
+class TestWaterHeating:
+    """Checks on ``result.water_heating``."""
+
+    def test_water_heating_code(self, result: ElmhurstSiteNotes) -> None:
+        assert result.water_heating.water_heating_code == "HWP"
+
+    def test_water_heating_sap_code(self, result: ElmhurstSiteNotes) -> None:
+        assert result.water_heating.water_heating_sap_code == 901
+
+    def test_water_heating_fuel_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.water_heating.water_heating_fuel_type == "Mains gas"
+
+    def test_hot_water_cylinder_present(self, result: ElmhurstSiteNotes) -> None:
+        assert result.water_heating.hot_water_cylinder_present is False
+
+
+class TestBathsAndShowers:
+    """Checks on ``result.baths_and_showers``."""
+
+    def test_number_of_baths(self, result: ElmhurstSiteNotes) -> None:
+        assert result.baths_and_showers.number_of_baths == 0
+
+    def test_number_of_baths_connected(self, result: ElmhurstSiteNotes) -> None:
+        assert result.baths_and_showers.number_of_baths_connected == 0
+
+    def test_shower_count(self, result: ElmhurstSiteNotes) -> None:
+        assert len(result.baths_and_showers.showers) == 1
+
+    def test_shower_number(self, result: ElmhurstSiteNotes) -> None:
+        assert result.baths_and_showers.showers[0].shower_number == 1
+
+    def test_shower_outlet_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.baths_and_showers.showers[0].outlet_type == "Electric shower"
+
+    def test_shower_connected(self, result: ElmhurstSiteNotes) -> None:
+        assert result.baths_and_showers.showers[0].connected == "None"
+
+
+class TestRenewables:
+    """Checks on ``result.renewables``."""
+
+    def test_solar_water_heating(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.solar_water_heating is False
+
+    def test_wwhrs_present(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.wwhrs_present is False
+
+    def test_flue_gas_heat_recovery_present(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.flue_gas_heat_recovery_present is False
+
+    def test_photovoltaic_panel(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.photovoltaic_panel == "None"
+
+    def test_export_capable_meter(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.export_capable_meter is False
+
+    def test_wind_turbine_present(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.wind_turbine_present is False
+
+    def test_wind_turbines_terrain_type(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.wind_turbines_terrain_type == "Suburban"
+
+    def test_hydro_electricity_generated_kwh(self, result: ElmhurstSiteNotes) -> None:
+        assert result.renewables.hydro_electricity_generated_kwh == pytest.approx(0.0)