diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index cb90af18..7a879773 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -7,6 +7,7 @@ python-jose==3.3.0 cryptography==43.0.3 mangum==0.19.0 playwright==1.58.0 +pymupdf # AWS boto3==1.35.44 # Data diff --git a/.dockerignore b/.dockerignore index 246f8354..0c7d7749 100644 --- a/.dockerignore +++ b/.dockerignore @@ -18,5 +18,6 @@ utils/tests/* etl/epc/tests/* etl/epc_clean/tests/* etl/spatial/tests/* +backend/documents_parser/tests/* diff --git a/.gitignore b/.gitignore index ee5aa8ab..d6d23313 100644 --- a/.gitignore +++ b/.gitignore @@ -280,7 +280,7 @@ cache/ *.pptx *.csv *.xlsx -*.pdf +# *.pdf **/Chunks/ *.ipynb diff --git a/backend/app/db/models/epc_property.py b/backend/app/db/models/epc_property.py new file mode 100644 index 00000000..0bbf2add --- /dev/null +++ b/backend/app/db/models/epc_property.py @@ -0,0 +1,658 @@ +from __future__ import annotations + +from typing import Optional +from sqlmodel import SQLModel, Field + +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + EnergyElement, + MainHeatingDetail, + SapBuildingPart, + SapFloorDimension, + SapFlatDetails, + SapWindow, +) + + +class EpcPropertyModel(SQLModel, table=True): + __tablename__ = "epc_property" + + id: Optional[int] = Field(default=None, primary_key=True) + property_id: int = Field(foreign_key="property.id", nullable=False) + portfolio_id: int = Field(foreign_key="portfolio.id", nullable=False) + + # Identity / admin + uprn: Optional[int] = Field(default=None) + uprn_source: Optional[str] = Field(default=None) + report_reference: Optional[str] = Field(default=None) + report_type: Optional[str] = Field(default=None) + assessment_type: Optional[str] = Field(default=None) + sap_version: Optional[float] = Field(default=None) + schema_type: Optional[str] = Field(default=None) + schema_versions_original: Optional[str] = 
Field(default=None) + status: Optional[str] = Field(default=None) + calculation_software_version: Optional[str] = Field(default=None) + + # Address + address_line_1: Optional[str] = Field(default=None) + address_line_2: Optional[str] = Field(default=None) + post_town: Optional[str] = Field(default=None) + postcode: Optional[str] = Field(default=None) + region_code: Optional[str] = Field(default=None) + country_code: Optional[str] = Field(default=None) + language_code: Optional[str] = Field(default=None) + + # Property description + dwelling_type: str + property_type: Optional[str] = Field(default=None) + built_form: Optional[str] = Field(default=None) + tenure: str + transaction_type: str + inspection_date: str # store as ISO string; cast on read if needed + completion_date: Optional[str] = Field(default=None) + registration_date: Optional[str] = Field(default=None) + total_floor_area_m2: float + measurement_type: Optional[int] = Field(default=None) + + # Flags + solar_water_heating: bool + has_hot_water_cylinder: bool + has_fixed_air_conditioning: bool + has_conservatory: Optional[bool] = Field(default=None) + has_heated_separate_conservatory: Optional[bool] = Field(default=None) + conservatory_type: Optional[int] = Field(default=None) + + # Counts + door_count: int + wet_rooms_count: int + extensions_count: int + heated_rooms_count: int + open_chimneys_count: int + habitable_rooms_count: int + insulated_door_count: int + cfl_fixed_lighting_bulbs_count: int + led_fixed_lighting_bulbs_count: int + incandescent_fixed_lighting_bulbs_count: int + blocked_chimneys_count: Optional[int] = Field(default=None) + draughtproofed_door_count: Optional[int] = Field(default=None) + energy_rating_average: Optional[int] = Field(default=None) + low_energy_fixed_lighting_bulbs_count: Optional[int] = Field(default=None) + fixed_lighting_outlets_count: Optional[int] = Field(default=None) + low_energy_fixed_lighting_outlets_count: Optional[int] = Field(default=None) + 
number_of_storeys: Optional[int] = Field(default=None) + any_unheated_rooms: Optional[bool] = Field(default=None) + + # Misc + hydro: Optional[bool] = Field(default=None) + photovoltaic_array: Optional[bool] = Field(default=None) + waste_water_heat_recovery: Optional[str] = Field(default=None) + pressure_test: Optional[int] = Field(default=None) + pressure_test_certificate_number: Optional[int] = Field(default=None) + percent_draughtproofed: Optional[int] = Field(default=None) + insulated_door_u_value: Optional[float] = Field(default=None) + multiple_glazed_proportion: Optional[int] = Field(default=None) + windows_transmission_u_value: Optional[float] = Field(default=None) + windows_transmission_data_source: Optional[int] = Field(default=None) + windows_transmission_solar_transmittance: Optional[float] = Field(default=None) + + # Energy source + energy_mains_gas: bool + energy_meter_type: str + energy_pv_battery_count: int + energy_wind_turbines_count: int + energy_gas_smart_meter_present: bool + energy_is_dwelling_export_capable: bool + energy_wind_turbines_terrain_type: str + energy_electricity_smart_meter_present: bool + energy_pv_connection: Optional[str] = Field(default=None) + energy_pv_percent_roof_area: Optional[int] = Field(default=None) + energy_pv_battery_capacity: Optional[float] = Field(default=None) + energy_wind_turbine_hub_height: Optional[float] = Field(default=None) + energy_wind_turbine_rotor_diameter: Optional[float] = Field(default=None) + + # Heating config + heating_cylinder_size: Optional[str] = Field(default=None) + heating_water_heating_code: Optional[int] = Field(default=None) + heating_water_heating_fuel: Optional[int] = Field(default=None) + heating_immersion_heating_type: Optional[str] = Field(default=None) + heating_cylinder_insulation_type: Optional[str] = Field(default=None) + heating_cylinder_thermostat: Optional[str] = Field(default=None) + heating_secondary_fuel_type: Optional[int] = Field(default=None) + 
heating_secondary_heating_type: Optional[str] = Field(default=None) + heating_cylinder_insulation_thickness_mm: Optional[int] = Field(default=None) + heating_wwhrs_index_number_1: Optional[int] = Field(default=None) + heating_wwhrs_index_number_2: Optional[int] = Field(default=None) + heating_shower_outlet_type: Optional[str] = Field(default=None) + heating_shower_wwhrs: Optional[int] = Field(default=None) + + # Ventilation + ventilation_type: Optional[str] = Field(default=None) + ventilation_draught_lobby: Optional[bool] = Field(default=None) + ventilation_pressure_test: Optional[str] = Field(default=None) + ventilation_open_flues_count: Optional[int] = Field(default=None) + ventilation_closed_flues_count: Optional[int] = Field(default=None) + ventilation_boiler_flues_count: Optional[int] = Field(default=None) + ventilation_other_flues_count: Optional[int] = Field(default=None) + ventilation_extract_fans_count: Optional[int] = Field(default=None) + ventilation_passive_vents_count: Optional[int] = Field(default=None) + ventilation_flueless_gas_fires_count: Optional[int] = Field(default=None) + ventilation_in_pcdf_database: Optional[bool] = Field(default=None) + mechanical_ventilation: Optional[int] = Field(default=None) + mechanical_vent_duct_type: Optional[int] = Field(default=None) + mechanical_vent_duct_placement: Optional[int] = Field(default=None) + mechanical_vent_duct_insulation: Optional[int] = Field(default=None) + mechanical_ventilation_index_number: Optional[int] = Field(default=None) + mechanical_vent_measured_installation: Optional[str] = Field(default=None) + + @classmethod + def from_epc_property_data( + cls, + data: EpcPropertyData, + property_id: int, + portfolio_id: int, + ) -> EpcPropertyModel: + es = data.sap_energy_source + h = data.sap_heating + v = data.sap_ventilation + shower = h.shower_outlets.shower_outlet if h.shower_outlets else None + pv = es.photovoltaic_supply + wt = es.wind_turbine_details + pvb = es.pv_batteries + + return cls( + 
property_id=property_id, + portfolio_id=portfolio_id, + uprn=data.uprn, + uprn_source=data.uprn_source, + report_reference=data.report_reference, + report_type=data.report_type, + assessment_type=data.assessment_type, + sap_version=data.sap_version, + schema_type=data.schema_type, + schema_versions_original=data.schema_versions_original, + status=data.status, + calculation_software_version=data.calculation_software_version, + address_line_1=data.address_line_1, + address_line_2=data.address_line_2, + post_town=data.post_town, + postcode=data.postcode, + region_code=data.region_code, + country_code=data.country_code, + language_code=data.language_code, + dwelling_type=data.dwelling_type, + property_type=data.property_type, + built_form=data.built_form, + tenure=data.tenure, + transaction_type=data.transaction_type, + inspection_date=data.inspection_date.isoformat(), + completion_date=( + data.completion_date.isoformat() if data.completion_date else None + ), + registration_date=( + data.registration_date.isoformat() if data.registration_date else None + ), + total_floor_area_m2=data.total_floor_area_m2, + measurement_type=data.measurement_type, + solar_water_heating=data.solar_water_heating, + has_hot_water_cylinder=data.has_hot_water_cylinder, + has_fixed_air_conditioning=data.has_fixed_air_conditioning, + has_conservatory=data.has_conservatory, + has_heated_separate_conservatory=data.has_heated_separate_conservatory, + conservatory_type=data.conservatory_type, + door_count=data.door_count, + wet_rooms_count=data.wet_rooms_count, + extensions_count=data.extensions_count, + heated_rooms_count=data.heated_rooms_count, + open_chimneys_count=data.open_chimneys_count, + habitable_rooms_count=data.habitable_rooms_count, + insulated_door_count=data.insulated_door_count, + cfl_fixed_lighting_bulbs_count=data.cfl_fixed_lighting_bulbs_count, + led_fixed_lighting_bulbs_count=data.led_fixed_lighting_bulbs_count, + 
incandescent_fixed_lighting_bulbs_count=data.incandescent_fixed_lighting_bulbs_count, + blocked_chimneys_count=data.blocked_chimneys_count, + draughtproofed_door_count=data.draughtproofed_door_count, + energy_rating_average=data.energy_rating_average, + low_energy_fixed_lighting_bulbs_count=data.low_energy_fixed_lighting_bulbs_count, + fixed_lighting_outlets_count=data.fixed_lighting_outlets_count, + low_energy_fixed_lighting_outlets_count=data.low_energy_fixed_lighting_outlets_count, + number_of_storeys=data.number_of_storeys, + any_unheated_rooms=data.any_unheated_rooms, + hydro=data.hydro, + photovoltaic_array=data.photovoltaic_array, + waste_water_heat_recovery=data.waste_water_heat_recovery, + pressure_test=data.pressure_test, + pressure_test_certificate_number=data.pressure_test_certificate_number, + percent_draughtproofed=data.percent_draughtproofed, + insulated_door_u_value=data.insulated_door_u_value, + multiple_glazed_proportion=data.multiple_glazed_propertion, + windows_transmission_u_value=( + data.windows_transmission_details.u_value + if data.windows_transmission_details + else None + ), + windows_transmission_data_source=( + data.windows_transmission_details.data_source + if data.windows_transmission_details + else None + ), + windows_transmission_solar_transmittance=( + data.windows_transmission_details.solar_transmittance + if data.windows_transmission_details + else None + ), + energy_mains_gas=es.mains_gas, + energy_meter_type=str(es.meter_type), + energy_pv_battery_count=es.pv_battery_count, + energy_wind_turbines_count=es.wind_turbines_count, + energy_gas_smart_meter_present=es.gas_smart_meter_present, + energy_is_dwelling_export_capable=es.is_dwelling_export_capable, + energy_wind_turbines_terrain_type=str(es.wind_turbines_terrain_type), + energy_electricity_smart_meter_present=es.electricity_smart_meter_present, + energy_pv_connection=( + str(es.pv_connection) if es.pv_connection is not None else None + ), + energy_pv_percent_roof_area=( + 
pv.none_or_no_details.percent_roof_area if pv else None + ), + energy_pv_battery_capacity=pvb.pv_battery.battery_capacity if pvb else None, + energy_wind_turbine_hub_height=wt.hub_height if wt else None, + energy_wind_turbine_rotor_diameter=wt.rotor_diameter if wt else None, + heating_cylinder_size=( + str(h.cylinder_size) if h.cylinder_size is not None else None + ), + heating_water_heating_code=h.water_heating_code, + heating_water_heating_fuel=h.water_heating_fuel, + heating_immersion_heating_type=( + str(h.immersion_heating_type) + if h.immersion_heating_type is not None + else None + ), + heating_cylinder_insulation_type=( + str(h.cylinder_insulation_type) + if h.cylinder_insulation_type is not None + else None + ), + heating_cylinder_thermostat=h.cylinder_thermostat, + heating_secondary_fuel_type=h.secondary_fuel_type, + heating_secondary_heating_type=( + str(h.secondary_heating_type) + if h.secondary_heating_type is not None + else None + ), + heating_cylinder_insulation_thickness_mm=h.cylinder_insulation_thickness_mm, + heating_wwhrs_index_number_1=h.instantaneous_wwhrs.wwhrs_index_number1 if h.instantaneous_wwhrs else None, # NOTE(review): None-guard added to match the shower/ventilation fields; confirm instantaneous_wwhrs can be absent on the domain type + heating_wwhrs_index_number_2=h.instantaneous_wwhrs.wwhrs_index_number2 if h.instantaneous_wwhrs else None, + heating_shower_outlet_type=( + str(shower.shower_outlet_type) if shower else None + ), + heating_shower_wwhrs=shower.shower_wwhrs if shower else None, + ventilation_type=v.ventilation_type if v else None, + ventilation_draught_lobby=v.draught_lobby if v else None, + ventilation_pressure_test=v.pressure_test if v else None, + ventilation_open_flues_count=v.open_flues_count if v else None, + ventilation_closed_flues_count=v.closed_flues_count if v else None, + ventilation_boiler_flues_count=v.boiler_flues_count if v else None, + ventilation_other_flues_count=v.other_flues_count if v else None, + ventilation_extract_fans_count=v.extract_fans_count if v else None, + ventilation_passive_vents_count=v.passive_vents_count if v else None, + ventilation_flueless_gas_fires_count=( +
v.flueless_gas_fires_count if v else None + ), + ventilation_in_pcdf_database=v.ventilation_in_pcdf_database if v else None, + mechanical_ventilation=data.mechanical_ventilation, + mechanical_vent_duct_type=data.mechanical_vent_duct_type, + mechanical_vent_duct_placement=data.mechanical_vent_duct_placement, + mechanical_vent_duct_insulation=data.mechanical_vent_duct_insulation, + mechanical_ventilation_index_number=data.mechanical_ventilation_index_number, + mechanical_vent_measured_installation=data.mechanical_vent_measured_installation, + ) + + +class EpcPropertyEnergyPerformanceModel(SQLModel, table=True): + __tablename__ = "epc_property_energy_performance" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field( + foreign_key="epc_property.id", nullable=False, unique=True + ) + + energy_rating_current: Optional[int] = Field(default=None) + energy_consumption_current: Optional[int] = Field(default=None) + environmental_impact_current: Optional[int] = Field(default=None) + heating_cost_current: Optional[float] = Field(default=None) + lighting_cost_current: Optional[float] = Field(default=None) + hot_water_cost_current: Optional[float] = Field(default=None) + co2_emissions_current: Optional[float] = Field(default=None) + co2_emissions_current_per_floor_area: Optional[int] = Field(default=None) + current_energy_efficiency_band: Optional[str] = Field(default=None) + energy_rating_potential: Optional[float] = Field(default=None) + energy_consumption_potential: Optional[int] = Field(default=None) + environmental_impact_potential: Optional[int] = Field(default=None) + heating_cost_potential: Optional[float] = Field(default=None) + lighting_cost_potential: Optional[float] = Field(default=None) + hot_water_cost_potential: Optional[float] = Field(default=None) + co2_emissions_potential: Optional[float] = Field(default=None) + potential_energy_efficiency_band: Optional[str] = Field(default=None) + + @classmethod + def 
from_epc_property_data( + cls, data: EpcPropertyData, epc_property_id: int + ) -> EpcPropertyEnergyPerformanceModel: + return cls( + epc_property_id=epc_property_id, + energy_rating_current=data.energy_rating_current, + energy_consumption_current=data.energy_consumption_current, + environmental_impact_current=data.environmental_impact_current, + heating_cost_current=data.heating_cost_current, + lighting_cost_current=data.lighting_cost_current, + hot_water_cost_current=data.hot_water_cost_current, + co2_emissions_current=data.co2_emissions_current, + co2_emissions_current_per_floor_area=data.co2_emissions_current_per_floor_area, + current_energy_efficiency_band=( + data.current_energy_efficiency_band.value + if data.current_energy_efficiency_band + else None + ), + energy_rating_potential=data.energy_rating_potential, + energy_consumption_potential=data.energy_consumption_potential, + environmental_impact_potential=data.environmental_impact_potential, + heating_cost_potential=data.heating_cost_potential, + lighting_cost_potential=data.lighting_cost_potential, + hot_water_cost_potential=data.hot_water_cost_potential, + co2_emissions_potential=data.co2_emissions_potential, + potential_energy_efficiency_band=( + data.potential_energy_efficiency_band.value + if data.potential_energy_efficiency_band + else None + ), + ) + + +class EpcFlatDetailsModel(SQLModel, table=True): + __tablename__ = "epc_flat_details" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field( + foreign_key="epc_property.id", nullable=False, unique=True + ) + + level: int + top_storey: str + flat_location: int + heat_loss_corridor: int + storey_count: Optional[int] = Field(default=None) + unheated_corridor_length_m: Optional[int] = Field(default=None) + + @classmethod + def from_domain( + cls, flat: SapFlatDetails, epc_property_id: int + ) -> EpcFlatDetailsModel: + return cls( + epc_property_id=epc_property_id, + level=flat.level, + top_storey=flat.top_storey, + 
flat_location=flat.flat_location, + heat_loss_corridor=flat.heat_loss_corridor, + storey_count=flat.storey_count, + unheated_corridor_length_m=flat.unheated_corridor_length_m, + ) + + +class EpcMainHeatingDetailModel(SQLModel, table=True): + __tablename__ = "epc_main_heating_detail" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False) + + has_fghrs: bool + main_fuel_type: str + heat_emitter_type: str + emitter_temperature: str + main_heating_control: str + fan_flue_present: Optional[bool] = Field(default=None) + boiler_flue_type: Optional[int] = Field(default=None) + boiler_ignition_type: Optional[int] = Field(default=None) + central_heating_pump_age: Optional[int] = Field(default=None) + central_heating_pump_age_str: Optional[str] = Field(default=None) + main_heating_index_number: Optional[int] = Field(default=None) + sap_main_heating_code: Optional[int] = Field(default=None) + main_heating_number: Optional[int] = Field(default=None) + main_heating_category: Optional[int] = Field(default=None) + main_heating_fraction: Optional[int] = Field(default=None) + main_heating_data_source: Optional[int] = Field(default=None) + condensing: Optional[bool] = Field(default=None) + weather_compensator: Optional[bool] = Field(default=None) + + @classmethod + def from_domain( + cls, detail: MainHeatingDetail, epc_property_id: int + ) -> EpcMainHeatingDetailModel: + return cls( + epc_property_id=epc_property_id, + has_fghrs=detail.has_fghrs, + main_fuel_type=str(detail.main_fuel_type), + heat_emitter_type=str(detail.heat_emitter_type), + emitter_temperature=str(detail.emitter_temperature), + main_heating_control=str(detail.main_heating_control), + fan_flue_present=detail.fan_flue_present, + boiler_flue_type=detail.boiler_flue_type, + boiler_ignition_type=detail.boiler_ignition_type, + central_heating_pump_age=detail.central_heating_pump_age, + 
central_heating_pump_age_str=detail.central_heating_pump_age_str, + main_heating_index_number=detail.main_heating_index_number, + sap_main_heating_code=detail.sap_main_heating_code, + main_heating_number=detail.main_heating_number, + main_heating_category=detail.main_heating_category, + main_heating_fraction=detail.main_heating_fraction, + main_heating_data_source=detail.main_heating_data_source, + condensing=detail.condensing, + weather_compensator=detail.weather_compensator, + ) + + +class EpcBuildingPartModel(SQLModel, table=True): + __tablename__ = "epc_building_part" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False) + + identifier: str + construction_age_band: str + wall_construction: str + wall_insulation_type: str + wall_thickness_measured: bool + party_wall_construction: str + building_part_number: Optional[int] = Field(default=None) + wall_dry_lined: Optional[bool] = Field(default=None) + wall_thickness_mm: Optional[int] = Field(default=None) + wall_insulation_thickness: Optional[str] = Field(default=None) + floor_heat_loss: Optional[int] = Field(default=None) + floor_insulation_thickness: Optional[str] = Field(default=None) + flat_roof_insulation_thickness: Optional[str] = Field(default=None) + floor_type: Optional[str] = Field(default=None) + floor_construction_type: Optional[str] = Field(default=None) + floor_insulation_type_str: Optional[str] = Field(default=None) + floor_u_value_known: Optional[bool] = Field(default=None) + roof_construction: Optional[int] = Field(default=None) + roof_insulation_location: Optional[str] = Field(default=None) + roof_insulation_thickness: Optional[str] = Field(default=None) + room_in_roof_floor_area: Optional[float] = Field(default=None) + room_in_roof_construction_age_band: Optional[str] = Field(default=None) + alt_wall_1_area: Optional[float] = Field(default=None) + alt_wall_1_dry_lined: Optional[str] = Field(default=None) + 
alt_wall_1_construction: Optional[int] = Field(default=None) + alt_wall_1_insulation_type: Optional[int] = Field(default=None) + alt_wall_1_thickness_measured: Optional[str] = Field(default=None) + alt_wall_1_insulation_thickness: Optional[str] = Field(default=None) + alt_wall_2_area: Optional[float] = Field(default=None) + alt_wall_2_dry_lined: Optional[str] = Field(default=None) + alt_wall_2_construction: Optional[int] = Field(default=None) + alt_wall_2_insulation_type: Optional[int] = Field(default=None) + alt_wall_2_thickness_measured: Optional[str] = Field(default=None) + alt_wall_2_insulation_thickness: Optional[str] = Field(default=None) + + @classmethod + def from_domain( + cls, part: SapBuildingPart, epc_property_id: int + ) -> EpcBuildingPartModel: + rir = part.sap_room_in_roof + aw1 = part.sap_alternative_wall_1 + aw2 = part.sap_alternative_wall_2 + return cls( + epc_property_id=epc_property_id, + identifier=part.identifier, + construction_age_band=part.construction_age_band, + wall_construction=str(part.wall_construction), + wall_insulation_type=str(part.wall_insulation_type), + wall_thickness_measured=part.wall_thickness_measured, + party_wall_construction=str(part.party_wall_construction), + building_part_number=part.building_part_number, + wall_dry_lined=part.wall_dry_lined, + wall_thickness_mm=part.wall_thickness_mm, + wall_insulation_thickness=part.wall_insulation_thickness, + floor_heat_loss=part.floor_heat_loss, + floor_insulation_thickness=part.floor_insulation_thickness, + flat_roof_insulation_thickness=( + str(part.flat_roof_insulation_thickness) + if part.flat_roof_insulation_thickness is not None + else None + ), + floor_type=part.floor_type, + floor_construction_type=part.floor_construction_type, + floor_insulation_type_str=part.floor_insulation_type_str, + floor_u_value_known=part.floor_u_value_known, + roof_construction=part.roof_construction, + roof_insulation_location=( + str(part.roof_insulation_location) + if 
part.roof_insulation_location is not None + else None + ), + roof_insulation_thickness=( + str(part.roof_insulation_thickness) + if part.roof_insulation_thickness is not None + else None + ), + room_in_roof_floor_area=float(rir.floor_area) if rir else None, + room_in_roof_construction_age_band=( + rir.construction_age_band if rir else None + ), + alt_wall_1_area=aw1.wall_area if aw1 else None, + alt_wall_1_dry_lined=aw1.wall_dry_lined if aw1 else None, + alt_wall_1_construction=aw1.wall_construction if aw1 else None, + alt_wall_1_insulation_type=aw1.wall_insulation_type if aw1 else None, + alt_wall_1_thickness_measured=aw1.wall_thickness_measured if aw1 else None, + alt_wall_1_insulation_thickness=( + aw1.wall_insulation_thickness if aw1 else None + ), + alt_wall_2_area=aw2.wall_area if aw2 else None, + alt_wall_2_dry_lined=aw2.wall_dry_lined if aw2 else None, + alt_wall_2_construction=aw2.wall_construction if aw2 else None, + alt_wall_2_insulation_type=aw2.wall_insulation_type if aw2 else None, + alt_wall_2_thickness_measured=aw2.wall_thickness_measured if aw2 else None, + alt_wall_2_insulation_thickness=( + aw2.wall_insulation_thickness if aw2 else None + ), + ) + + +class EpcFloorDimensionModel(SQLModel, table=True): + __tablename__ = "epc_floor_dimension" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_building_part_id: int = Field( + foreign_key="epc_building_part.id", nullable=False + ) + + floor: Optional[int] = Field(default=None) + room_height_m: float + total_floor_area_m2: float + party_wall_length_m: float + heat_loss_perimeter_m: float + floor_insulation: Optional[int] = Field(default=None) + floor_construction: Optional[int] = Field(default=None) + + @classmethod + def from_domain( + cls, dim: SapFloorDimension, epc_building_part_id: int + ) -> EpcFloorDimensionModel: + return cls( + epc_building_part_id=epc_building_part_id, + floor=dim.floor, + room_height_m=dim.room_height_m, + total_floor_area_m2=dim.total_floor_area_m2, + 
party_wall_length_m=dim.party_wall_length_m, + heat_loss_perimeter_m=dim.heat_loss_perimeter_m, + floor_insulation=dim.floor_insulation, + floor_construction=dim.floor_construction, + ) + + +class EpcWindowModel(SQLModel, table=True): + __tablename__ = "epc_window" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False) + + pvc_frame: str + glazing_gap: str + orientation: str + window_type: str + glazing_type: str + window_width: float + window_height: float + draught_proofed: bool + window_location: str + window_wall_type: str + permanent_shutters_present: bool + frame_factor: Optional[float] = Field(default=None) + permanent_shutters_insulated: Optional[str] = Field(default=None) + transmission_u_value: Optional[float] = Field(default=None) + transmission_data_source: Optional[int] = Field(default=None) + transmission_solar_transmittance: Optional[float] = Field(default=None) + + @classmethod + def from_domain(cls, window: SapWindow, epc_property_id: int) -> EpcWindowModel: + td = window.window_transmission_details + return cls( + epc_property_id=epc_property_id, + pvc_frame=str(window.pvc_frame), + glazing_gap=str(window.glazing_gap), + orientation=str(window.orientation), + window_type=str(window.window_type), + glazing_type=str(window.glazing_type), + window_width=window.window_width, + window_height=window.window_height, + draught_proofed=bool(window.draught_proofed), + window_location=str(window.window_location), + window_wall_type=str(window.window_wall_type), + permanent_shutters_present=bool(window.permanent_shutters_present), + frame_factor=window.frame_factor, + permanent_shutters_insulated=window.permanent_shutters_insulated, + transmission_u_value=td.u_value if td else None, + transmission_data_source=td.data_source if td else None, + transmission_solar_transmittance=td.solar_transmittance if td else None, + ) + + +class EpcEnergyElementModel(SQLModel, table=True): + 
__tablename__ = "epc_energy_element" + + id: Optional[int] = Field(default=None, primary_key=True) + epc_property_id: int = Field(foreign_key="epc_property.id", nullable=False) + + element_type: str # roof | wall | floor | main_heating | window | lighting | hot_water | secondary_heating | main_heating_controls + description: str + energy_efficiency_rating: int + environmental_efficiency_rating: int + + @classmethod + def from_domain( + cls, element: EnergyElement, element_type: str, epc_property_id: int + ) -> EpcEnergyElementModel: + return cls( + epc_property_id=epc_property_id, + element_type=element_type, + description=element.description, + energy_efficiency_rating=element.energy_efficiency_rating, + environmental_efficiency_rating=element.environmental_efficiency_rating, + ) diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 48f8b1ed..452c8d36 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -113,8 +113,8 @@ class PropertyModel(Base): Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False, ) - address = Column(Text) - postcode = Column(Text) + address = Column(Text, nullable=False) + postcode = Column(Text, nullable=False) has_pre_condition_report = Column(Boolean) has_recommendations = Column(Boolean) created_at = Column( diff --git a/backend/documents_parser/__init__.py b/backend/documents_parser/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/documents_parser/extractor.py b/backend/documents_parser/extractor.py new file mode 100644 index 00000000..822f7907 --- /dev/null +++ b/backend/documents_parser/extractor.py @@ -0,0 +1,692 @@ +from datetime import datetime +from typing import List, Optional + +from datatypes.epc.surveys.pashub_rdsap_site_notes import ( + BuildingConstruction, + InspectionMetadata, + BuildingMeasurements, + Conservatories, + CustomerResponse, + ExtensionConstruction, + ExtensionMeasurements, + 
ExtensionRoofSpace, + FloorConstruction, + FloorMeasurement, + General, + HeatingAndHotWater, + MainBuildingConstruction, + MainBuildingMeasurements, + MainHeating, + PasHubRdSapSiteNotes, + Renewables, + RoomCountElements, + RoofSpace, + RoofSpaceDetail, + SecondaryHeating, + Shower, + SurveyAddendum, + Ventilation, + WaterHeating, + WaterUse, + Window, +) + + +class PasHubRdSapSiteNotesExtractor: + def __init__(self, text_list: list[str]) -> None: + self.text_list = text_list + + # --- generic helpers --- + + def _get_in_doc(self, key: str, offset: int = 1) -> Optional[str]: + return self._get_in(self.text_list, key, offset) + + def _bool(self, key: str, offset: int = 1) -> bool: + val = self._get_in_doc(key, offset) + return val is not None and val.lower() == "yes" + + def _get_in( + self, list_to_process: List[str], key: str, offset: int = 1 + ) -> Optional[str]: + try: + idx = list_to_process.index(key) + return list_to_process[idx + offset].strip() or None + except (ValueError, IndexError): + return None + + def _bool_in(self, list_to_process: List[str], key: str, offset: int = 1) -> bool: + val = self._get_in(list_to_process, key, offset) + return val is not None and val.lower() == "yes" + + def _optional_bool_in(self, list_to_process: List[str], key: str) -> Optional[bool]: + val = self._get_in(list_to_process, key) + return None if val is None else val.lower() == "yes" + + def _is_known_in(self, list_to_process: List[str], key: str) -> bool: + val = self._get_in(list_to_process, key) + return val is not None and val.lower() != "not known" + + def _wall_thickness_in(self, list_to_process: List[str]) -> int: + val = self._get_in(list_to_process, "Wall thickness:") + return int(val.split()[0]) if val and val.split()[0].isdigit() else 0 # a non-numeric first token falls back to 0, like a missing value, instead of raising ValueError + + def _section(self, start: str, end: str) -> List[str]: + try: + start_idx = self.text_list.index(start) + end_idx = self.text_list.index(end, start_idx) + return self.text_list[start_idx:end_idx] + except ValueError: + return [] + + # --- public
extract methods --- + + def extract_inspection_metadata(self) -> InspectionMetadata: + try: + addr_start = self.text_list.index("Property Address:") + 1 + addr_end = self.text_list.index("Property Photo", addr_start) + property_address = ", ".join( + t.rstrip(",") for t in self.text_list[addr_start:addr_end] + ) + except ValueError: + property_address = "" + + created_on_raw = self._get_in_doc("Created On:") + created_on = ( + datetime.strptime(created_on_raw, "%d %B %Y").strftime("%Y-%m-%d") + if created_on_raw + else "" + ) + + date_of_inspection_raw = self._get_in_doc("Date of Inspection:") + if not date_of_inspection_raw: + raise ValueError("Date of Inspection not found in document") + date_of_inspection = datetime.strptime( + date_of_inspection_raw, "%d %B %Y" + ).date() + + return InspectionMetadata( + inspection_surveyor=self._get_in_doc("Inspection Surveyor:") or "", + email_address=self._get_in_doc("E-Mail Address:") or "", + report_reference=self._get_in_doc("Report Reference:") or "", + created_on=created_on, + date_of_inspection=date_of_inspection, + property_address=property_address, + property_photo="Property Photo" in self.text_list, + ) + + def extract(self) -> PasHubRdSapSiteNotes: + return PasHubRdSapSiteNotes( + inspection_metadata=self.extract_inspection_metadata(), + general=self.extract_general(), + building_construction=self.extract_building_construction(), + building_measurements=self.extract_building_measurements(), + roof_space=self.extract_roof_space(), + windows=self.extract_windows(), + heating_and_hot_water=self.extract_heating_and_hot_water(), + ventilation=self.extract_ventilation(), + conservatories=self.extract_conservatories(), + renewables=self.extract_renewables(), + room_count_elements=self.extract_room_count_elements(), + water_use=self.extract_water_use(), + customer_response=self.extract_customer_response(), + addendum=self.extract_addendum(), + ) + + def extract_general(self) -> General: + inspection_date_raw = 
self._get_in_doc("Inspection Date:") + if not inspection_date_raw: + raise ValueError("Inspection Date not found in document") + inspection_date = datetime.strptime(inspection_date_raw, "%d/%m/%Y").date() + + storeys_raw = self._get_in_doc("Number of storeys:") or "0" + extensions_raw = self._get_in_doc("Number of Extensions:") or "0" + _extensions_first = extensions_raw.split()[0] + extensions_count = int(_extensions_first) if _extensions_first.isdigit() else 0 + + return General( + epc_checked_before_assessment=self._bool( + "Confirm you have checked for the existence of an", offset=2 + ), + epc_exists_at_point_of_assessment=self._bool( + "Does an EPC exist at the point of carrying out this", offset=2 + ), + inspection_date=inspection_date, + transaction_type=self._get_in_doc("Transaction Type:") or "", + tenure=self._get_in_doc("Tenure:") or "", + property_type=self._get_in_doc("Type of Property:") or "", + detachment_type=self._get_in_doc("Detachment Type:") or "", + number_of_storeys=int(storeys_raw.split()[0]), + terrain_type=self._get_in_doc("Terrain Type:") or "", + number_of_extensions=extensions_count, + electricity_smart_meter=self._bool( + "Is an electricity smart meter present?" 
+ ), + electric_meter_type=self._get_in_doc("Electric meter type:") or "", + dwelling_export_capable=self._bool("Is the dwelling export-capable?"), + mains_gas_available=self._bool("Is mains gas available?"), + gas_smart_meter=self._bool("Is there a gas smart meter?"), + gas_meter_accessible=self._bool("Is the gas meter accessible?"), + measurements_location=self._get_in_doc("Select Measurements Location:") + or "", + ) + + def extract_building_construction(self) -> BuildingConstruction: + bc_section = self._section("Building Construction", "Building Measurements") + + # Find extension markers within this section + extension_markers = [] + i = 1 + while f"Extension {i}" in bc_section: + extension_markers.append(f"Extension {i}") + i += 1 + + # Slice main building data: from "Main Building" to first extension or end + main_start = bc_section.index("Main Building") + main_end = ( + bc_section.index(extension_markers[0]) + if extension_markers + else len(bc_section) + ) + main_data = bc_section[main_start:main_end] + + # Slice each extension's data + extensions = [] + for n, marker in enumerate(extension_markers): + ext_start = bc_section.index(marker) + ext_end = ( + bc_section.index(extension_markers[n + 1]) + if n + 1 < len(extension_markers) + else len(bc_section) + ) + ext_data = bc_section[ext_start:ext_end] + extensions.append(self._parse_extension_construction(n + 1, ext_data)) + + return BuildingConstruction( + main_building=self._parse_main_building_construction(main_data), + floor=self._parse_floor_construction(main_data), + extensions=extensions if extensions else None, + ) + + # --- private parsing helpers --- + + def _parse_main_building_construction( + self, data: List[str] + ) -> MainBuildingConstruction: + return MainBuildingConstruction( + age_range=self._get_in(data, "Age Range:") or "", + age_indicators=self._get_in(data, "Record indicators of property age:") + or "", + walls_construction_type=self._get_in(data, "Walls - Construction Type:") + or 
"", + cavity_construction_indicators=self._get_in( + data, "Record external indicators of Cavity Construction:" + ) + or "", + walls_insulation_type=self._get_in(data, "Walls - Insulation Type:") or "", + filled_cavity_indicators=self._get_in( + data, "Record indicators of filled cavity:" + ), + thermal_conductivity_of_wall_insulation=self._get_in( + data, "Thermal conductivity of wall insulation:" + ) + or "", + wall_u_value_known=self._is_known_in(data, "Wall U-Value known?"), + wall_thickness_mm=self._wall_thickness_in(data), + party_wall_construction_type=self._get_in( + data, "Party wall construction type:" + ) + or "", + ) + + def _parse_extension_construction( + self, ext_id: int, data: List[str] + ) -> ExtensionConstruction: + return ExtensionConstruction( + id=ext_id, + age_range=self._get_in(data, "Age Range:") or "", + age_indicators=self._get_in(data, "Record indicators of property age:") + or "", + walls_construction_type=self._get_in(data, "Walls - Construction Type:") + or "", + cavity_construction_indicators=self._get_in( + data, "Record external indicators of Cavity Construction:" + ) + or "", + walls_insulation_type=self._get_in(data, "Walls - Insulation Type:") or "", + filled_cavity_indicators=self._get_in( + data, "Record indicators of filled cavity:" + ), + thermal_conductivity_of_wall_insulation=self._get_in( + data, "Thermal conductivity of wall insulation:" + ) + or "", + wall_u_value_known=self._is_known_in(data, "Wall U-Value known?"), + wall_thickness_mm=self._wall_thickness_in(data), + party_wall_construction_type=self._get_in( + data, "Party wall construction type:" + ) + or "", + ) + + def extract_building_measurements(self) -> BuildingMeasurements: + bm_section = self._section("Building Measurements", "Roof Space") + + extension_markers = [] + i = 1 + while f"Extension {i}" in bm_section: + extension_markers.append(f"Extension {i}") + i += 1 + + main_start = bm_section.index("Main Building") + main_end = ( + 
bm_section.index(extension_markers[0]) + if extension_markers + else len(bm_section) + ) + main_floors = self._parse_floor_measurements(bm_section[main_start:main_end]) + + extensions = [] + for n, marker in enumerate(extension_markers): + ext_start = bm_section.index(marker) + ext_end = ( + bm_section.index(extension_markers[n + 1]) + if n + 1 < len(extension_markers) + else len(bm_section) + ) + extensions.append( + ExtensionMeasurements( + id=n + 1, + floors=self._parse_floor_measurements( + bm_section[ext_start:ext_end] + ), + ) + ) + + return BuildingMeasurements( + main_building=MainBuildingMeasurements(floors=main_floors), + extensions=extensions if extensions else None, + ) + + def extract_roof_space(self) -> RoofSpace: + rs_section = self._section("Roof Space", "Windows") + + extension_markers = [] + i = 1 + while f"Extension {i}" in rs_section: + extension_markers.append(f"Extension {i}") + i += 1 + + main_start = rs_section.index("Main Building") + main_end = ( + rs_section.index(extension_markers[0]) + if extension_markers + else len(rs_section) + ) + main_data = rs_section[main_start:main_end] + + extensions = [] + for n, marker in enumerate(extension_markers): + ext_start = rs_section.index(marker) + ext_end = ( + rs_section.index(extension_markers[n + 1]) + if n + 1 < len(extension_markers) + else len(rs_section) + ) + ext_data = rs_section[ext_start:ext_end] + extensions.append(self._parse_extension_roof_space(n + 1, ext_data)) + + return RoofSpace( + main_building=self._parse_roof_space_detail(main_data), + extensions=extensions if extensions else None, + ) + + def extract_windows(self) -> List[Window]: + w_section = self._section("Windows", "Heating & Hot Water") + + windows = [] + n = 1 + while f"Window {n}" in w_section: + start = w_section.index(f"Window {n}") + end = ( + w_section.index(f"Window {n + 1}") + if f"Window {n + 1}" in w_section + else len(w_section) + ) + windows.append(self._parse_window(n, w_section[start:end])) + n += 1 + + 
return windows + + def extract_heating_and_hot_water(self) -> HeatingAndHotWater: + hhw_section = self._section("Heating & Hot Water", "Ventilation") + return HeatingAndHotWater( + main_heating=self._parse_main_heating(hhw_section), + secondary_heating=self._parse_secondary_heating(hhw_section), + water_heating=self._parse_water_heating(hhw_section), + ) + + def extract_ventilation(self) -> Ventilation: + v_section = self._section("Ventilation", "Conservatories") + return Ventilation( + ventilation_type=self._get_in(v_section, "Ventilation type:") or "", + has_fixed_air_conditioning=self._bool_in( + v_section, "Has fixed air conditioning?" + ), + number_of_open_flues=int( + self._get_in(v_section, "Number of open flues:") or 0 + ), + number_of_closed_flues=int( + self._get_in(v_section, "Number of closed flues:") or 0 + ), + number_of_boiler_flues=int( + self._get_in(v_section, "Number of boiler flues:") or 0 + ), + number_of_other_flues=int( + self._get_in(v_section, "Number of other flues:") or 0 + ), + number_of_extract_fans=int( + self._get_in(v_section, "Number of extract fans:") or 0 + ), + number_of_passive_vents=int( + self._get_in(v_section, "Number of passive vents:") or 0 + ), + number_of_flueless_gas_fires=int( + self._get_in(v_section, "Number of flueless gas fires:") or 0 + ), + pressure_test=self._get_in(v_section, "Pressure test:") or "", + draught_lobby=self._bool_in(v_section, "Is there a draught lobby?"), + ventilation_in_pcdf_database=self._optional_bool_in( + v_section, "Is the ventilation in the PCDF database?" 
+ ), + ) + + def extract_conservatories(self) -> Conservatories: + c_section = self._section("Conservatories", "Renewables") + val = self._get_in(c_section, "Is there conservatory?") + return Conservatories( + has_conservatory=val is not None and val.lower() != "no conservatory" + ) + + def extract_renewables(self) -> Renewables: + r_section = self._section("Renewables", "Room Count Elements") + batteries_raw = self._get_in(r_section, "Number of PV batteries:") + batteries = ( + 0 + if batteries_raw is None or batteries_raw.lower() == "none" + else int(batteries_raw) + ) + pv_connection = self._get_in(r_section, "PV Connection:") + percent_raw = self._get_in(r_section, "Percentage of roof covered with photovoltaic array?") + percent_roof = int(percent_raw.split()[0]) if percent_raw else None + return Renewables( + wind_turbines=self._bool_in(r_section, "Has wind turbines?"), + solar_hot_water=self._bool_in(r_section, "Has solar hot water?"), + photovoltaic_array=self._bool_in(r_section, "Has photovoltaic array?"), + number_of_pv_batteries=batteries, + hydro=self._bool_in(r_section, "Is the dwelling connected to Hydro?"), + pv_connection=pv_connection, + percent_roof_covered_pv=percent_roof, + ) + + def extract_room_count_elements(self) -> RoomCountElements: + rce_section = self._section("Room Count Elements", "Customer Response") + heated_rooms_raw = self._get_in(rce_section, "Number of heated rooms?") + return RoomCountElements( + number_of_habitable_rooms=int( + self._get_in(rce_section, "Number of habitable rooms?") or 0 + ), + any_unheated_rooms=self._bool_in( + rce_section, "Are any of these rooms unheated?" 
+ ), + number_of_heated_rooms=int(heated_rooms_raw) if heated_rooms_raw else None, + number_of_external_doors=int( + self._get_in(rce_section, "Number of external doors?") or 0 + ), + number_of_insulated_external_doors=int( + self._get_in(rce_section, "Number of insulated external doors?") or 0 + ), + number_of_draughtproofed_external_doors=int( + self._get_in(rce_section, "Number of draughtproofed external doors?") + or 0 + ), + number_of_open_chimneys=int( + self._get_in(rce_section, "Number of open chimneys?") or 0 + ), + number_of_blocked_chimneys=int( + self._get_in(rce_section, "Number of blocked chimneys?") or 0 + ), + number_of_fixed_incandescent_bulbs=int( + self._get_in(rce_section, "Number of fixed incandescent bulbs:") or 0 + ), + exact_led_cfl_count_known=self._bool_in( + rce_section, "Is the exact number of LED and CFL bulbs known?" + ), + number_of_fixed_led_bulbs=int( + self._get_in(rce_section, "Number of fixed LED bulbs:") or 0 + ), + number_of_fixed_cfl_bulbs=int( + self._get_in(rce_section, "Number of fixed CFL bulbs:") or 0 + ), + waste_water_heat_recovery=self._get_in( + rce_section, "Are there any waste water heat recovery systems?" 
+ ) + or "", + ) + + def extract_water_use(self) -> WaterUse: + wu_section = self._section("Room Count Elements", "Customer Response") + baths_raw = self._get_in(wu_section, "Number of baths:") or "0" + special_raw = ( + self._get_in( + wu_section, "How many special features are there at the", offset=2 + ) + or "0" + ) + + showers = [] + n = 1 + while f"Shower {n}" in wu_section: + start = wu_section.index(f"Shower {n}") + end = ( + wu_section.index(f"Shower {n + 1}") + if f"Shower {n + 1}" in wu_section + else len(wu_section) + ) + shower_data = wu_section[start:end] + showers.append( + Shower( + id=n, + outlet_type=self._get_in(shower_data, "Shower outlet type:") or "", + ) + ) + n += 1 + + return WaterUse( + number_of_baths=int(baths_raw), + number_of_special_features=int(special_raw), + showers=showers, + ) + + def extract_customer_response(self) -> CustomerResponse: + cr_section = self._section( + "Customer Response", "Addendum + Related Party Disclosure" + ) + return CustomerResponse( + customer_present=self._bool_in(cr_section, "Customer present?"), + willing_to_answer_satisfaction_survey=self._bool_in( + cr_section, "Customer willing to answer satisfaction survey?" 
+ ), + ) + + def extract_addendum(self) -> SurveyAddendum: + a_section = self._section( + "Addendum + Related Party Disclosure", "Photographs Required" + ) + return SurveyAddendum( + addendum=self._get_in(a_section, "Addendum") or "", + related_party_disclosure=self._get_in(a_section, "Related party disclosure") + or "", + hard_to_treat_cavity_access_issues=self._bool_in( + a_section, + "Hard to treat cavity walls: Property has access", + offset=2, + ), + hard_to_treat_cavity_high_exposure=self._bool_in( + a_section, + "Hard to treat cavity walls: Property has high", + offset=2, + ), + hard_to_treat_cavity_narrow_cavities=self._bool_in( + a_section, + "Hard to treat cavity walls: Property has narrow", + offset=2, + ), + ) + + def _parse_main_heating(self, data: List[str]) -> MainHeating: + return MainHeating( + selection_method=self._get_in( + data, "How would you like to select the Heating System?" + ) + or "", + system_type=self._get_in(data, "System type:") or "", + product_id=int(self._get_in(data, "Product Id") or 0), + manufacturer=self._get_in(data, "Manufacturer") or "", + model=self._get_in(data, "Model") or "", + orig_manufacturer=self._get_in(data, "Orig Manuf") or "", + fuel=self._get_in(data, "Fuel") or "", + summer_efficiency=float(self._get_in(data, "S. 
Efficiency") or 0), + type=self._get_in(data, "Type") or "", + condensing=self._bool_in(data, "Condensing"), + year=self._get_in(data, "Year") or "", + mount=self._get_in(data, "Mount") or "", + open_flue=self._get_in(data, "Open Flue") or "", + fan_assist=self._bool_in(data, "Fan Assist"), + status=self._get_in(data, "Status") or "", + central_heating_pump_age=self._get_in(data, "Central heating pump age:") + or "", + controls=self._get_in(data, "Controls:") or "", + flue_gas_heat_recovery_system=self._bool_in( + data, "Does the boiler have a Flue Gas Heat Recover", offset=2 + ), + weather_compensator=self._bool_in(data, "Is there a weather compensator?"), + emitter=self._get_in(data, "Emitter:") or "", + emitter_temperature=self._get_in(data, "Emitter Temperature:") or "", + ) + + def _parse_secondary_heating(self, data: List[str]) -> SecondaryHeating: + system_raw = self._get_in(data, "Secondary System:") + return SecondaryHeating( + secondary_fuel=self._get_in(data, "Secondary Fuel") or "", + secondary_system=system_raw if system_raw else None, + ) + + def _parse_water_heating(self, data: List[str]) -> WaterHeating: + thickness_raw = self._get_in(data, "Insulation Thickness (mm):") or self._get_in(data, "Thickness:") + thickness_mm = int(thickness_raw.split()[0]) if thickness_raw else None + return WaterHeating( + type=self._get_in(data, "Water Heating Type:") or "", + system=self._get_in(data, "Water Heating System:") or "", + cylinder_size=self._get_in(data, "Cylinder Size:") or "", + cylinder_measured_heat_loss=self._get_in( + data, "Cylinder Measured Heat Loss:" + ), + insulation_type=self._get_in(data, "Insulation Type:"), + insulation_thickness_mm=thickness_mm, + has_thermostat=self._optional_bool_in(data, "Cylinder Thermostat:") or self._optional_bool_in(data, "Has thermostat?"), + immersion_type=self._get_in(data, "Immersion:"), + ) + + def _parse_window(self, window_id: int, data: List[str]) -> Window: + height_raw = self._get_in(data, "Window 
height:") + width_raw = self._get_in(data, "Window width:") + return Window( + id=window_id, + location=self._get_in(data, "Window location:") or "", + wall_type=self._get_in(data, "Window wall type:") or "", + glazing_type=self._get_in(data, "Glazing Type:") or "", + window_type=self._get_in(data, "Window type:") or "", + frame_type=self._get_in(data, "Window frame type:") or "", + glazing_gap=self._get_in(data, "What size is the glazing gap?") or "", + draught_proofed=self._bool_in(data, "Is the window draught proofed?"), + permanent_shutters=self._bool_in( + data, "Are there permanent shutters present?" + ), + height_m=float(height_raw.split()[0]) if height_raw else 0.0, + width_m=float(width_raw.split()[0]) if width_raw else 0.0, + orientation=self._get_in(data, "Orientation:") or "", + ) + + def _parse_insulation_thickness( + self, val: Optional[str] + ) -> tuple[Optional[int], Optional[str]]: + if val is None: + return None, None + try: + return int(val.split()[0]), None + except (ValueError, IndexError): + return None, val + + def _parse_roof_space_detail(self, data: List[str]) -> RoofSpaceDetail: + thickness_mm, thickness_str = self._parse_insulation_thickness( + self._get_in(data, "Roofs - Insulation Thickness:") + ) + return RoofSpaceDetail( + construction_type=self._get_in(data, "Roofs - Construction Type:") or "", + insulation_at=self._get_in(data, "Roofs - Insulation At:") or "", + roof_u_value_known=self._is_known_in(data, "Roof U-Value:"), + cavity_wall_construction_indicators=self._get_in( + data, "Record indicators of Cavity Wall Construction in roof", offset=2 + ) + or "", + rooms_in_roof=self._bool_in(data, "Are there rooms in the roof?"), + insulation_thickness_mm=thickness_mm, + insulation_thickness=thickness_str, + ) + + def _parse_extension_roof_space( + self, ext_id: int, data: List[str] + ) -> ExtensionRoofSpace: + thickness_mm, thickness_str = self._parse_insulation_thickness( + self._get_in(data, "Roofs - Insulation Thickness:") + ) + 
return ExtensionRoofSpace( + id=ext_id, + construction_type=self._get_in(data, "Roofs - Construction Type:") or "", + insulation_at=self._get_in(data, "Roofs - Insulation At:") or "", + roof_u_value_known=self._is_known_in(data, "Roof U-Value:"), + cavity_wall_construction_indicators=self._get_in( + data, "Record indicators of Cavity Wall Construction in roof", offset=2 + ) + or "", + rooms_in_roof=self._bool_in(data, "Are there rooms in the roof?"), + insulation_thickness_mm=thickness_mm, + insulation_thickness=thickness_str, + ) + + def _parse_floor_measurements(self, data: List[str]) -> List[FloorMeasurement]: + floors = [] + i = 0 + while i < len(data): + if data[i].startswith("Floor") and i + 4 < len(data): + floors.append( + FloorMeasurement( + name=data[i], + area_m2=float(data[i + 1]), + height_m=float(data[i + 2]), + heat_loss_perimeter_m=float(data[i + 3]), + pwl_m=float(data[i + 4]), + ) + ) + i += 5 + else: + i += 1 + return floors + + def _parse_floor_construction(self, data: List[str]) -> FloorConstruction: + return FloorConstruction( + floor_type=self._get_in(data, "Floor type:") or "", + floor_construction=self._get_in(data, "Floor Construction:") or "", + floor_insulation_type=self._get_in(data, "Floor Insulation Type:") or "", + floor_u_value_known=self._is_known_in(data, "Floor U-Value known?"), + ) diff --git a/backend/documents_parser/handler/handler.py b/backend/documents_parser/handler/handler.py new file mode 100644 index 00000000..09b84092 --- /dev/null +++ b/backend/documents_parser/handler/handler.py @@ -0,0 +1,39 @@ +import os +from typing import Any, Mapping + + +from utils.logger import setup_logger +from utils.s3 import upload_file_to_s3 + +logger = setup_logger() + +BUCKET = "retrofit-energy-assessments-dev" +PDF_S3_KEY = "example/SiteNotesExample.pdf" +PDF_LOCAL_PATH = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "tests", + "test_data", + "SiteNotesExample.pdf", +) + + +def upload_pdf(local_path: str, bucket: str, 
def upload_pdf(local_path: str, bucket: str, key: str) -> None:
    """Upload the file at *local_path* to ``s3://bucket/key``, logging before and after."""
    logger.info(f"Uploading {local_path} to s3://{bucket}/{key}")
    upload_file_to_s3(local_path, bucket, key)
    logger.info("Upload complete")


def handler(event: Mapping[str, Any], context: Any) -> None:
    """Entry point: upload the bundled example PDF to the configured bucket.

    *event* and *context* are accepted but not read.
    """
    logger.info("Entered handler")

    upload_pdf(PDF_LOCAL_PATH, BUCKET, PDF_S3_KEY)

    # Report where the file actually went; the previous message named a local
    # textract_blocks.json path that this function never writes.
    logger.info(f"Uploaded file to s3://{BUCKET}/{PDF_S3_KEY}")


if __name__ == "__main__":
    handler({}, None)


# --- backend/documents_parser/pdf.py (separate file in the patch) ---
from typing import List

import pymupdf


def pdf_to_text_list(pdf_bytes: bytes) -> List[str]:
    """Return every text line of the PDF in page order.

    Each page's text is split on "\n" and appended as-is, so blank entries
    are preserved.
    """
    tokens: List[str] = []
    with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            tokens.extend(page.get_text().split("\n"))
    return tokens
a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf new file mode 100644 index 00000000..ea913eeb Binary files /dev/null and b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_4.pdf differ diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf new file mode 100644 index 00000000..25cc8204 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_5.pdf differ diff --git a/backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf new file mode 100644 index 00000000..f19446bd Binary files /dev/null and b/backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf differ diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_2_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_2_text.json new file mode 100644 index 00000000..fb4ae41c --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_2_text.json @@ -0,0 +1,737 @@ +[ + "SMART EPC: Record of", + "Inspection & Site Notes", + "Inspection Surveyor:", + "Rebecca Mcwilliam", + "E-Mail Address:", + "rebeccamcdea@gmail.com", + "Report Reference:", + "ADD992DF-BE9B-47EB-8121-4C6143C6A7C9", + "Created On:", + "15 October 2025", + "Date of Inspection:", + "13 October 2025", + "Property Address:", + "13,", + "Sandfield Avenue ,", + "Wrenbury,", + "CW5 8EU", + "Property Photo", + "Page 1", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "Photo of electricity meter:", + "Photo of electricity meter:", + "RdSAP Assessment", + "General", + "Confirm you have checked for the existence of an", + "EPC before carrying out another energy assessment.", + "Yes", + "Does an EPC exist at the point of carrying out this", + "energy assessment?", + "Yes", + "Please select why another energy 
assessment needs", + "to be undertaken:", + "Assessor instructed to produce a new EPC upon request from building", + "owner/tenant/landlord after confirming to the requestor that a valid EPC", + "already exists", + "Inspection Date:", + "13/10/2025", + "Transaction Type:", + "None of the Above", + "Tenure:", + "Rented Social", + "Type of Property:", + "House", + "Detachment Type:", + "End-terrace", + "Number of storeys:", + "2 Storeys", + "Terrain Type:", + "Suburban", + "Number of Extensions:", + "No Extensions", + "Is an electricity smart meter present?", + "Yes", + "Electric meter type:", + "Single", + "Page 2", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "External indicators of Cavity Wall Construction:", + "Is the dwelling export-capable?", + "Yes", + "Is mains gas available?", + "No", + "Select Measurements Location:", + "Internal", + "Building Construction", + "Main Building", + "Age Range:", + "1950-1966", + "Record indicators of property age:", + "local knowledge, enquiries of owner, period building features", + "Walls - Construction Type:", + "Cavity", + "Record external indicators of Cavity Construction:", + "stretcher bond, wall thickness over 270 mm", + "Walls - Insulation Type:", + "Filled Cavity", + "Record indicators of filled cavity:", + "evidence of cavity fill drill holes, Boroscope", + "Page 3", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 4", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity 
insulation:", + "Photo indicators of filled cavity insulation:", + "Page 5", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 6", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 7", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 8", + "", + "Photo wall thickness:", + "Photo wall thickness:", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "300 mm", + "Party wall construction type:", + "Unable to determine", + "Floor type:", + "Ground Floor", + "Floor Construction:", + "Solid", + "Floor Insulation Type:", + "As Built", + "Floor U-Value known?", + "Not Known", + "Building Measurements", + "Area (m2)", + "Height (m)", + "Heat Loss Perimeter (m)", + "PWL (m)", + "Main Building", + "Floor 1", + "43.29", + "2.45", + "19.74", + "6.58", + "Floor 0", + "43.29", + "2.46", + "19.74", + "6.58", + "Roof Space", + "Main Building", + "Roofs - Construction Type:", + "Pitched roof (Slates or tiles), Access to 
loft", + "Roofs - Insulation At:", + "Joists", + "Roof U-Value:", + "Not Known", + "Roofs - Insulation Thickness:", + "150 mm", + "Page 9", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 10", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 11", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 12", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 13", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 14", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 15", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Record indicators of Cavity Wall Construction in roof", + "space:", + "No indicator of construction visible", + "Page 16", + "", + "Record indicators of party wall construction in roof space:", + "Are there rooms in the roof?", + "No", + "Windows", + "Window 1", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.27 m", + "Window width:", + "1.55 m", + "Orientation:", + "North West", + "Page 17", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 2", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + 
"Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.04 m", + "Orientation:", + "North West", + "Page 18", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 3", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.53 m", + "Orientation:", + "North West", + "Window 4", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Page 19", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.98 m", + "Window width:", + "0.57 m", + "Orientation:", + "North West", + "Window 5", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.29 m", + "Window width:", + "0.57 m", + "Orientation:", + "South West", + "Page 20", + "", + "Photo of 
glazing type:", + "Photo of glazing type:", + "Window 6", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.98 m", + "Window width:", + "0.53 m", + "Orientation:", + "South West", + "Window 7", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.98 m", + "Window width:", + "1.09 m", + "Orientation:", + "South East", + "Page 21", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 8", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.96 m", + "Window width:", + "1.54 m", + "Orientation:", + "South East", + "Page 22", + "", + "Photo of glazing type:", + "Window 9", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Manufacturing date stamp", + "Window type:", + "Window", + "Window frame 
type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.53 m", + "Orientation:", + "South East", + "Window 10", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.95 m", + "Window width:", + "1.04 m", + "Orientation:", + "South East", + "Page 23", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 11", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.53 m", + "Orientation:", + "South East", + "Page 24", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Heating & Hot Water", + "Main Heating Systems", + "Main Heating 1", + "How would you like to select the Heating System?", + "Manual Entry", + "System type:", + "Room heaters", + "Fuel:", + "Dual fuel appliance (mineral and wood)", + "Heating System (Other):", + "Open fire with back boiler (No radiators)", + "Controls:", + "No thermostatic control of room temperature", + "Secondary Heating System", + "Secondary Fuel", + "Dual Fuel Appliance (Mineral and Wood)", + "Secondary System:", + "Open fire in grate", + "Page 25", + "", + "Photo of secondary heating system", + "Photo of water 
heating system:", + "Photo of water heating system:", + "Water Heating & Cylinder", + "Water Heating Type:", + "Regular", + "Water Heating System:", + "From main heating 1", + "Page 26", + "", + "Photo of water heating system:", + "Photo of water heating system:", + "Photo of water heating system:", + "Photo of water heating system:", + "Page 27", + "", + "Photo of water heating system:", + "Photo of cylinder and thermostat if present:", + "Photo of cylinder and thermostat if present:", + "Photo of ventilation type:", + "Cylinder Size:", + "Normal (90-130 litres)", + "What is the cylinder measured heat loss:", + "Not known", + "Insulation Type:", + "Factory fitted", + "Thickness:", + "38 mm", + "Has thermostat?", + "No", + "Ventilation", + "Ventilation type:", + "Mechanical Extract - Decentralised", + "Has fixed air conditioning?", + "No", + "Is the ventilation in the PCDF database?", + "No", + "Page 28", + "", + "Photo of boiler flues:", + "Photo of extract fans:", + "Number of open flues:", + "0", + "Number of closed flues:", + "0", + "Number of boiler flues:", + "1", + "Number of other flues:", + "0", + "Number of extract fans:", + "1", + "Number of passive vents:", + "0", + "Number of flueless gas fires:", + "0", + "Pressure test:", + "No test", + "Is there a draught lobby?", + "No", + "Conservatories", + "Is there conservatory?", + "No conservatory", + "Page 29", + "", + "Photo of open chimneys:", + "Renewables", + "Wind Turbines", + "Has wind turbines?", + "No", + "Solar hot water", + "Has solar hot water?", + "No", + "Photovoltaics", + "Has photovoltaic array?", + "No", + "Number of PV batteries:", + "None", + "Hydro", + "Is the dwelling connected to Hydro?", + "No", + "Room Count Elements", + "Number of habitable rooms?", + "5", + "Are any of these rooms unheated?", + "Yes", + "Please enter the number of HEATED rooms:", + "0", + "Number of external doors?", + "2", + "Number of insulated external doors?", + "0", + "Number of draughtproofed external doors?", 
+ "2", + "Number of open chimneys?", + "1", + "Number of blocked chimneys?", + "1", + "Page 30", + "", + "Photo of blocked chimneys:", + "Photo of blocked chimneys:", + "Photo of blocked chimneys:", + "Photo of incandescent bulbs:", + "Number of fixed incandescent bulbs:", + "1", + "Is the exact number of LED and CFL bulbs known?", + "Yes", + "Number of fixed LED bulbs:", + "4", + "Page 31", + "", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of CFL bulbs:", + "Photo of CFL bulbs:", + "Number of fixed CFL bulbs:", + "4", + "Page 32", + "", + "Photo of CFL bulbs:", + "Photo of CFL bulbs:", + "Photo of shower:", + "Photo of shower:", + "Are there any waste water heat recovery systems?", + "None", + "Number of baths:", + "1", + "How many special features are there at the", + "property?", + "0", + "Showers", + "Shower 1", + "Shower outlet type:", + "Electric Shower", + "Page 33", + "", + "Photo of shower:", + "Customer Response", + "Customer present?", + "Yes", + "Customer willing to answer satisfaction survey?", + "No", + "Addendum + Related Party Disclosure", + "Addendum", + "None", + "Related party disclosure", + "No related party", + "Hard to treat cavity walls: Property has access", + "issues?", + "No", + "Hard to treat cavity walls: Property has high", + "exposure?", + "No", + "Hard to treat cavity walls: Property has narrow", + "cavities?", + "No", + "Photographs Required", + "Page 34", + "", + "General Photos:", + "General Photos:", + "General Photos:", + "General Photos:", + "External Elevations:", + "External Elevations:", + "Page 35", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 36", + "", + "External Elevations:", + "Page 37", + "", + "Page 38", + "", + "Page 39", + "", + "Page 40", + "", + "Additional Notes", + 
"Additional Notes", + "Over the years Tenant has refused any sort of heating system upgrade so the", + "property only has an open fire with back boiler in the living room. There are no", + "radiators in habitable rooms. The only radiator is in the bathroom. The hot", + "water cylinder is connected to the back boiler and there is also a backup", + "emersion.", + "Page 41", + "" +] \ No newline at end of file diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_3_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_3_text.json new file mode 100644 index 00000000..750fabbc --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_3_text.json @@ -0,0 +1,799 @@ +[ + "SMART EPC: Record of", + "Inspection & Site Notes", + "Inspection Surveyor:", + "Rebecca Mcwilliam", + "E-Mail Address:", + "rebeccamcdea@gmail.com", + "Report Reference:", + "Not Applicable", + "Created On:", + "14 October 2025", + "Date of Inspection:", + "14 October 2025", + "Property Address:", + "19,", + "Sandfield ,", + "Wrenbury,", + "CW5 8EU", + "Property Photo", + "Page 1", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "Photo of electricity meter:", + "RdSAP Assessment", + "General", + "Confirm you have checked for the existence of an", + "EPC before carrying out another energy assessment.", + "Yes", + "Does an EPC exist at the point of carrying out this", + "energy assessment?", + "Yes", + "Please select why another energy assessment needs", + "to be undertaken:", + "Assessor instructed to produce a new EPC upon request from building", + "owner/tenant/landlord after confirming to the requestor that a valid EPC", + "already exists", + "Inspection Date:", + "14/10/2025", + "Transaction Type:", + "None of the Above", + "Tenure:", + "Rented Social", + "Type of Property:", + "House", + "Detachment Type:", + "End-terrace", + "Number of storeys:", + "2 Storeys", + "Terrain Type:", + "Suburban", + "Number of 
Extensions:", + "No Extensions", + "Is an electricity smart meter present?", + "Yes", + "Electric meter type:", + "Dual", + "Is the dwelling export-capable?", + "Yes", + "Is mains gas available?", + "No", + "Page 2", + "", + "External indicators of Cavity Wall Construction:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Select Measurements Location:", + "Internal", + "Building Construction", + "Main Building", + "Age Range:", + "1950-1966", + "Record indicators of property age:", + "local knowledge, enquiries of owner, period building features", + "Walls - Construction Type:", + "Cavity", + "Record external indicators of Cavity Construction:", + "stretcher bond", + "Walls - Insulation Type:", + "Filled Cavity", + "Record indicators of filled cavity:", + "evidence of cavity fill drill holes", + "Page 3", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 4", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 5", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo 
indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 6", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 7", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 8", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 9", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo wall thickness:", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "300 mm", + "Party wall construction type:", + "Unable to determine", + "Floor type:", + "Ground Floor", + "Floor Construction:", + "Solid", + "Floor Insulation Type:", + "As Built", + "Floor U-Value known?", + "Not Known", + "Page 10", + "", + "Loft insulation:", + "Loft insulation:", + "Building Measurements", + "Area (m2)", + "Height (m)", + "Heat Loss 
Perimeter (m)", + "PWL (m)", + "Main Building", + "Floor 1", + "42.7", + "2.45", + "19.61", + "6.53", + "Floor 0", + "42.7", + "2.45", + "19.61", + "6.53", + "Roof Space", + "Main Building", + "Roofs - Construction Type:", + "Pitched roof (Slates or tiles), Access to loft", + "Roofs - Insulation At:", + "Joists", + "Roof U-Value:", + "Not Known", + "Roofs - Insulation Thickness:", + "200 mm", + "Page 11", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 12", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 13", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 14", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 15", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 16", + "", + "Loft insulation:", + "Record indicators of party wall construction in roof space:", + "Record indicators of Cavity Wall Construction in roof", + "space:", + "No indicator of construction visible", + "Are there rooms in the roof?", + "No", + "Windows", + "Window 1", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Page 17", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.26 m", + "Window width:", + "1.54 m", + "Orientation:", + "North East", + "Window 2", + "Window location:", + "Main 
Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.04 m", + "Orientation:", + "North East", + "Page 18", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 3", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.53 m", + "Orientation:", + "North East", + "Window 4", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Page 19", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.27 m", + "Window width:", + "0.58 m", + "Orientation:", + "North East", + "Window 5", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are 
there permanent shutters present?", + "No", + "Window height:", + "0.98 m", + "Window width:", + "0.54 m", + "Orientation:", + "South East", + "Page 20", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 6", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.27 m", + "Window width:", + "0.58 m", + "Orientation:", + "South East", + "Window 7", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.98 m", + "Window width:", + "1.06 m", + "Page 21", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Orientation:", + "South West", + "Window 8", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.56 m", + "Orientation:", + "South East", + "Window 9", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + 
"Identify basis of window dating:", + "Thermal spacer bar", + "Page 22", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.96 m", + "Window width:", + "1.53 m", + "Orientation:", + "South East", + "Window 10", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing installed between 2002 - 2021", + "Identify basis of window dating:", + "Thermal spacer bar", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.95 m", + "Window width:", + "1.04 m", + "Orientation:", + "South East", + "Page 23", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Page 24", + "", + "Heating & Hot Water", + "Main Heating Systems", + "Main Heating 1", + "How would you like to select the Heating System?", + "PCDF Search", + "System type:", + "Electric storage heaters", + "Number of storage heater types:", + "3", + "Product Id", + "230026", + "Manufacturer", + "Dimplex", + "Model", + "Quantum", + "Orig Manuf", + "Dimplex", + "S. Efficiency", + "0", + "Year", + "2019 - current", + "Open Flue", + "No", + "Status", + "Normal status for an actual product", + "Number of heaters of this type:", + "1", + "Product Id", + "230024", + "Manufacturer", + "Dimplex", + "Model", + "Quantum", + "Orig Manuf", + "Dimplex", + "S. Efficiency", + "0", + "Year", + "2019 - current", + "Open Flue", + "No", + "Status", + "Normal status for an actual product", + "Number of heaters of this type:", + "2", + "Product Id", + "230023", + "Manufacturer", + "Dimplex", + "Model", + "Quantum", + "Orig Manuf", + "Dimplex", + "S. 
Efficiency", + "0", + "Year", + "2019 - current", + "Open Flue", + "No", + "Status", + "Normal status for an actual product", + "Number of heaters of this type:", + "3", + "Controls:", + "Controls for high heat retention storage heaters", + "Page 25", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Page 26", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating controls:", + "Photo of heating controls:", + "Photo of heating controls:", + "Page 27", + "", + "Photo of secondary heating system", + "Photo of cylinder and thermostat if present:", + "Photo of cylinder and thermostat if present:", + "Secondary Heating System", + "Secondary Fuel", + "Electricity", + "Secondary System:", + "Panel, convector or radiant heaters", + "Water Heating & Cylinder", + "Water Heating Type:", + "Regular", + "Water Heating System:", + "Electric immersion", + "Immersion:", + "Dual", + "Cylinder Size:", + "Medium (131-170 litres)", + "What is the cylinder measured heat loss:", + "Not known", + "Insulation Type:", + "Factory fitted", + "Thickness:", + "50 mm", + "Page 28", + "", + "Photo of cylinder and thermostat if present:", + "Photo of cylinder and thermostat if present:", + "Photo of cylinder and thermostat if present:", + "Photo of ventilation type:", + "Has thermostat?", + "Yes", + "Ventilation", + "Ventilation type:", + "Mechanical Extract - Decentralised", + "Has fixed air conditioning?", + "No", + "Page 29", + "", + "Photo of extract fans:", + "Is the ventilation in the PCDF database?", + "No", + "Number of open flues:", + "0", + "Number of closed flues:", + "0", + "Number of boiler flues:", + "0", + "Number of other flues:", + "0", + "Number of extract fans:", + "1", + "Number of passive 
vents:", + "0", + "Number of flueless gas fires:", + "0", + "Pressure test:", + "No test", + "Is there a draught lobby?", + "No", + "Conservatories", + "Is there conservatory?", + "No conservatory", + "Page 30", + "", + "Photo of photovoltaic array:", + "Photo of photovoltaic array:", + "Photo of photovoltaic array:", + "Photo of photovoltaic array:", + "Renewables", + "Wind Turbines", + "Has wind turbines?", + "No", + "Solar hot water", + "Has solar hot water?", + "No", + "Photovoltaics", + "Has photovoltaic array?", + "Yes", + "Is there a PV diverter?", + "No", + "PV Connection:", + "Connected to dwellings electricity meter", + "Photovoltaic array kWp Known?", + "No", + "Percentage of roof covered with photovoltaic array?", + "45 %", + "Number of PV batteries:", + "None", + "Hydro", + "Is the dwelling connected to Hydro?", + "No", + "Room Count Elements", + "Number of habitable rooms?", + "5", + "Are any of these rooms unheated?", + "No", + "Page 31", + "", + "Photo of open chimneys:", + "Photo of incandescent bulbs:", + "Photo of incandescent bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Number of external doors?", + "2", + "Number of insulated external doors?", + "0", + "Number of draughtproofed external doors?", + "2", + "Number of open chimneys?", + "1", + "Number of blocked chimneys?", + "0", + "Number of fixed incandescent bulbs:", + "2", + "Is the exact number of LED and CFL bulbs known?", + "Yes", + "Number of fixed LED bulbs:", + "9", + "Page 32", + "", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Number of fixed CFL bulbs:", + "0", + "Are there any waste water heat recovery systems?", + "None", + "Number of baths:", + "1", + "How many special features are there at the", + "property?", + "0", + "Showers", + "Shower 1", + "Shower outlet type:", + "Electric Shower", + "Page 33", + "", + "Photo of shower:", + "Photo of shower:", + "Customer Response", + "Customer present?", + "Yes", + "Customer willing to 
answer satisfaction survey?", + "No", + "Addendum + Related Party Disclosure", + "Addendum", + "None", + "Related party disclosure", + "No related party", + "Hard to treat cavity walls: Property has access", + "issues?", + "No", + "Hard to treat cavity walls: Property has high", + "exposure?", + "No", + "Hard to treat cavity walls: Property has narrow", + "cavities?", + "No", + "Photographs Required", + "Page 34", + "", + "General Photos:", + "General Photos:", + "General Photos:", + "General Photos:", + "General Photos:", + "General Photos:", + "Page 35", + "", + "General Photos:", + "General Photos:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 36", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 37", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 38", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 39", + "", + "Page 40", + "", + "Page 41", + "", + "Additional Notes", + "Additional Notes", + "dMEV in kitchen, IEV in bathroom. 
Vent on chimney more than 300x100mm so", + "recorded as open chimney.", + "Page 42", + "" +] diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_4_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_4_text.json new file mode 100644 index 00000000..d7561ded --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_4_text.json @@ -0,0 +1,480 @@ +[ + "SMART EPC: Record of", + "Inspection & Site Notes", + "Inspection Surveyor:", + "Rebecca Mcwilliam", + "E-Mail Address:", + "rebeccamcdea@gmail.com", + "Report Reference:", + "DA1A93D9-354C-4B4B-A299-3A681231D4B5", + "Created On:", + "15 October 2025", + "Date of Inspection:", + "13 October 2025", + "Property Address:", + "18,", + "Oakfield Close,", + "Wrenbury,", + "CW5 8ET", + "Property Photo", + "Page 1", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "RdSAP Assessment", + "General", + "Confirm you have checked for the existence of an", + "EPC before carrying out another energy assessment.", + "Yes", + "Does an EPC exist at the point of carrying out this", + "energy assessment?", + "Yes", + "Please select why another energy assessment needs", + "to be undertaken:", + "Assessor instructed to produce a new EPC upon request from building", + "owner/tenant/landlord after confirming to the requestor that a valid EPC", + "already exists", + "Inspection Date:", + "13/10/2025", + "Transaction Type:", + "None of the Above", + "Tenure:", + "Rented Social", + "Type of Property:", + "Bungalow", + "Detachment Type:", + "End-terrace", + "Number of storeys:", + "1 Storey", + "Terrain Type:", + "Suburban", + "Number of Extensions:", + "No Extensions", + "Is an electricity smart meter present?", + "Yes", + "Electric meter type:", + "Single", + "Page 2", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "External indicators of Cavity Wall Construction:", + "External indicators of Cavity Wall Construction:", + "Is the dwelling 
export-capable?", + "Yes", + "Is mains gas available?", + "No", + "Select Measurements Location:", + "Internal", + "Building Construction", + "Main Building", + "Age Range:", + "1950-1966", + "Record indicators of property age:", + "local knowledge, enquiries of owner, period building features", + "Walls - Construction Type:", + "Cavity", + "Record external indicators of Cavity Construction:", + "stretcher bond, wall thickness over 270 mm", + "Walls - Insulation Type:", + "Filled Cavity", + "Record indicators of filled cavity:", + "evidence of cavity fill drill holes, Boroscope", + "Page 3", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 4", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo wall thickness:", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "300 mm", + "Party wall construction type:", + "Unable to determine", + "Floor type:", + "Ground Floor", + "Floor Construction:", + "Solid", + "Floor Insulation Type:", + "As Built", + "Floor U-Value known?", + "Not Known", + "Page 5", + "", + "Loft insulation:", + "Loft insulation:", + "Building Measurements", + "Area (m2)", + "Height (m)", + "Heat Loss Perimeter (m)", + "PWL (m)", + "Main Building", + "Floor 0", + "46.08", + "2.44", + "20.49", + "6.67", + "Roof Space", + "Main Building", + "Roofs - Construction Type:", + "Pitched roof (Slates or tiles), No access", + "Identify the reason for restricted access:", + "access hatch blocked", + "Roofs - Insulation At:", + "Unknown", + "Page 6", + "", + "Loft insulation:", + "Record indicators of Cavity Wall 
Construction in roof", + "space:", + "No indicator of construction visible", + "Are there rooms in the roof?", + "No", + "Windows", + "Window 1", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.97 m", + "Window width:", + "1.54 m", + "Orientation:", + "North East", + "Page 7", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 2", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.01 m", + "Window width:", + "1.04 m", + "Orientation:", + "North East", + "Window 3", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Page 8", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.99 m", + "Window width:", + "1.07 m", + "Orientation:", + "South West", + "Window 4", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window 
frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "12 mm", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.28 m", + "Window width:", + "2.07 m", + "Orientation:", + "South", + "Page 9", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Heating & Hot Water", + "Main Heating Systems", + "Main Heating 1", + "How would you like to select the Heating System?", + "PCDF Search", + "System type:", + "Heat pump with radiators or underfloor heating", + "Product Id", + "102421", + "Manufacturer", + "Daikin Altherma", + "Model", + "EDLQ05CAV3", + "Year", + "2014 - current", + "Fuel", + "Electricity, any tariff", + "Status", + "Normal status for an actual product", + "Central heating pump age:", + "Unknown", + "MCS installed heat pump:", + "Yes", + "Controls:", + "Programmer, room thermostat and TRVs", + "Emitter:", + "Radiators", + "Emitter Temperature:", + "Unknown", + "Page 10", + "", + "Photo of heating controls:", + "Photo of heating controls:", + "Photo of heating controls:", + "Photo of heating controls:", + "Photo of heating controls:", + "Photo of heating controls:", + "Page 11", + "", + "Photo of heating controls:", + "Photo of secondary heating system", + "Secondary Heating System", + "Secondary Fuel", + "Electricity", + "Secondary System:", + "Panel, convector or radiant heaters", + "Water Heating & Cylinder", + "Water Heating Type:", + "Regular", + "Water Heating System:", + "From main heating 1", + "Page 12", + "", + "Photo of water heating system:", + "Photo of water heating system:", + "Photo of water heating system:", + "Cylinder Size:", + "Medium (131-170 litres)", + "What is the cylinder measured heat loss:", + "Not known", + "Insulation Type:", + "Factory fitted", + "Thickness:", + "50 mm", + "Page 13", + "", + "Photo of cylinder and thermostat if present:", + "Photo of cylinder and 
thermostat if present:", + "Photo of ventilation type:", + "Photo of ventilation type:", + "Has thermostat?", + "Yes", + "Ventilation", + "Ventilation type:", + "Mechanical Extract - Decentralised", + "Has fixed air conditioning?", + "No", + "Is the ventilation in the PCDF database?", + "No", + "Number of open flues:", + "0", + "Number of closed flues:", + "0", + "Number of boiler flues:", + "0", + "Page 14", + "", + "Photo of extract fans:", + "Number of other flues:", + "0", + "Number of extract fans:", + "1", + "Number of passive vents:", + "0", + "Number of flueless gas fires:", + "0", + "Pressure test:", + "No test", + "Is there a draught lobby?", + "No", + "Conservatories", + "Is there conservatory?", + "No conservatory", + "Renewables", + "Wind Turbines", + "Has wind turbines?", + "No", + "Solar hot water", + "Has solar hot water?", + "No", + "Photovoltaics", + "Has photovoltaic array?", + "No", + "Number of PV batteries:", + "None", + "Hydro", + "Is the dwelling connected to Hydro?", + "No", + "Room Count Elements", + "Number of habitable rooms?", + "2", + "Are any of these rooms unheated?", + "No", + "Page 15", + "", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Photo of LED bulbs:", + "Number of external doors?", + "2", + "Number of insulated external doors?", + "0", + "Number of draughtproofed external doors?", + "2", + "Number of open chimneys?", + "0", + "Number of blocked chimneys?", + "0", + "Number of fixed incandescent bulbs:", + "0", + "Is the exact number of LED and CFL bulbs known?", + "Yes", + "Number of fixed LED bulbs:", + "7", + "Page 16", + "", + "Photo of LED bulbs:", + "Photo of shower:", + "Photo of shower:", + "Number of fixed CFL bulbs:", + "0", + "Are there any waste water heat recovery systems?", + "None", + "Number of baths:", + "1", + "How many special features are there at the", + "property?", + "0", + "Showers", + "Shower 1", + "Shower outlet 
type:", + "Electric Shower", + "Customer Response", + "Customer present?", + "Yes", + "Customer willing to answer satisfaction survey?", + "No", + "Addendum + Related Party Disclosure", + "Addendum", + "None", + "Page 17", + "", + "General Photos:", + "General Photos:", + "External Elevations:", + "External Elevations:", + "Related party disclosure", + "No related party", + "Hard to treat cavity walls: Property has access", + "issues?", + "No", + "Hard to treat cavity walls: Property has high", + "exposure?", + "No", + "Hard to treat cavity walls: Property has narrow", + "cavities?", + "No", + "Photographs Required", + "Page 18", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 19", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 20", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 21", + "", + "External Elevations:", + "External Elevations:", + "Page 22", + "", + "Additional Notes", + "Additional Notes", + "The loft hatch is a drop-down hatch with a ladder above. The hatch is broken", + "and it could not be opened. 
She is about to have a new door made so loft will", + "be able to be accessed at some point soon but on the day I could not.", + "Page 23", + "" +] \ No newline at end of file diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_5_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_5_text.json new file mode 100644 index 00000000..74b09972 --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_5_text.json @@ -0,0 +1 @@ +["SMART EPC: Record of", "Inspection & Site Notes", "Inspection Surveyor:", "Kevin Rossiter", "E-Mail Address:", "domesticretrofitsolutions@yahoo.com", "Report Reference:", "B94B3A7D-5D32-4143-865C-55142AD997EC", "Created On:", "7 November 2025", "Date of Inspection:", "02 October 2025", "Property Address:", "20,", "Acton Road,", "Crewe,", "Cheshire,", "CW2 8TN", "Property Photo", "Page 1", "", "Photo of electricity meter:", "RdSAP Assessment", "General", "Confirm you have checked for the existence of an", "EPC before carrying out another energy assessment.", "Yes", "Does an EPC exist at the point of carrying out this", "energy assessment?", "Yes", "Please select why another energy assessment needs", "to be undertaken:", "Assessor instructed to produce a new EPC upon request from building", "owner/tenant/landlord after confirming to the requestor that a valid EPC", "already exists", "Inspection Date:", "02/10/2025", "Transaction Type:", "Grant-Scheme (ECO, RHI, etc.)", "Tenure:", "Rented Social", "Type of Property:", "House", "Detachment Type:", "Semi-Detached", "Number of storeys:", "2 Storeys", "Terrain Type:", "Suburban", "Number of Extensions:", "No Extensions", "Is an electricity smart meter present?", "Yes", "Electric meter type:", "Single", "Is the dwelling export-capable?", "Yes", "Is mains gas available?", "Yes", "Is there a gas smart meter?", "Yes", "Is the gas meter accessible?", "Yes", "Page 2", "", "Photo of Gas Meter:", "Photo indicators of filled cavity insulation:", "Photo 
indicators of filled cavity insulation:", "Select Measurements Location:", "Internal", "Building Construction", "Main Building", "Age Range:", "1950-1966", "Record indicators of property age:", "local knowledge, period building features", "Walls - Construction Type:", "Cavity", "Record external indicators of Cavity Construction:", "stretcher bond", "Walls - Insulation Type:", "Filled Cavity", "Record indicators of filled cavity:", "evidence of cavity fill drill holes, Scope images", "Page 3", "", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo wall thickness:", "Thermal conductivity of wall insulation:", "Unknown", "Wall U-Value known?", "Not Known", "Wall thickness:", "300 mm", "Party wall construction type:", "Unable to determine", "Floor type:", "Ground Floor", "Floor Construction:", "Solid", "Floor Insulation Type:", "As Built", "Floor U-Value known?", "Not Known", "Page 4", "", "Loft insulation:", "Mostly boarded", "Loft insulation:", "Mostly boarded", "Building Measurements", "Area (m2)", "Height (m)", "Heat Loss Perimeter (m)", "PWL (m)", "Main Building", "Floor 1", "41.28", "2.51", "18.62", "7.28", "Floor 0", "41.88", "2.51", "19.8", "7.28", "Roof Space", "Main Building", "Roofs - Construction Type:", "Pitched roof (Slates or tiles), Access to loft", "Roofs - Insulation At:", "Unknown", "Page 5", "", "Loft insulation:", "Mostly boarded", "Loft insulation:", "Mostly boarded", "Loft insulation:", "Mostly boarded", "Loft insulation:", "Mostly boarded", "Page 6", "", "Loft insulation:", "Unknown", "Record indicators of Cavity Wall Construction in roof", "space:", "No indicator of construction visible", "Are there rooms in the roof?", "No", "Windows", "Window 1", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", 
"What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.27 m", "Window width:", "2.06 m", "Orientation:", "North West", "Window 2", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Page 7", "", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.89 m", "Window width:", "1.45 m", "Orientation:", "North West", "Window 3", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.89 m", "Window width:", "0.95 m", "Orientation:", "North West", "Window 4", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.89 m", "Window width:", "0.95 m", "Orientation:", "South East", "Window 5", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Page 8", "", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.89 m", "Window width:", "1.45 m", "Orientation:", "South East", "Window 6", "Window location:", "Main Building", "Window 
wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.89 m", "Window width:", "0.95 m", "Orientation:", "South East", "Window 7", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.17 m", "Window width:", "0.46 m", "Orientation:", "South East", "Window 8", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Page 9", "", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.15 m", "Window width:", "0.93 m", "Orientation:", "North East", "Page 10", "", "Photo of heating system:", "Heating & Hot Water", "Main Heating Systems", "Main Heating 1", "How would you like to select the Heating System?", "PCDF Search", "System type:", "Boiler with radiators or underfloor heating", "Product Id", "18119", "Manufacturer", "Vaillant", "Model", "ecoTEC sustain 28", "Orig Manuf", "Vaillant", "Fuel", "Mains gas", "S. 
Efficiency", "0", "Type", "Combi", "Condensing", "Yes", "Year", "2017 - current", "Mount", "Wall", "Open Flue", "Room-sealed", "Fan Assist", "Yes", "Status", "Normal status for an actual product", "Central heating pump age:", "2013 or later", "Controls:", "Programmer, room thermostat and TRVs", "Does the boiler have a Flue Gas Heat Recover", "System (FGHRS)?", "No", "Is there a weather compensator?", "No", "Emitter:", "Radiators", "Emitter Temperature:", "Unknown", "Page 11", "", "Photo of heating system:", "Photo of heating controls:", "Photo of heating controls:", "Photo of secondary heating system", "Secondary Heating System", "Secondary Fuel", "Electricity", "Secondary System:", "Panel, convector or radiant heaters", "Page 12", "", "Water Heating & Cylinder", "Water Heating Type:", "Regular", "Water Heating System:", "From main heating 1", "Cylinder Size:", "No Cylinder", "Ventilation", "Ventilation type:", "Mechanical Extract - Decentralised", "Has fixed air conditioning?", "No", "Is the ventilation in the PCDF database?", "No", "Number of open flues:", "0", "Number of closed flues:", "0", "Number of boiler flues:", "0", "Number of other flues:", "0", "Number of extract fans:", "0", "Number of passive vents:", "0", "Number of flueless gas fires:", "0", "Pressure test:", "No test", "Is there a draught lobby?", "No", "Conservatories", "Is there conservatory?", "No conservatory", "Renewables", "Wind Turbines", "Has wind turbines?", "No", "Solar hot water", "Has solar hot water?", "No", "Photovoltaics", "Has photovoltaic array?", "No", "Number of PV batteries:", "None", "Hydro", "Is the dwelling connected to Hydro?", "No", "Room Count Elements", "Number of habitable rooms?", "4", "Are any of these rooms unheated?", "No", "Number of external doors?", "2", "Number of insulated external doors?", "0", "Number of draughtproofed external doors?", "2", "Page 13", "", "Photo of incandescent bulbs:", "Photo of LED bulbs:", "Photo of LED bulbs:", "Photo of LED bulbs:", 
"Photo of LED bulbs:", "Number of open chimneys?", "0", "Number of blocked chimneys?", "0", "Number of fixed incandescent bulbs:", "1", "Is the exact number of LED and CFL bulbs known?", "Yes", "Number of fixed LED bulbs:", "7", "Page 14", "", "Photo of LED bulbs:", "Photo of CFL bulbs:", "Photo of CFL bulbs:", "Photo of shower:", "Number of fixed CFL bulbs:", "2", "Are there any waste water heat recovery systems?", "None", "Number of baths:", "1", "How many special features are there at the", "property?", "0", "Showers", "Shower 1", "Shower outlet type:", "Electric Shower", "Page 15", "", "General Photos:", "12mm dg unknown", "External Elevations:", "External Elevations:", "Customer Response", "Customer present?", "Yes", "Customer willing to answer satisfaction survey?", "No", "Addendum + Related Party Disclosure", "Addendum", "None", "Related party disclosure", "No related party", "Hard to treat cavity walls: Property has access", "issues?", "No", "Hard to treat cavity walls: Property has high", "exposure?", "No", "Hard to treat cavity walls: Property has narrow", "cavities?", "No", "Photographs Required", "Page 16", "", "External Elevations:", "Floor plan", "Page 17", ""] \ No newline at end of file diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_6_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_6_text.json new file mode 100644 index 00000000..ef7258de --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_6_text.json @@ -0,0 +1 @@ +["SMART EPC: Record of", "Inspection & Site Notes", "Inspection Surveyor:", "Robert Rhodes", "E-Mail Address:", "rods30@live.co.uk", "Report Reference:", "2B688EF1-FE12-4F7E-860A-E63ABA4FEA47", "Created On:", "8 November 2025", "Date of Inspection:", "01 October 2025", "Property Address:", "34,", "Acton Road,", "Crewe,", "Cheshire,", "CW2 8TN", "Property Photo", "Page 1", "", "Photo of electricity meter:", "Photo of Gas Meter:", "RdSAP Assessment", "General", 
"Confirm you have checked for the existence of an", "EPC before carrying out another energy assessment.", "Yes", "Does an EPC exist at the point of carrying out this", "energy assessment?", "No", "Inspection Date:", "01/10/2025", "Transaction Type:", "Grant-Scheme (ECO, RHI, etc.)", "Tenure:", "Rented Social", "Type of Property:", "House", "Detachment Type:", "Semi-Detached", "Number of storeys:", "2 Storeys", "Terrain Type:", "Suburban", "Number of Extensions:", "No Extensions", "Is an electricity smart meter present?", "Yes", "Electric meter type:", "Single", "Is the dwelling export-capable?", "Yes", "Is mains gas available?", "Yes", "Is there a gas smart meter?", "Yes", "Is the gas meter accessible?", "Yes", "Page 2", "", "External indicators of Cavity Wall Construction:", "External indicators of Cavity Wall Construction:", "External indicators of Cavity Wall Construction:", "External indicators of Cavity Wall Construction:", "Select Measurements Location:", "Internal", "Building Construction", "Main Building", "Age Range:", "1950-1966", "Record indicators of property age:", "local knowledge, enquiries of owner", "Walls - Construction Type:", "Cavity", "Record external indicators of Cavity Construction:", "stretcher bond, wall thickness over 270 mm, evidence of cavity fill drill holes,", "Drilled and boroscoped", "Page 3", "", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Walls - Insulation Type:", "Filled Cavity", "Record indicators of filled cavity:", "evidence of cavity fill drill holes, Drilled and boroscoped wall", "Page 4", "", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Photo indicators of filled cavity insulation:", "Page 5", "", "Photo indicators of filled cavity insulation:", "Photo wall thickness:", 
"Thermal conductivity of wall insulation:", "Unknown", "Wall U-Value known?", "Not Known", "Wall thickness:", "280 mm", "Party wall construction type:", "Solid Masonry, Timber Frame, or System Built", "Floor type:", "Ground Floor", "Floor Construction:", "Solid", "Floor Insulation Type:", "As Built", "Floor U-Value known?", "Not Known", "Building Measurements", "Area (m2)", "Height (m)", "Heat Loss Perimeter (m)", "PWL (m)", "Main Building", "Floor 1", "41.33", "2.25", "18.63", "7.29", "Floor 0", "41.33", "2.46", "18.63", "7.29", "Page 6", "", "Loft insulation:", "Record indicators of party wall construction in roof space:", "Roof Space", "Main Building", "Roofs - Construction Type:", "Pitched roof (Slates or tiles), Access to loft", "Roofs - Insulation At:", "Joists", "Roof U-Value:", "Not Known", "Roofs - Insulation Thickness:", "300 mm", "Record indicators of Cavity Wall Construction in roof", "space:", "No indicator of construction visible", "Are there rooms in the roof?", "No", "Page 7", "", "Photo of glazing type:", "All Windows the same", "Windows", "Window 1", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.88 m", "Window width:", "0.95 m", "Orientation:", "East", "Window 2", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.17 m", "Window width:", "0.82 m", "Orientation:", "South", "Window 3", "Window location:", "Main Building", "Page 8", "", "Window wall type:", 
"External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.24 m", "Window width:", "2.06 m", "Orientation:", "West", "Window 4", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.88 m", "Window width:", "1.45 m", "Orientation:", "East", "Window 5", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.88 m", "Window width:", "0.95 m", "Orientation:", "East", "Window 6", "Window location:", "Main Building", "Page 9", "", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "1.18 m", "Window width:", "0.48 m", "Orientation:", "South", "Window 7", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.88 m", "Window 
width:", "0.95 m", "Orientation:", "West", "Window 8", "Window location:", "Main Building", "Window wall type:", "External wall", "Glazing Type:", "Double glazing, Unknown install date", "Window type:", "Window", "Window frame type:", "Wooden or PVC", "What size is the glazing gap?", "12 mm", "Is the window draught proofed?", "Yes", "Are there permanent shutters present?", "No", "Window height:", "0.88 m", "Window width:", "1.45 m", "Orientation:", "West", "Page 10", "", "Heating & Hot Water", "Main Heating Systems", "Main Heating 1", "How would you like to select the Heating System?", "PCDF Search", "System type:", "Boiler with radiators or underfloor heating", "Product Id", "18119", "Manufacturer", "Vaillant", "Model", "ecoTEC sustain 28", "Orig Manuf", "Vaillant", "Fuel", "Mains gas", "S. Efficiency", "0", "Type", "Combi", "Condensing", "Yes", "Year", "2017 - current", "Mount", "Wall", "Open Flue", "Room-sealed", "Fan Assist", "Yes", "Status", "Normal status for an actual product", "Central heating pump age:", "Unknown", "Controls:", "Programmer, room thermostat and TRVs", "Does the boiler have a Flue Gas Heat Recover", "System (FGHRS)?", "No", "Is there a weather compensator?", "No", "Emitter:", "Radiators", "Emitter Temperature:", "Unknown", "Page 11", "", "Photo of heating system:", "Photo of heating controls:", "Photo of heating controls:", "Photo of heating controls:", "Photo of heating controls:", "Page 12", "", "Photo of heating controls:", "Photo of heating controls:", "Photo of heating controls:", "Photo of secondary heating system", "Secondary Heating System", "Secondary Fuel", "Electricity", "Secondary System:", "Panel, convector or radiant heaters", "Water Heating & Cylinder", "Water Heating Type:", "Regular", "Water Heating System:", "From main heating 1", "Cylinder Size:", "No Cylinder", "Page 13", "", "Ventilation", "Ventilation type:", "Mechanical Extract - Decentralised", "Has fixed air conditioning?", "No", "Is the ventilation in the PCDF 
database?", "No", "Number of open flues:", "0", "Number of closed flues:", "0", "Number of boiler flues:", "0", "Number of other flues:", "0", "Number of extract fans:", "0", "Number of passive vents:", "0", "Number of flueless gas fires:", "0", "Pressure test:", "No test", "Is there a draught lobby?", "No", "Conservatories", "Is there conservatory?", "No conservatory", "Renewables", "Wind Turbines", "Has wind turbines?", "No", "Solar hot water", "Has solar hot water?", "No", "Photovoltaics", "Has photovoltaic array?", "No", "Number of PV batteries:", "None", "Hydro", "Is the dwelling connected to Hydro?", "No", "Room Count Elements", "Number of habitable rooms?", "4", "Are any of these rooms unheated?", "No", "Number of external doors?", "2", "Number of insulated external doors?", "0", "Number of draughtproofed external doors?", "2", "Number of open chimneys?", "0", "Number of blocked chimneys?", "0", "Number of fixed incandescent bulbs:", "0", "Is the exact number of LED and CFL bulbs known?", "Yes", "Page 14", "", "Photo of LED bulbs:", "Photo of LED bulbs:", "Photo of LED bulbs:", "Photo of LED bulbs:", "Photo of LED bulbs:", "Number of fixed LED bulbs:", "5", "Number of fixed CFL bulbs:", "2", "Page 15", "", "Photo of CFL bulbs:", "Photo of CFL bulbs:", "Photo of shower:", "Are there any waste water heat recovery systems?", "None", "Number of baths:", "1", "How many special features are there at the", "property?", "0", "Showers", "Shower 1", "Shower outlet type:", "Electric Shower", "Customer Response", "Customer present?", "Yes", "Customer willing to answer satisfaction survey?", "No", "Addendum + Related Party Disclosure", "Addendum", "PV Recommended", "Related party disclosure", "No related party", "Hard to treat cavity walls: Property has access", "issues?", "No", "Hard to treat cavity walls: Property has high", "exposure?", "No", "Hard to treat cavity walls: Property has narrow", "cavities?", "No", "Page 16", "", "General Photos:", "General Photos:", 
"General Photos:", "Photographs Required", "Page 17", "", "External Elevations:", "Front elavation", "External Elevations:", "Front elavation", "External Elevations:", "Front elavation", "External Elevations:", "Right Side elavation", "Page 18", "", "External Elevations:", "Right Side elavation", "External Elevations:", "Rear elavation", "External Elevations:", "Rear elavation", "External Elevations:", "Rear elavation", "Page 19", "", "Floor plan", "Page 20", ""] \ No newline at end of file diff --git a/backend/documents_parser/tests/fixtures/site_notes_example_text.json b/backend/documents_parser/tests/fixtures/site_notes_example_text.json new file mode 100644 index 00000000..b18ac082 --- /dev/null +++ b/backend/documents_parser/tests/fixtures/site_notes_example_text.json @@ -0,0 +1,643 @@ +[ + "SMART EPC: Record of", + "Inspection & Site Notes", + "Inspection Surveyor:", + "Benjamin Burke", + "E-Mail Address:", + "ben@mbsolutionsgroup.co.uk", + "Report Reference:", + "6EA2A86D-94CE-4792-8D49-AB495C744EDD", + "Created On:", + "10 November 2025", + "Date of Inspection:", + "25 September 2025", + "Property Address:", + "40,", + "Abbey Place,", + "Crewe,", + "Cheshire,", + "CW1 4JR", + "Property Photo", + "Page 1", + "", + "Photo of electricity meter:", + "Photo of electricity meter:", + "Photo of electricity meter:", + "Photo of electricity meter:", + "RdSAP Assessment", + "General", + "Confirm you have checked for the existence of an", + "EPC before carrying out another energy assessment.", + "Yes", + "Does an EPC exist at the point of carrying out this", + "energy assessment?", + "No", + "Inspection Date:", + "25/09/2025", + "Transaction Type:", + "Grant-Scheme (ECO, RHI, etc.)", + "Tenure:", + "Rented Social", + "Type of Property:", + "House", + "Detachment Type:", + "Mid-terrace", + "Number of storeys:", + "2 Storeys", + "Terrain Type:", + "Suburban", + "Number of Extensions:", + "1 Extension", + "Is an electricity smart meter present?", + "Yes", + "Electric 
meter type:", + "Single", + "Is the dwelling export-capable?", + "Yes", + "Is mains gas available?", + "Yes", + "Is there a gas smart meter?", + "Yes", + "Is the gas meter accessible?", + "Yes", + "Page 2", + "", + "Photo of Gas Meter:", + "External indicators of Cavity Wall Construction:", + "External indicators of Cavity Wall Construction:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Select Measurements Location:", + "Internal", + "Building Construction", + "Main Building", + "Age Range:", + "1950-1966", + "Record indicators of property age:", + "local knowledge, enquiries of owner", + "Walls - Construction Type:", + "Cavity", + "Record external indicators of Cavity Construction:", + "wall thickness over 270 mm", + "Walls - Insulation Type:", + "Filled Cavity", + "Record indicators of filled cavity:", + "evidence of cavity fill drill holes", + "Page 3", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Page 4", + "", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Photo indicators of filled cavity insulation:", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "310 mm", + "Page 5", + "", + "Photo wall thickness:", + "External indicators of Cavity Wall Construction:", + "Party wall construction type:", + "Cavity Masonry, Filled", + "Floor type:", + "Ground Floor", + "Floor Construction:", + "Solid", + "Floor Insulation Type:", + "As Built", + "Floor U-Value known?", + "Not Known", + "Extension 1", + "Age Range:", + 
"2003-2006", + "Record indicators of property age:", + "local knowledge, enquiries of owner", + "Walls - Construction Type:", + "Cavity", + "Record external indicators of Cavity Construction:", + "wall thickness over 270 mm", + "Walls - Insulation Type:", + "As built", + "Thermal conductivity of wall insulation:", + "Unknown", + "Wall U-Value known?", + "Not Known", + "Wall thickness:", + "310 mm", + "Page 6", + "", + "Photo wall thickness:", + "Party wall construction type:", + "Cavity Masonry, Filled", + "Floor type:", + "Ground Floor", + "Floor Construction:", + "Solid", + "Floor Insulation Type:", + "As Built", + "Floor U-Value known?", + "Not Known", + "Building Measurements", + "Area (m2)", + "Height (m)", + "Heat Loss Perimeter (m)", + "PWL (m)", + "Main Building", + "Floor 1", + "35.68", + "2.19", + "13.44", + "10.62", + "Floor 0", + "35.68", + "2.17", + "11", + "10.62", + "Extension 1", + "Floor 0", + "3.8", + "2", + "5.7", + "0", + "Roof Space", + "Main Building", + "Roofs - Construction Type:", + "Pitched roof (Slates or tiles), Access to loft", + "Roofs - Insulation At:", + "Joists", + "Roof U-Value:", + "Not Known", + "Roofs - Insulation Thickness:", + "100 mm", + "Page 7", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 8", + "", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Loft insulation:", + "Page 9", + "", + "Loft insulation:", + "Indicators of Cavity Wall Construction in roof space:", + "Indicators of Cavity Wall Construction in roof space:", + "Record indicators of party wall construction in roof space:", + "Record indicators of party wall construction in roof space:", + "Record indicators of Cavity Wall Construction in roof", + "space:", + "cavity visible in roof space", + "Are there rooms in the roof?", + "No", + "Extension 1", + "Roofs - Construction Type:", + "Pitched roof, Sloping ceiling", + "Page 
10", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Roofs - Insulation At:", + "Sloping ceiling insulation", + "Roof U-Value:", + "Not Known", + "Roofs - Insulation Thickness:", + "As built", + "Record indicators of Cavity Wall Construction in roof", + "space:", + "No indicator of construction visible", + "Are there rooms in the roof?", + "No", + "Windows", + "Window 1", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1.2 m", + "Window width:", + "2.3 m", + "Orientation:", + "North West", + "Window 2", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Page 11", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window height:", + "1.2 m", + "Window width:", + "1 m", + "Orientation:", + "North West", + "Window 3", + "Window location:", + "Main Building", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.9 m", + "Window width:", + "1 m", + "Orientation:", + "North East", + "Window 4", + "Window location:", + "Extension 1", + "Window 
wall type:", + "External wall", + "Page 12", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.9 m", + "Window width:", + "1 m", + "Orientation:", + "North", + "Window 5", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.9 m", + "Window width:", + "1.7 m", + "Orientation:", + "North East", + "Page 13", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Window 6", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.9 m", + "Window width:", + "2.3 m", + "Orientation:", + "North West", + "Window 7", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "Page 14", + "", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "Photo of glazing type:", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught 
proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "1 m", + "Window width:", + "1.2 m", + "Orientation:", + "North West", + "Window 8", + "Window location:", + "Extension 1", + "Window wall type:", + "External wall", + "Glazing Type:", + "Double glazing, Unknown install date", + "Window type:", + "Window", + "Window frame type:", + "Wooden or PVC", + "What size is the glazing gap?", + "16 mm or more", + "Is the window draught proofed?", + "Yes", + "Are there permanent shutters present?", + "No", + "Window height:", + "0.9 m", + "Window width:", + "1 m", + "Orientation:", + "North East", + "Page 15", + "", + "Photo of heating system:", + "Heating & Hot Water", + "Main Heating Systems", + "Main Heating 1", + "How would you like to select the Heating System?", + "PCDF Search", + "System type:", + "Boiler with radiators or underfloor heating", + "Product Id", + "16839", + "Manufacturer", + "Vaillant", + "Model", + "ecoTEC pro 28", + "Orig Manuf", + "Vaillant", + "Fuel", + "Mains gas", + "S. 
Efficiency", + "0", + "Type", + "Combi", + "Condensing", + "Yes", + "Year", + "2005 - 2015", + "Mount", + "Wall", + "Open Flue", + "Room-sealed", + "Fan Assist", + "Yes", + "Status", + "Normal status for an actual product", + "Central heating pump age:", + "Unknown", + "Controls:", + "Programmer, room thermostat and TRVs", + "Does the boiler have a Flue Gas Heat Recover", + "System (FGHRS)?", + "No", + "Is there a weather compensator?", + "No", + "Emitter:", + "Radiators", + "Emitter Temperature:", + "Unknown", + "Page 16", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Page 17", + "", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating system:", + "Photo of heating controls:", + "Photo of heating controls:", + "Secondary Heating System", + "Secondary Fuel", + "No Secondary Heating", + "Water Heating & Cylinder", + "Water Heating Type:", + "Regular", + "Water Heating System:", + "From main heating 1", + "Cylinder Size:", + "No Cylinder", + "Ventilation", + "Ventilation type:", + "Mechanical Extract - Decentralised", + "Page 18", + "", + "Photo of ventilation type:", + "Has fixed air conditioning?", + "No", + "Is the ventilation in the PCDF database?", + "No", + "Number of open flues:", + "0", + "Number of closed flues:", + "0", + "Number of boiler flues:", + "0", + "Number of other flues:", + "0", + "Number of extract fans:", + "0", + "Number of passive vents:", + "0", + "Number of flueless gas fires:", + "0", + "Pressure test:", + "No test", + "Is there a draught lobby?", + "No", + "Conservatories", + "Is there conservatory?", + "No conservatory", + "Page 19", + "", + "Photo of incandescent bulbs:", + "Photo of incandescent bulbs:", + "Renewables", + "Wind Turbines", + "Has wind turbines?", + "No", + "Solar hot water", + "Has solar hot water?", + "No", + 
"Photovoltaics", + "Has photovoltaic array?", + "No", + "Number of PV batteries:", + "None", + "Hydro", + "Is the dwelling connected to Hydro?", + "No", + "Room Count Elements", + "Number of habitable rooms?", + "3", + "Are any of these rooms unheated?", + "No", + "Number of external doors?", + "2", + "Number of insulated external doors?", + "0", + "Number of draughtproofed external doors?", + "2", + "Number of open chimneys?", + "0", + "Number of blocked chimneys?", + "0", + "Number of fixed incandescent bulbs:", + "4", + "Page 20", + "", + "Photo of incandescent bulbs:", + "Photo of incandescent bulbs:", + "Photo of CFL bulbs:", + "Is the exact number of LED and CFL bulbs known?", + "Yes", + "Number of fixed LED bulbs:", + "0", + "Number of fixed CFL bulbs:", + "1", + "Are there any waste water heat recovery systems?", + "None", + "Number of baths:", + "1", + "How many special features are there at the", + "property?", + "0", + "Showers", + "Shower 1", + "Shower outlet type:", + "Non-Electric Shower", + "Page 21", + "", + "Photo of shower:", + "Photo of shower:", + "General Photos:", + "Customer Response", + "Customer present?", + "Yes", + "Customer willing to answer satisfaction survey?", + "No", + "Addendum + Related Party Disclosure", + "Addendum", + "None", + "Related party disclosure", + "No related party", + "Hard to treat cavity walls: Property has access", + "issues?", + "No", + "Hard to treat cavity walls: Property has high", + "exposure?", + "No", + "Hard to treat cavity walls: Property has narrow", + "cavities?", + "No", + "Photographs Required", + "Page 22", + "", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "External Elevations:", + "Page 23", + "", + "Page 24", + "", + "Page 25", + "", + "Page 26", + "", + "Page 27", + "" +] \ No newline at end of file diff --git a/backend/documents_parser/tests/test_end_to_end.py 
b/backend/documents_parser/tests/test_end_to_end.py new file mode 100644 index 00000000..84e611c6 --- /dev/null +++ b/backend/documents_parser/tests/test_end_to_end.py @@ -0,0 +1,422 @@ +import os +from datetime import date + +import pytest + +from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor +from backend.documents_parser.pdf import pdf_to_text_list +from datatypes.epc.domain.epc_property_data import ( + EpcPropertyData, + InstantaneousWwhrs, + MainHeatingDetail, + SapBuildingPart, + SapEnergySource, + SapFloorDimension, + SapHeating, + SapVentilation, + SapWindow, + ShowerOutlet, + ShowerOutlets, +) +from datatypes.epc.domain.mapper import EpcPropertyDataMapper + +PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf") +PDF_PATH_2 = os.path.join( + os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_2.pdf" +) + + +class TestPdfToEpcPropertyData: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH, "rb") as f: + pdf_bytes = f.read() + site_notes = PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_full_epc_property_data(self, result: EpcPropertyData) -> None: + assert result == EpcPropertyData( + dwelling_type="Mid-terrace house", + inspection_date=date(2025, 9, 25), + tenure="Rented Social", + transaction_type="Grant-Scheme (ECO, RHI, etc.)", + roofs=[], + walls=[], + floors=[], + main_heating=[], + door_count=2, + sap_heating=SapHeating( + instantaneous_wwhrs=InstantaneousWwhrs(), + main_heating_details=[ + MainHeatingDetail( + has_fghrs=False, + main_fuel_type="Mains gas", + heat_emitter_type="Radiators", + emitter_temperature="Unknown", + main_heating_control="Programmer, room thermostat and TRVs", + fan_flue_present=True, + condensing=True, + weather_compensator=False, + central_heating_pump_age_str="Unknown", + ) + ], + has_fixed_air_conditioning=False, + 
shower_outlets=ShowerOutlets( + shower_outlet=ShowerOutlet( + shower_outlet_type="Non-Electric Shower" + ), + ), + ), + sap_windows=[ + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North West", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=2.3, + window_height=1.2, + draught_proofed=True, + window_location="Main Building", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North West", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.0, + window_height=1.2, + draught_proofed=True, + window_location="Main Building", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North East", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.0, + window_height=0.9, + draught_proofed=True, + window_location="Main Building", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.0, + window_height=0.9, + draught_proofed=True, + window_location="Extension 1", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North East", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.7, + window_height=0.9, + draught_proofed=True, + window_location="Extension 1", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + 
orientation="North West", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=2.3, + window_height=0.9, + draught_proofed=True, + window_location="Extension 1", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North West", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.2, + window_height=1.0, + draught_proofed=True, + window_location="Extension 1", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + SapWindow( + pvc_frame="Wooden or PVC", + glazing_gap="16 mm or more", + orientation="North East", + window_type="Window", + glazing_type="Double glazing, Unknown install date", + window_width=1.0, + window_height=0.9, + draught_proofed=True, + window_location="Extension 1", + window_wall_type="External wall", + permanent_shutters_present=False, + ), + ], + sap_energy_source=SapEnergySource( + mains_gas=True, + meter_type="Single", + pv_battery_count=0, + wind_turbines_count=0, + gas_smart_meter_present=True, + is_dwelling_export_capable=True, + wind_turbines_terrain_type="Suburban", + electricity_smart_meter_present=True, + ), + sap_building_parts=[ + SapBuildingPart( + identifier="main", + construction_age_band="1950-1966", + wall_construction="Cavity", + wall_insulation_type="Filled Cavity", + wall_thickness_measured=True, + party_wall_construction="Cavity Masonry, Filled", + sap_floor_dimensions=[ + SapFloorDimension( + room_height_m=2.19, + total_floor_area_m2=35.68, + party_wall_length_m=10.62, + heat_loss_perimeter_m=13.44, + floor=1, + ), + SapFloorDimension( + room_height_m=2.17, + total_floor_area_m2=35.68, + party_wall_length_m=10.62, + heat_loss_perimeter_m=11.0, + floor=0, + ), + ], + wall_thickness_mm=310, + roof_insulation_location="Joists", + roof_insulation_thickness=100, + floor_type="Ground Floor", + 
floor_construction_type="Solid", + floor_insulation_type_str="As Built", + floor_u_value_known=False, + ), + SapBuildingPart( + identifier="extension_1", + construction_age_band="2003-2006", + wall_construction="Cavity", + wall_insulation_type="As built", + wall_thickness_measured=True, + party_wall_construction="Cavity Masonry, Filled", + sap_floor_dimensions=[ + SapFloorDimension( + room_height_m=2.0, + total_floor_area_m2=3.8, + party_wall_length_m=0.0, + heat_loss_perimeter_m=5.7, + floor=0, + ), + ], + wall_thickness_mm=310, + roof_insulation_location="Sloping ceiling insulation", + roof_insulation_thickness="As built", + ), + ], + solar_water_heating=False, + has_hot_water_cylinder=False, + has_fixed_air_conditioning=False, + wet_rooms_count=0, + extensions_count=1, + heated_rooms_count=0, + open_chimneys_count=0, + habitable_rooms_count=3, + insulated_door_count=0, + cfl_fixed_lighting_bulbs_count=1, + led_fixed_lighting_bulbs_count=0, + incandescent_fixed_lighting_bulbs_count=4, + total_floor_area_m2=75.16, + built_form="Mid-terrace", + property_type="House", + has_conservatory=False, + blocked_chimneys_count=0, + draughtproofed_door_count=2, + address_line_1="40, Abbey Place", + post_town="Crewe", + postcode="CW1 4JR", + report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD", + number_of_storeys=2, + any_unheated_rooms=False, + waste_water_heat_recovery="None", + hydro=False, + photovoltaic_array=False, + sap_ventilation=SapVentilation( + ventilation_type="Mechanical Extract - Decentralised", + draught_lobby=False, + pressure_test="No test", + open_flues_count=0, + closed_flues_count=0, + boiler_flues_count=0, + other_flues_count=0, + extract_fans_count=0, + passive_vents_count=0, + flueless_gas_fires_count=0, + ventilation_in_pcdf_database=False, + ), + ) + + +class TestPdfToEpcPropertyDataFixture2: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH_2, "rb") as f: + pdf_bytes = f.read() + site_notes = 
PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_cylinder_insulation_thickness(self, result: EpcPropertyData) -> None: + assert result.sap_heating.cylinder_insulation_thickness_mm == 38 + + def test_cylinder_size(self, result: EpcPropertyData) -> None: + assert result.sap_heating.cylinder_size == "Normal (90-130 litres)" + + def test_secondary_heating_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.secondary_heating_type == "Open fire in grate" + + +PDF_PATH_3 = os.path.join( + os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_3.pdf" +) + + +class TestPdfToEpcPropertyDataFixture3: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH_3, "rb") as f: + pdf_bytes = f.read() + site_notes = PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_immersion_heating_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.immersion_heating_type == "Dual" + + def test_pv_connection(self, result: EpcPropertyData) -> None: + assert ( + result.sap_energy_source.pv_connection + == "Connected to dwellings electricity meter" + ) + + def test_photovoltaic_supply_percent_roof(self, result: EpcPropertyData) -> None: + assert result.sap_energy_source.photovoltaic_supply is not None + assert ( + result.sap_energy_source.photovoltaic_supply.none_or_no_details.percent_roof_area + == 45 + ) + + def test_electric_storage_heater_fuel_type(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.main_heating_details[0].main_fuel_type == "Electricity" + ) + + +PDF_PATH_4 = os.path.join( + os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_4.pdf" +) + + +class TestPdfToEpcPropertyDataFixture4: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH_4, "rb") as f: + pdf_bytes = f.read() + 
site_notes = PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_cylinder_insulation_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.cylinder_insulation_type == "Factory fitted" + + def test_heat_pump_fuel_type(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.main_heating_details[0].main_fuel_type == "Electricity" + ) + + def test_roof_insulation_location_unknown(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].roof_insulation_location == "Unknown" + + def test_roof_insulation_thickness_none(self, result: EpcPropertyData) -> None: + assert result.sap_building_parts[0].roof_insulation_thickness is None + + +PDF_PATH_5 = os.path.join( + os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_5.pdf" +) + + +class TestPdfToEpcPropertyDataFixture5: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH_5, "rb") as f: + pdf_bytes = f.read() + site_notes = PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_cfl_bulb_count(self, result: EpcPropertyData) -> None: + assert result.cfl_fixed_lighting_bulbs_count == 2 + + def test_secondary_heating_type(self, result: EpcPropertyData) -> None: + assert ( + result.sap_heating.secondary_heating_type + == "Panel, convector or radiant heaters" + ) + + def test_electric_shower_outlet_type(self, result: EpcPropertyData) -> None: + assert result.sap_heating.shower_outlets is not None + assert ( + result.sap_heating.shower_outlets.shower_outlet.shower_outlet_type + == "Electric Shower" + ) + + +PDF_PATH_6 = os.path.join( + os.path.dirname(__file__), "fixtures", "ExampleSiteNotes_6.pdf" +) + + +class TestPdfToEpcPropertyDataFixture6: + @pytest.fixture + def result(self) -> EpcPropertyData: + with open(PDF_PATH_6, "rb") as f: + pdf_bytes = f.read() + 
site_notes = PasHubRdSapSiteNotesExtractor( + pdf_to_text_list(pdf_bytes) + ).extract() + return EpcPropertyDataMapper.from_site_notes(site_notes) + + def test_party_wall_construction(self, result: EpcPropertyData) -> None: + assert ( + result.sap_building_parts[0].party_wall_construction + == "Solid Masonry, Timber Frame, or System Built" + ) diff --git a/backend/documents_parser/tests/test_extractor.py b/backend/documents_parser/tests/test_extractor.py new file mode 100644 index 00000000..66cc4271 --- /dev/null +++ b/backend/documents_parser/tests/test_extractor.py @@ -0,0 +1,799 @@ +import json +import os +from datetime import date + +import pytest + +from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor +from datatypes.epc.surveys.pashub_rdsap_site_notes import ( + BuildingConstruction, + BuildingMeasurements, + Conservatories, + CustomerResponse, + ExtensionConstruction, + ExtensionMeasurements, + ExtensionRoofSpace, + FloorConstruction, + FloorMeasurement, + General, + HeatingAndHotWater, + InspectionMetadata, + MainBuildingConstruction, + MainBuildingMeasurements, + MainHeating, + Renewables, + RoomCountElements, + RoofSpace, + RoofSpaceDetail, + SecondaryHeating, + Shower, + SurveyAddendum, + Ventilation, + WaterHeating, + WaterUse, +) + +FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures") + + +def load_text_fixture() -> list[str]: + with open(os.path.join(FIXTURES, "site_notes_example_text.json")) as f: + return json.load(f) + + +def load_text_fixture_2() -> list[str]: + with open(os.path.join(FIXTURES, "site_notes_example_2_text.json")) as f: + return json.load(f) + + +def load_text_fixture_3() -> list[str]: + with open(os.path.join(FIXTURES, "site_notes_example_3_text.json")) as f: + return json.load(f) + + +def load_text_fixture_4() -> list[str]: + with open(os.path.join(FIXTURES, "site_notes_example_4_text.json")) as f: + return json.load(f) + + +def load_text_fixture_5() -> list[str]: + with 
open(os.path.join(FIXTURES, "site_notes_example_5_text.json")) as f: + return json.load(f) + + +def load_text_fixture_6() -> list[str]: + with open(os.path.join(FIXTURES, "site_notes_example_6_text.json")) as f: + return json.load(f) + + +class TestInspectionMetadata: + def test_full_inspection_metadata(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_inspection_metadata() + assert result == InspectionMetadata( + inspection_surveyor="Benjamin Burke", + email_address="ben@mbsolutionsgroup.co.uk", + report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD", + created_on="2025-11-10", + date_of_inspection=date(2025, 9, 25), + property_address="40, Abbey Place, Crewe, Cheshire, CW1 4JR", + property_photo=True, + ) + + +class TestGeneral: + @pytest.fixture + def general(self) -> General: + return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_general() + + def test_epc_checked_before_assessment(self, general: General) -> None: + assert general.epc_checked_before_assessment is True + + def test_epc_exists_at_point_of_assessment(self, general: General) -> None: + assert general.epc_exists_at_point_of_assessment is False + + def test_inspection_date(self, general: General) -> None: + assert general.inspection_date == date(2025, 9, 25) + + def test_transaction_type(self, general: General) -> None: + assert general.transaction_type == "Grant-Scheme (ECO, RHI, etc.)" + + def test_tenure(self, general: General) -> None: + assert general.tenure == "Rented Social" + + def test_property_type(self, general: General) -> None: + assert general.property_type == "House" + + def test_detachment_type(self, general: General) -> None: + assert general.detachment_type == "Mid-terrace" + + def test_number_of_storeys(self, general: General) -> None: + assert general.number_of_storeys == 2 + + def test_number_of_extensions(self, general: General) -> None: + assert general.number_of_extensions == 1 + + def test_electricity_smart_meter(self, 
general: General) -> None: + assert general.electricity_smart_meter is True + + def test_mains_gas_available(self, general: General) -> None: + assert general.mains_gas_available is True + + def test_measurements_location(self, general: General) -> None: + assert general.measurements_location == "Internal" + + def test_full_general(self, general: General) -> None: + assert general == General( + epc_checked_before_assessment=True, + epc_exists_at_point_of_assessment=False, + inspection_date=date(2025, 9, 25), + transaction_type="Grant-Scheme (ECO, RHI, etc.)", + tenure="Rented Social", + property_type="House", + detachment_type="Mid-terrace", + number_of_storeys=2, + terrain_type="Suburban", + number_of_extensions=1, + electricity_smart_meter=True, + electric_meter_type="Single", + dwelling_export_capable=True, + mains_gas_available=True, + gas_smart_meter=True, + gas_meter_accessible=True, + measurements_location="Internal", + ) + + +class TestGeneralNoExtensions: + @pytest.fixture + def general(self) -> General: + return PasHubRdSapSiteNotesExtractor(load_text_fixture_2()).extract_general() + + def test_number_of_extensions_when_no_extensions(self, general: General) -> None: + assert general.number_of_extensions == 0 + + +class TestBuildingConstruction: + @pytest.fixture + def construction(self) -> BuildingConstruction: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_building_construction() + + def test_main_building_wall_u_value_known_is_false( + self, construction: BuildingConstruction + ) -> None: + assert construction.main_building.wall_u_value_known is False + + def test_main_building_wall_thickness_mm( + self, construction: BuildingConstruction + ) -> None: + assert construction.main_building.wall_thickness_mm == 310 + + def test_main_building_filled_cavity_indicators_present( + self, construction: BuildingConstruction + ) -> None: + assert ( + construction.main_building.filled_cavity_indicators + == "evidence of cavity fill drill 
holes" + ) + + def test_extension_filled_cavity_indicators_absent( + self, construction: BuildingConstruction + ) -> None: + assert construction.extensions is not None + assert construction.extensions[0].filled_cavity_indicators is None + + def test_one_extension(self, construction: BuildingConstruction) -> None: + assert construction.extensions is not None + assert len(construction.extensions) == 1 + + def test_extension_id(self, construction: BuildingConstruction) -> None: + assert construction.extensions is not None + assert construction.extensions[0].id == 1 + + def test_full_building_construction( + self, construction: BuildingConstruction + ) -> None: + assert construction == BuildingConstruction( + main_building=MainBuildingConstruction( + age_range="1950-1966", + age_indicators="local knowledge, enquiries of owner", + walls_construction_type="Cavity", + cavity_construction_indicators="wall thickness over 270 mm", + walls_insulation_type="Filled Cavity", + filled_cavity_indicators="evidence of cavity fill drill holes", + thermal_conductivity_of_wall_insulation="Unknown", + wall_u_value_known=False, + wall_thickness_mm=310, + party_wall_construction_type="Cavity Masonry, Filled", + ), + floor=FloorConstruction( + floor_type="Ground Floor", + floor_construction="Solid", + floor_insulation_type="As Built", + floor_u_value_known=False, + ), + extensions=[ + ExtensionConstruction( + id=1, + age_range="2003-2006", + age_indicators="local knowledge, enquiries of owner", + walls_construction_type="Cavity", + cavity_construction_indicators="wall thickness over 270 mm", + walls_insulation_type="As built", + thermal_conductivity_of_wall_insulation="Unknown", + wall_u_value_known=False, + wall_thickness_mm=310, + party_wall_construction_type="Cavity Masonry, Filled", + filled_cavity_indicators=None, + ) + ], + ) + + +class TestBuildingMeasurements: + @pytest.fixture + def measurements(self) -> BuildingMeasurements: + return PasHubRdSapSiteNotesExtractor( + 
load_text_fixture() + ).extract_building_measurements() + + def test_main_building_has_two_floors( + self, measurements: BuildingMeasurements + ) -> None: + assert len(measurements.main_building.floors) == 2 + + def test_main_building_floor_area( + self, measurements: BuildingMeasurements + ) -> None: + assert measurements.main_building.floors[0].area_m2 == 35.68 + + def test_integer_token_parses_to_float( + self, measurements: BuildingMeasurements + ) -> None: + # "11" in the PDF (no decimal) should parse to 11.0 + assert measurements.main_building.floors[1].heat_loss_perimeter_m == 11.0 + + def test_extension_measurements_present( + self, measurements: BuildingMeasurements + ) -> None: + assert measurements.extensions is not None + assert len(measurements.extensions) == 1 + + def test_extension_id(self, measurements: BuildingMeasurements) -> None: + assert measurements.extensions is not None + assert measurements.extensions[0].id == 1 + + def test_full_building_measurements( + self, measurements: BuildingMeasurements + ) -> None: + assert measurements == BuildingMeasurements( + main_building=MainBuildingMeasurements( + floors=[ + FloorMeasurement( + name="Floor 1", + area_m2=35.68, + height_m=2.19, + heat_loss_perimeter_m=13.44, + pwl_m=10.62, + ), + FloorMeasurement( + name="Floor 0", + area_m2=35.68, + height_m=2.17, + heat_loss_perimeter_m=11.0, + pwl_m=10.62, + ), + ] + ), + extensions=[ + ExtensionMeasurements( + id=1, + floors=[ + FloorMeasurement( + name="Floor 0", + area_m2=3.8, + height_m=2.0, + heat_loss_perimeter_m=5.7, + pwl_m=0.0, + ) + ], + ) + ], + ) + + +class TestRoofSpace: + @pytest.fixture + def roof_space(self) -> RoofSpace: + return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_roof_space() + + def test_main_building_insulation_thickness_mm( + self, roof_space: RoofSpace + ) -> None: + assert roof_space.main_building.insulation_thickness_mm == 100 + + def test_main_building_insulation_thickness_string_absent( + self, roof_space: 
RoofSpace + ) -> None: + assert roof_space.main_building.insulation_thickness is None + + def test_main_building_rooms_in_roof(self, roof_space: RoofSpace) -> None: + assert roof_space.main_building.rooms_in_roof is False + + def test_main_building_roof_u_value_known(self, roof_space: RoofSpace) -> None: + assert roof_space.main_building.roof_u_value_known is False + + def test_extension_uses_string_thickness(self, roof_space: RoofSpace) -> None: + assert roof_space.extensions is not None + assert roof_space.extensions[0].insulation_thickness == "As built" + assert roof_space.extensions[0].insulation_thickness_mm is None + + def test_full_roof_space(self, roof_space: RoofSpace) -> None: + assert roof_space == RoofSpace( + main_building=RoofSpaceDetail( + construction_type="Pitched roof (Slates or tiles), Access to loft", + insulation_at="Joists", + roof_u_value_known=False, + cavity_wall_construction_indicators="cavity visible in roof space", + rooms_in_roof=False, + insulation_thickness_mm=100, + insulation_thickness=None, + ), + extensions=[ + ExtensionRoofSpace( + id=1, + construction_type="Pitched roof, Sloping ceiling", + insulation_at="Sloping ceiling insulation", + roof_u_value_known=False, + cavity_wall_construction_indicators="No indicator of construction visible", + rooms_in_roof=False, + insulation_thickness_mm=None, + insulation_thickness="As built", + ) + ], + ) + + +class TestWindows: + @pytest.fixture + def windows(self) -> list: + return PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_windows() + + def test_window_count(self, windows: list) -> None: + assert len(windows) == 8 + + def test_ids_are_sequential(self, windows: list) -> None: + assert [w.id for w in windows] == list(range(1, 9)) + + def test_first_window_location(self, windows: list) -> None: + assert windows[0].location == "Main Building" + + def test_extension_window_location(self, windows: list) -> None: + assert windows[3].location == "Extension 1" + + def 
test_height_parses_to_float(self, windows: list) -> None: + assert windows[0].height_m == 1.2 + + def test_draught_proofed_true(self, windows: list) -> None: + assert windows[0].draught_proofed is True + + def test_permanent_shutters_false(self, windows: list) -> None: + assert windows[0].permanent_shutters is False + + def test_first_window_full(self, windows: list) -> None: + from datatypes.epc.surveys.pashub_rdsap_site_notes import Window + assert windows[0] == Window( + id=1, + location="Main Building", + wall_type="External wall", + glazing_type="Double glazing, Unknown install date", + window_type="Window", + frame_type="Wooden or PVC", + glazing_gap="16 mm or more", + draught_proofed=True, + permanent_shutters=False, + height_m=1.2, + width_m=2.3, + orientation="North West", + ) + + +class TestWaterHeatingCylinderThickness: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_2() + ).extract_heating_and_hot_water() + + @pytest.fixture + def hhw_no_cylinder(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_heating_and_hot_water() + + def test_cylinder_insulation_thickness_mm(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.insulation_thickness_mm == 38 + + def test_cylinder_insulation_thickness_mm_absent(self, hhw_no_cylinder: HeatingAndHotWater) -> None: + assert hhw_no_cylinder.water_heating.insulation_thickness_mm is None + + def test_cylinder_size(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.cylinder_size == "Normal (90-130 litres)" + + +class TestImmersionType: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_3() + ).extract_heating_and_hot_water() + + def test_immersion_type(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.immersion_type == "Dual" + + +class TestCylinderThermostat: + @pytest.fixture + def hhw(self) 
-> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_3() + ).extract_heating_and_hot_water() + + def test_has_thermostat_true(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.has_thermostat is True + + +class TestSecondaryHeating: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_2() + ).extract_heating_and_hot_water() + + @pytest.fixture + def hhw_no_secondary(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_heating_and_hot_water() + + def test_secondary_system(self, hhw: HeatingAndHotWater) -> None: + assert hhw.secondary_heating.secondary_system == "Open fire in grate" + + def test_secondary_system_absent(self, hhw_no_secondary: HeatingAndHotWater) -> None: + assert hhw_no_secondary.secondary_heating.secondary_system is None + + +class TestHeatingAndHotWater: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_heating_and_hot_water() + + def test_product_id_parses_to_int(self, hhw: HeatingAndHotWater) -> None: + assert hhw.main_heating.product_id == 16839 + + def test_summer_efficiency_parses_to_float(self, hhw: HeatingAndHotWater) -> None: + assert hhw.main_heating.summer_efficiency == 0.0 + + def test_condensing_true(self, hhw: HeatingAndHotWater) -> None: + assert hhw.main_heating.condensing is True + + def test_fghrs_false(self, hhw: HeatingAndHotWater) -> None: + # multi-line label + assert hhw.main_heating.flue_gas_heat_recovery_system is False + + def test_secondary_fuel(self, hhw: HeatingAndHotWater) -> None: + assert hhw.secondary_heating.secondary_fuel == "No Secondary Heating" + + def test_water_heating_no_cylinder(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.cylinder_size == "No Cylinder" + assert hhw.water_heating.insulation_type is None + assert hhw.water_heating.has_thermostat is 
None + + def test_full_heating_and_hot_water(self, hhw: HeatingAndHotWater) -> None: + assert hhw == HeatingAndHotWater( + main_heating=MainHeating( + selection_method="PCDF Search", + system_type="Boiler with radiators or underfloor heating", + product_id=16839, + manufacturer="Vaillant", + model="ecoTEC pro 28", + orig_manufacturer="Vaillant", + fuel="Mains gas", + summer_efficiency=0.0, + type="Combi", + condensing=True, + year="2005 - 2015", + mount="Wall", + open_flue="Room-sealed", + fan_assist=True, + status="Normal status for an actual product", + central_heating_pump_age="Unknown", + controls="Programmer, room thermostat and TRVs", + flue_gas_heat_recovery_system=False, + weather_compensator=False, + emitter="Radiators", + emitter_temperature="Unknown", + ), + secondary_heating=SecondaryHeating( + secondary_fuel="No Secondary Heating", + ), + water_heating=WaterHeating( + type="Regular", + system="From main heating 1", + cylinder_size="No Cylinder", + cylinder_measured_heat_loss=None, + insulation_type=None, + insulation_thickness_mm=None, + has_thermostat=None, + ), + ) + + +class TestVentilation: + @pytest.fixture + def ventilation(self) -> Ventilation: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_ventilation() + + def test_ventilation_type(self, ventilation: Ventilation) -> None: + assert ventilation.ventilation_type == "Mechanical Extract - Decentralised" + + def test_number_of_open_flues(self, ventilation: Ventilation) -> None: + assert ventilation.number_of_open_flues == 0 + + def test_ventilation_in_pcdf_database(self, ventilation: Ventilation) -> None: + assert ventilation.ventilation_in_pcdf_database is False + + def test_full_ventilation(self, ventilation: Ventilation) -> None: + assert ventilation == Ventilation( + ventilation_type="Mechanical Extract - Decentralised", + has_fixed_air_conditioning=False, + number_of_open_flues=0, + number_of_closed_flues=0, + number_of_boiler_flues=0, + number_of_other_flues=0, + 
number_of_extract_fans=0, + number_of_passive_vents=0, + number_of_flueless_gas_fires=0, + pressure_test="No test", + draught_lobby=False, + ventilation_in_pcdf_database=False, + ) + + +class TestConservatories: + def test_full_conservatories(self) -> None: + result = PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_conservatories() + assert result == Conservatories(has_conservatory=False) + + +class TestRenewables: + def test_number_of_pv_batteries_none_string_becomes_zero(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_renewables() + assert result.number_of_pv_batteries == 0 + + def test_full_renewables(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_renewables() + assert result == Renewables( + wind_turbines=False, + solar_hot_water=False, + photovoltaic_array=False, + number_of_pv_batteries=0, + hydro=False, + ) + + +class TestRenewablesPvConnection: + @pytest.fixture + def renewables(self) -> Renewables: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_3() + ).extract_renewables() + + def test_pv_connection(self, renewables: Renewables) -> None: + assert renewables.pv_connection == "Connected to dwellings electricity meter" + + def test_percent_roof_covered_pv(self, renewables: Renewables) -> None: + assert renewables.percent_roof_covered_pv == 45 + + +class TestRoomCountElements: + @pytest.fixture + def rce(self) -> RoomCountElements: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_room_count_elements() + + def test_habitable_rooms(self, rce: RoomCountElements) -> None: + assert rce.number_of_habitable_rooms == 3 + + def test_heated_rooms_null(self, rce: RoomCountElements) -> None: + assert rce.number_of_heated_rooms is None + + def test_full_room_count_elements(self, rce: RoomCountElements) -> None: + assert rce == RoomCountElements( + number_of_habitable_rooms=3, + any_unheated_rooms=False, + number_of_heated_rooms=None, + 
number_of_external_doors=2, + number_of_insulated_external_doors=0, + number_of_draughtproofed_external_doors=2, + number_of_open_chimneys=0, + number_of_blocked_chimneys=0, + number_of_fixed_incandescent_bulbs=4, + exact_led_cfl_count_known=True, + number_of_fixed_led_bulbs=0, + number_of_fixed_cfl_bulbs=1, + waste_water_heat_recovery="None", + ) + + +class TestWaterUse: + def test_full_water_use(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_water_use() + assert result == WaterUse( + number_of_baths=1, + number_of_special_features=0, + showers=[Shower(id=1, outlet_type="Non-Electric Shower")], + ) + + +class TestCustomerResponse: + def test_full_customer_response(self) -> None: + result = PasHubRdSapSiteNotesExtractor( + load_text_fixture() + ).extract_customer_response() + assert result == CustomerResponse( + customer_present=True, + willing_to_answer_satisfaction_survey=False, + ) + + +class TestExtract: + def test_full_extract(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract() + assert result.inspection_metadata.inspection_surveyor == "Benjamin Burke" + assert result.general.inspection_date == date(2025, 9, 25) + assert result.building_construction.main_building.wall_thickness_mm == 310 + assert result.building_measurements.main_building.floors[0].area_m2 == 35.68 + assert result.roof_space.main_building.insulation_thickness_mm == 100 + assert len(result.windows) == 8 + assert result.heating_and_hot_water.main_heating.product_id == 16839 + assert result.ventilation.ventilation_type == "Mechanical Extract - Decentralised" + assert result.conservatories.has_conservatory is False + assert result.renewables.number_of_pv_batteries == 0 + assert result.room_count_elements.number_of_habitable_rooms == 3 + assert result.water_use.number_of_baths == 1 + assert result.customer_response.customer_present is True + assert result.addendum.addendum == "None" + + +class TestSurveyAddendum: + def 
test_hard_to_treat_flags(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_addendum() + assert result.hard_to_treat_cavity_access_issues is False + assert result.hard_to_treat_cavity_high_exposure is False + assert result.hard_to_treat_cavity_narrow_cavities is False + + def test_full_addendum(self) -> None: + result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract_addendum() + assert result == SurveyAddendum( + addendum="None", + related_party_disclosure="No related party", + hard_to_treat_cavity_access_issues=False, + hard_to_treat_cavity_high_exposure=False, + hard_to_treat_cavity_narrow_cavities=False, + ) + + +# --- fixture 4: heat pump, factory-fitted cylinder, blocked loft --- + + +class TestCylinderInsulationType: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_4() + ).extract_heating_and_hot_water() + + def test_insulation_type_extracted(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.insulation_type == "Factory fitted" + + def test_insulation_thickness_mm(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.insulation_thickness_mm == 50 + + def test_cylinder_size(self, hhw: HeatingAndHotWater) -> None: + assert hhw.water_heating.cylinder_size == "Medium (131-170 litres)" + + +class TestHeatPumpFuelExtraction: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_4() + ).extract_heating_and_hot_water() + + def test_fuel_raw_value(self, hhw: HeatingAndHotWater) -> None: + assert hhw.main_heating.fuel == "Electricity, any tariff" + + def test_system_type(self, hhw: HeatingAndHotWater) -> None: + assert hhw.main_heating.system_type == "Heat pump with radiators or underfloor heating" + + +class TestRoofSpaceUnknownInsulation: + @pytest.fixture + def roof_space(self) -> RoofSpace: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_4() + 
).extract_roof_space() + + def test_insulation_at_unknown(self, roof_space: RoofSpace) -> None: + assert roof_space.main_building.insulation_at == "Unknown" + + def test_insulation_thickness_mm_none(self, roof_space: RoofSpace) -> None: + assert roof_space.main_building.insulation_thickness_mm is None + + def test_insulation_thickness_str_none(self, roof_space: RoofSpace) -> None: + assert roof_space.main_building.insulation_thickness is None + + +class TestCflBulbCount: + @pytest.fixture + def rce(self) -> RoomCountElements: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_5() + ).extract_room_count_elements() + + def test_cfl_count(self, rce: RoomCountElements) -> None: + assert rce.number_of_fixed_cfl_bulbs == 2 + + def test_led_count(self, rce: RoomCountElements) -> None: + assert rce.number_of_fixed_led_bulbs == 7 + + def test_incandescent_count(self, rce: RoomCountElements) -> None: + assert rce.number_of_fixed_incandescent_bulbs == 1 + + +class TestSecondaryHeatingPanel: + @pytest.fixture + def hhw(self) -> HeatingAndHotWater: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_5() + ).extract_heating_and_hot_water() + + def test_secondary_system(self, hhw: HeatingAndHotWater) -> None: + assert hhw.secondary_heating.secondary_system == "Panel, convector or radiant heaters" + + def test_secondary_fuel(self, hhw: HeatingAndHotWater) -> None: + assert hhw.secondary_heating.secondary_fuel == "Electricity" + + +class TestElectricShowerExtraction: + @pytest.fixture + def wu(self) -> WaterUse: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_5() + ).extract_water_use() + + def test_shower_outlet_type(self, wu: WaterUse) -> None: + assert wu.showers[0].outlet_type == "Electric Shower" + + +class TestSolidMasonryPartyWall: + @pytest.fixture + def bc(self) -> BuildingConstruction: + return PasHubRdSapSiteNotesExtractor( + load_text_fixture_6() + ).extract_building_construction() + + def test_party_wall_construction_type(self, bc: 
BuildingConstruction) -> None: + assert ( + bc.main_building.party_wall_construction_type + == "Solid Masonry, Timber Frame, or System Built" + ) diff --git a/backend/documents_parser/tests/test_pdf.py b/backend/documents_parser/tests/test_pdf.py new file mode 100644 index 00000000..3a6dd2fb --- /dev/null +++ b/backend/documents_parser/tests/test_pdf.py @@ -0,0 +1,31 @@ +import json +import os + +import pytest + +from backend.documents_parser.pdf import pdf_to_text_list + +PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf") +FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "site_notes_example_text.json") + + +@pytest.fixture +def pdf_bytes() -> bytes: + with open(PDF_PATH, "rb") as f: + return f.read() + + +class TestPdfToTextList: + def test_returns_list(self, pdf_bytes: bytes) -> None: + result = pdf_to_text_list(pdf_bytes) + assert isinstance(result, list) + + def test_all_elements_are_strings(self, pdf_bytes: bytes) -> None: + result = pdf_to_text_list(pdf_bytes) + assert all(isinstance(t, str) for t in result) + + def test_matches_fixture(self, pdf_bytes: bytes) -> None: + with open(FIXTURE_PATH) as f: + expected = json.load(f) + result = pdf_to_text_list(pdf_bytes) + assert result == expected diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index b92a46aa..8f949b0f 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -29,18 +29,21 @@ class MainHeatingDetail: boiler_flue_type: Optional[int] = None # TODO: make enum? boiler_ignition_type: Optional[int] = None # TODO: make enum? central_heating_pump_age: Optional[int] = None + central_heating_pump_age_str: Optional[str] = None # str from site notes e.g. "Unknown", "Pre 2013" main_heating_index_number: Optional[int] = None sap_main_heating_code: Optional[int] = None # TODO: make enum? 
main_heating_number: Optional[int] = None main_heating_category: Optional[int] = None main_heating_fraction: Optional[int] = None main_heating_data_source: Optional[int] = None + condensing: Optional[bool] = None + weather_compensator: Optional[bool] = None @dataclass class ShowerOutlet: - shower_wwhrs: int - shower_outlet_type: int + shower_outlet_type: Union[int, str] + shower_wwhrs: Optional[int] = None @dataclass @@ -54,18 +57,33 @@ class SapHeating: instantaneous_wwhrs: InstantaneousWwhrs main_heating_details: List[MainHeatingDetail] has_fixed_air_conditioning: bool - cylinder_size: Optional[int] = ( - None # int code from API; not directly available from site notes + cylinder_size: Optional[Union[int, str]] = ( + None # int code from API; str (e.g. "Normal (90-130 litres)") from site notes ) water_heating_code: Optional[int] = None # TODO: make enum? water_heating_fuel: Optional[int] = None # TODO: make enum? immersion_heating_type: Optional[Union[int, str]] = None # TODO: make enum? shower_outlets: Optional[ShowerOutlets] = None - cylinder_insulation_type: Optional[int] = None + cylinder_insulation_type: Optional[Union[int, str]] = None cylinder_thermostat: Optional[str] = None secondary_fuel_type: Optional[int] = None - secondary_heating_type: Optional[int] = None - cylinder_insulation_thickness: Optional[int] = None + secondary_heating_type: Optional[Union[int, str]] = None # int from API; str from site notes + cylinder_insulation_thickness_mm: Optional[int] = None + + +@dataclass +class SapVentilation: + ventilation_type: Optional[str] = None + draught_lobby: Optional[bool] = None + pressure_test: Optional[str] = None # str from site notes e.g. 
"No test"; int in API via mechanical_ventilation + open_flues_count: Optional[int] = None + closed_flues_count: Optional[int] = None + boiler_flues_count: Optional[int] = None + other_flues_count: Optional[int] = None + extract_fans_count: Optional[int] = None + passive_vents_count: Optional[int] = None + flueless_gas_fires_count: Optional[int] = None + ventilation_in_pcdf_database: Optional[bool] = None @dataclass @@ -130,7 +148,7 @@ class SapEnergySource: wind_turbines_terrain_type: str # int in API, str (e.g. "Suburban") in site notes electricity_smart_meter_present: bool - pv_connection: Optional[int] = None + pv_connection: Optional[Union[int, str]] = None # int from API; str from site notes photovoltaic_supply: Optional[PhotovoltaicSupply] = None wind_turbine_details: Optional[WindTurbineDetails] = None pv_batteries: Optional[PvBatteries] = None @@ -200,6 +218,10 @@ class SapBuildingPart: flat_roof_insulation_thickness: Optional[Union[str, int]] = ( None # TODO: make enum/mapping? ) + floor_type: Optional[str] = None # str from site notes e.g. "Ground Floor" + floor_construction_type: Optional[str] = None # str from site notes; distinct from floor_construction: int in SapFloorDimension + floor_insulation_type_str: Optional[str] = None # str from site notes e.g. "As Built" + floor_u_value_known: Optional[bool] = None roof_construction: Optional[int] = None roof_insulation_location: Optional[Union[int, str]] = ( @@ -235,6 +257,9 @@ class EpcPropertyData: inspection_date: date tenure: str # str in site notes; stringified int (e.g. 
"1") from API transaction_type: str # str in site notes; stringified int from API + address_line_1: str + postcode: str + post_town: str # Elements roofs: List[EnergyElement] @@ -270,9 +295,6 @@ class EpcPropertyData: assessment_type: Optional[str] = None # not available from site notes sap_version: Optional[float] = None # not available from site notes uprn: Optional[int] = None # not available from site notes - address_line_1: Optional[str] = None # not available from site notes - postcode: Optional[str] = None # not available from site notes - post_town: Optional[str] = None # not available from site notes status: Optional[str] = None # not available from site notes window: Optional[EnergyElement] = None # not available from site notes lighting: Optional[EnergyElement] = None # not available from site notes @@ -280,6 +302,7 @@ class EpcPropertyData: schema_type: Optional[str] = None schema_versions_original: Optional[str] = None report_type: Optional[str] = None # TODO: make enum? + report_reference: Optional[str] = None uprn_source: Optional[str] = None address_line_2: Optional[str] = None region_code: Optional[str] = None # TODO: make enum? 
@@ -346,3 +369,10 @@ class EpcPropertyData: # survey_addendum: Optional[Any] = None # not sure how to handle, skip for now fixed_lighting_outlets_count: Optional[int] = None low_energy_fixed_lighting_outlets_count: Optional[int] = None + # Site-notes-only fields + sap_ventilation: Optional[SapVentilation] = None + number_of_storeys: Optional[int] = None + any_unheated_rooms: Optional[bool] = None + waste_water_heat_recovery: Optional[str] = None + hydro: Optional[bool] = None + photovoltaic_array: Optional[bool] = None diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ccc4dd82..1afade5c 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,5 +1,5 @@ from datetime import date -from typing import List, Sequence, Union +from typing import List, Optional, Sequence, Union from datatypes.epc.domain.epc_property_data import ( EnergyElement, @@ -16,6 +16,7 @@ from datatypes.epc.domain.epc_property_data import ( SapFloorDimension, SapHeating, SapRoomInRoof, + SapVentilation, SapWindow, ShowerOutlet, ShowerOutlets, @@ -55,10 +56,14 @@ from datatypes.epc.surveys.pashub_rdsap_site_notes import ( BuildingMeasurements, ExtensionConstruction, ExtensionMeasurements, + ExtensionRoofSpace, + FloorConstruction, FloorMeasurement, HeatingAndHotWater, PasHubRdSapSiteNotes, + RoofSpaceDetail, Ventilation, + WaterUse, Window, ) @@ -78,6 +83,24 @@ class EpcPropertyDataMapper: @staticmethod def from_site_notes(survey: PasHubRdSapSiteNotes) -> EpcPropertyData: general = survey.general + metadata = survey.inspection_metadata + address_parts = [p.strip() for p in metadata.property_address.split(", ")] + postcode = address_parts[-1] if address_parts else "" + post_town = ( + address_parts[-3] + if len(address_parts) >= 4 + else (address_parts[-2] if len(address_parts) >= 3 else "") + ) + address_line_1 = ( + ", ".join(address_parts[:-3]) + if len(address_parts) >= 4 + else ( + ", ".join(address_parts[:-2]) + if len(address_parts) 
>= 3 + else address_parts[0] if address_parts else "" + ) + ) + construction = survey.building_construction measurements = survey.building_measurements heating = survey.heating_and_hot_water @@ -85,13 +108,23 @@ class EpcPropertyDataMapper: renewables = survey.renewables room_counts = survey.room_count_elements - sap_building_parts = [_map_main_building_part(construction, measurements)] + roof_space = survey.roof_space + sap_building_parts = [ + _map_main_building_part( + construction, measurements, roof_space.main_building + ) + ] if construction.extensions and measurements.extensions: for ext_c in construction.extensions: - matching = [m for m in measurements.extensions if m.id == ext_c.id] - if matching: + matching_m = [m for m in measurements.extensions if m.id == ext_c.id] + matching_r = [ + r for r in (roof_space.extensions or []) if r.id == ext_c.id + ] + if matching_m: sap_building_parts.append( - _map_extension_building_part(ext_c, matching[0]) + _map_extension_building_part( + ext_c, matching_m[0], matching_r[0] if matching_r else None + ) ) total_floor_area = round( @@ -101,11 +134,11 @@ class EpcPropertyDataMapper: for floor in part.sap_floor_dimensions ), 2, - ) + ) # TODO: verify that is the correct approach return EpcPropertyData( dwelling_type=f"{general.detachment_type} {general.property_type.lower()}", - inspection_date=date.fromisoformat(general.inspection_date), + inspection_date=general.inspection_date, tenure=general.tenure, transaction_type=general.transaction_type, roofs=[], @@ -113,7 +146,7 @@ class EpcPropertyDataMapper: floors=[], main_heating=[], door_count=room_counts.number_of_external_doors, - sap_heating=_map_sap_heating(heating, ventilation), + sap_heating=_map_sap_heating(heating, ventilation, survey.water_use), sap_windows=[_map_sap_window(w) for w in survey.windows], sap_energy_source=SapEnergySource( mains_gas=general.mains_gas_available, @@ -124,6 +157,16 @@ class EpcPropertyDataMapper: 
is_dwelling_export_capable=general.dwelling_export_capable, wind_turbines_terrain_type=general.terrain_type, electricity_smart_meter_present=general.electricity_smart_meter, + pv_connection=renewables.pv_connection, + photovoltaic_supply=( + PhotovoltaicSupply( + none_or_no_details=PhotovoltaicSupplyNoneOrNoDetails( + percent_roof_area=renewables.percent_roof_covered_pv, + ) + ) + if renewables.percent_roof_covered_pv is not None + else None + ), ), sap_building_parts=sap_building_parts, solar_water_heating=renewables.solar_hot_water, @@ -145,6 +188,16 @@ class EpcPropertyDataMapper: has_conservatory=survey.conservatories.has_conservatory, blocked_chimneys_count=room_counts.number_of_blocked_chimneys, draughtproofed_door_count=room_counts.number_of_draughtproofed_external_doors, + address_line_1=address_line_1, + post_town=post_town, + postcode=postcode, + report_reference=metadata.report_reference, + number_of_storeys=general.number_of_storeys, + any_unheated_rooms=room_counts.any_unheated_rooms, + waste_water_heat_recovery=room_counts.waste_water_heat_recovery, + hydro=renewables.hydro, + photovoltaic_array=renewables.photovoltaic_array, + sap_ventilation=_map_sap_ventilation(ventilation), ) @staticmethod @@ -227,7 +280,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=None, secondary_fuel_type=None, secondary_heating_type=None, - cylinder_insulation_thickness=None, + cylinder_insulation_thickness_mm=None, ), sap_windows=[], sap_energy_source=SapEnergySource( @@ -368,7 +421,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), sap_windows=[], sap_energy_source=SapEnergySource( @@ -509,7 +562,7 @@ class EpcPropertyDataMapper: 
cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), sap_windows=[], sap_energy_source=SapEnergySource( @@ -657,7 +710,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), # 19.0 has no per-window list; individual window fields are at schema root sap_windows=[], @@ -808,7 +861,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), # 20.0.0 SapWindow lacks frame/gap/draught fields present in later schemas sap_windows=[ @@ -986,7 +1039,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), sap_windows=[ SapWindow( @@ -1219,7 +1272,7 @@ class EpcPropertyDataMapper: cylinder_thermostat=schema.sap_heating.cylinder_thermostat, secondary_fuel_type=schema.sap_heating.secondary_fuel_type, 
secondary_heating_type=schema.sap_heating.secondary_heating_type, - cylinder_insulation_thickness=schema.sap_heating.cylinder_insulation_thickness, + cylinder_insulation_thickness_mm=schema.sap_heating.cylinder_insulation_thickness, ), # SAP windows sap_windows=[ @@ -1412,16 +1465,33 @@ def _map_floor_dimensions(floors: List[FloorMeasurement]) -> List[SapFloorDimens total_floor_area_m2=floor.area_m2, party_wall_length_m=floor.pwl_m, heat_loss_perimeter_m=floor.heat_loss_perimeter_m, + floor=int(floor.name.split()[-1]), ) for floor in floors ] +def _map_roof( + roof: Optional[Union[RoofSpaceDetail, ExtensionRoofSpace]], +) -> tuple[Optional[str], Optional[Union[str, int]]]: + if roof is None: + return None, None + thickness: Optional[Union[str, int]] = ( + roof.insulation_thickness_mm + if roof.insulation_thickness_mm is not None + else roof.insulation_thickness + ) + return roof.insulation_at or None, thickness + + def _map_main_building_part( construction: BuildingConstruction, measurements: BuildingMeasurements, + roof: RoofSpaceDetail, ) -> SapBuildingPart: main = construction.main_building + floor = construction.floor + roof_location, roof_thickness = _map_roof(roof) return SapBuildingPart( identifier="main", construction_age_band=_extract_age_band(main.age_range), @@ -1431,13 +1501,21 @@ def _map_main_building_part( party_wall_construction=main.party_wall_construction_type, sap_floor_dimensions=_map_floor_dimensions(measurements.main_building.floors), wall_thickness_mm=main.wall_thickness_mm, + roof_insulation_location=roof_location, + roof_insulation_thickness=roof_thickness, + floor_type=floor.floor_type, + floor_construction_type=floor.floor_construction, + floor_insulation_type_str=floor.floor_insulation_type, + floor_u_value_known=floor.floor_u_value_known, ) def _map_extension_building_part( ext_c: ExtensionConstruction, ext_m: ExtensionMeasurements, + roof: Optional[ExtensionRoofSpace], ) -> SapBuildingPart: + roof_location, roof_thickness = 
_map_roof(roof) return SapBuildingPart( identifier=f"extension_{ext_c.id}", construction_age_band=_extract_age_band(ext_c.age_range), @@ -1447,6 +1525,8 @@ def _map_extension_building_part( party_wall_construction=ext_c.party_wall_construction_type, sap_floor_dimensions=_map_floor_dimensions(ext_m.floors), wall_thickness_mm=ext_c.wall_thickness_mm, + roof_insulation_location=roof_location, + roof_insulation_thickness=roof_thickness, ) @@ -1467,7 +1547,7 @@ def _map_sap_window(window: Window) -> SapWindow: def _map_sap_heating( - heating: HeatingAndHotWater, ventilation: Ventilation + heating: HeatingAndHotWater, ventilation: Ventilation, water_use: WaterUse ) -> SapHeating: main = heating.main_heating secondary = heating.secondary_heating @@ -1479,18 +1559,61 @@ def _map_sap_heating( None if secondary.secondary_fuel == "No Secondary Heating" else None ) + shower_outlets = ( + ShowerOutlets( + shower_outlet=ShowerOutlet( + shower_outlet_type=water_use.showers[0].outlet_type, + ) + ) + if water_use.showers + else None + ) + + _ELECTRIC_SYSTEM_TYPES = {"electric storage heaters", "electric underfloor heating"} + _raw_fuel = main.fuel.split(", ")[0] if main.fuel else "" + fuel_type = ( + _raw_fuel + if _raw_fuel + else ("Electricity" if main.system_type.lower() in _ELECTRIC_SYSTEM_TYPES else _raw_fuel) + ) + return SapHeating( instantaneous_wwhrs=InstantaneousWwhrs(), main_heating_details=[ MainHeatingDetail( has_fghrs=main.flue_gas_heat_recovery_system, - main_fuel_type=main.fuel, + main_fuel_type=fuel_type, heat_emitter_type=main.emitter, emitter_temperature=main.emitter_temperature, fan_flue_present=main.fan_assist, main_heating_control=main.controls, + condensing=main.condensing, + weather_compensator=main.weather_compensator, + central_heating_pump_age_str=main.central_heating_pump_age, ) ], has_fixed_air_conditioning=ventilation.has_fixed_air_conditioning, secondary_fuel_type=secondary_fuel_type, + 
secondary_heating_type=heating.secondary_heating.secondary_system, + shower_outlets=shower_outlets, + cylinder_size=heating.water_heating.cylinder_size if heating.water_heating.cylinder_size != "No Cylinder" else None, + cylinder_insulation_type=heating.water_heating.insulation_type, + cylinder_insulation_thickness_mm=heating.water_heating.insulation_thickness_mm, + immersion_heating_type=heating.water_heating.immersion_type, + ) + + +def _map_sap_ventilation(ventilation: Ventilation) -> SapVentilation: + return SapVentilation( + ventilation_type=ventilation.ventilation_type, + draught_lobby=ventilation.draught_lobby, + pressure_test=ventilation.pressure_test, + open_flues_count=ventilation.number_of_open_flues, + closed_flues_count=ventilation.number_of_closed_flues, + boiler_flues_count=ventilation.number_of_boiler_flues, + other_flues_count=ventilation.number_of_other_flues, + extract_fans_count=ventilation.number_of_extract_fans, + passive_vents_count=ventilation.number_of_passive_vents, + flueless_gas_fires_count=ventilation.number_of_flueless_gas_fires, + ventilation_in_pcdf_database=ventilation.ventilation_in_pcdf_database, ) diff --git a/datatypes/epc/domain/tests/test_from_site_notes.py b/datatypes/epc/domain/tests/test_from_site_notes.py index 47327ff7..ed4bf1ae 100644 --- a/datatypes/epc/domain/tests/test_from_site_notes.py +++ b/datatypes/epc/domain/tests/test_from_site_notes.py @@ -13,7 +13,10 @@ from datatypes.epc.domain.epc_property_data import ( SapEnergySource, SapFloorDimension, SapHeating, + SapVentilation, SapWindow, + ShowerOutlet, + ShowerOutlets, ) from datatypes.epc.domain.mapper import EpcPropertyDataMapper from datatypes.epc.schema.tests.helpers import from_dict @@ -330,14 +333,14 @@ class TestFromSiteNotesExample1: def test_uprn_absent(self, result: EpcPropertyData) -> None: assert result.uprn is None - def test_address_absent(self, result: EpcPropertyData) -> None: - assert result.address_line_1 is None + def test_address_line_1(self, 
result: EpcPropertyData) -> None: + assert result.address_line_1 == "1, Test Street" - def test_postcode_absent(self, result: EpcPropertyData) -> None: - assert result.postcode is None + def test_postcode(self, result: EpcPropertyData) -> None: + assert result.postcode == "TE1 1ST" - def test_post_town_absent(self, result: EpcPropertyData) -> None: - assert result.post_town is None + def test_post_town(self, result: EpcPropertyData) -> None: + assert result.post_town == "Test Town" def test_status_absent(self, result: EpcPropertyData) -> None: assert result.status is None @@ -352,9 +355,9 @@ class TestFromSiteNotesExample1: sap_version=None, dwelling_type="Mid-terrace house", uprn=None, - address_line_1=None, - postcode=None, - post_town=None, + address_line_1="1, Test Street", + postcode="TE1 1ST", + post_town="Test Town", inspection_date=date(2026, 3, 31), status=None, tenure="Rented Social", @@ -379,9 +382,18 @@ class TestFromSiteNotesExample1: emitter_temperature="Unknown", fan_flue_present=True, main_heating_control="Programmer, room thermostat and TRVs", + condensing=True, + weather_compensator=False, + central_heating_pump_age_str="Unknown", ) ], has_fixed_air_conditioning=False, + cylinder_size="Normal (90-130 litres)", + cylinder_insulation_type="Factory fitted", + cylinder_insulation_thickness_mm=12, + shower_outlets=ShowerOutlets( + shower_outlet=ShowerOutlet(shower_outlet_type="Non-Electric Shower"), + ), ), # Windows sap_windows=[ @@ -464,15 +476,23 @@ class TestFromSiteNotesExample1: total_floor_area_m2=24.78, party_wall_length_m=6.15, heat_loss_perimeter_m=14.21, + floor=1, ), SapFloorDimension( room_height_m=2.35, total_floor_area_m2=24.78, party_wall_length_m=6.15, heat_loss_perimeter_m=14.21, + floor=0, ), ], wall_thickness_mm=280, + roof_insulation_location="Joists", + roof_insulation_thickness=100, + floor_type="Ground Floor", + floor_construction_type="Suspended, not timber", + floor_insulation_type_str="As Built", + floor_u_value_known=False, 
) ], solar_water_heating=False, @@ -495,5 +515,182 @@ class TestFromSiteNotesExample1: has_conservatory=False, blocked_chimneys_count=0, draughtproofed_door_count=2, + report_reference="49D422A9-0779-44DD-9665-464D35DFF1A8", + number_of_storeys=2, + any_unheated_rooms=True, + waste_water_heat_recovery="None", + hydro=False, + photovoltaic_array=False, + sap_ventilation=SapVentilation( + ventilation_type="Natural", + open_flues_count=0, + closed_flues_count=0, + boiler_flues_count=0, + other_flues_count=0, + extract_fans_count=2, + passive_vents_count=0, + flueless_gas_fires_count=0, + pressure_test="No test", + draught_lobby=False, + ), ) assert result == expected + + +class TestFromSiteNotesVentilation: + """ + Fixture: pashub_rdsap_site_notes_example1.json + Ventilation: Natural, 2 extract fans, no flues, no test, no draught lobby. + """ + + @pytest.fixture + def survey(self) -> PasHubRdSapSiteNotes: + return from_dict( + PasHubRdSapSiteNotes, load("pashub_rdsap_site_notes_example1.json") + ) + + @pytest.fixture + def result(self, survey: PasHubRdSapSiteNotes) -> EpcPropertyData: + return EpcPropertyDataMapper.from_site_notes(survey) + + def test_sap_ventilation_present(self, result: EpcPropertyData) -> None: + assert result.sap_ventilation is not None + + def test_ventilation_type(self, result: EpcPropertyData) -> None: + # ventilation.ventilation_type: "Natural" + assert result.sap_ventilation.ventilation_type == "Natural" + + def test_open_flues_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_open_flues: 0 + assert result.sap_ventilation.open_flues_count == 0 + + def test_closed_flues_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_closed_flues: 0 + assert result.sap_ventilation.closed_flues_count == 0 + + def test_boiler_flues_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_boiler_flues: 0 + assert result.sap_ventilation.boiler_flues_count == 0 + + def test_other_flues_count(self, result: 
EpcPropertyData) -> None: + # ventilation.number_of_other_flues: 0 + assert result.sap_ventilation.other_flues_count == 0 + + def test_extract_fans_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_extract_fans: 2 + assert result.sap_ventilation.extract_fans_count == 2 + + def test_passive_vents_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_passive_vents: 0 + assert result.sap_ventilation.passive_vents_count == 0 + + def test_flueless_gas_fires_count(self, result: EpcPropertyData) -> None: + # ventilation.number_of_flueless_gas_fires: 0 + assert result.sap_ventilation.flueless_gas_fires_count == 0 + + def test_pressure_test(self, result: EpcPropertyData) -> None: + # ventilation.pressure_test: "No test" + assert result.sap_ventilation.pressure_test == "No test" + + def test_draught_lobby(self, result: EpcPropertyData) -> None: + # ventilation.draught_lobby: false + assert result.sap_ventilation.draught_lobby is False + + +class TestFromSiteNotesFloorConstruction: + """ + Fixture: pashub_rdsap_site_notes_example1.json + Floor: Suspended not timber, As Built insulation, Ground Floor type. 
+ """ + + @pytest.fixture + def survey(self) -> PasHubRdSapSiteNotes: + return from_dict( + PasHubRdSapSiteNotes, load("pashub_rdsap_site_notes_example1.json") + ) + + @pytest.fixture + def result(self, survey: PasHubRdSapSiteNotes) -> EpcPropertyData: + return EpcPropertyDataMapper.from_site_notes(survey) + + def test_floor_type(self, result: EpcPropertyData) -> None: + # building_construction.floor.floor_type: "Ground Floor" + assert result.sap_building_parts[0].floor_type == "Ground Floor" + + def test_floor_construction_type(self, result: EpcPropertyData) -> None: + # building_construction.floor.floor_construction: "Suspended, not timber" + assert result.sap_building_parts[0].floor_construction_type == "Suspended, not timber" + + def test_floor_insulation_type_str(self, result: EpcPropertyData) -> None: + # building_construction.floor.floor_insulation_type: "As Built" + assert result.sap_building_parts[0].floor_insulation_type_str == "As Built" + + def test_floor_u_value_known(self, result: EpcPropertyData) -> None: + # building_construction.floor.floor_u_value_known: false + assert result.sap_building_parts[0].floor_u_value_known is False + + +class TestFromSiteNotesHeatingBoiler: + """ + Fixture: pashub_rdsap_site_notes_example1.json + Boiler: Vaillant ecoFIT sustain, condensing, no weather compensator, pump age unknown. 
+ """ + + @pytest.fixture + def survey(self) -> PasHubRdSapSiteNotes: + return from_dict( + PasHubRdSapSiteNotes, load("pashub_rdsap_site_notes_example1.json") + ) + + @pytest.fixture + def result(self, survey: PasHubRdSapSiteNotes) -> EpcPropertyData: + return EpcPropertyDataMapper.from_site_notes(survey) + + def test_condensing(self, result: EpcPropertyData) -> None: + # heating_and_hot_water.main_heating.condensing: true + assert result.sap_heating.main_heating_details[0].condensing is True + + def test_weather_compensator(self, result: EpcPropertyData) -> None: + # heating_and_hot_water.main_heating.weather_compensator: false + assert result.sap_heating.main_heating_details[0].weather_compensator is False + + def test_central_heating_pump_age_str(self, result: EpcPropertyData) -> None: + # heating_and_hot_water.main_heating.central_heating_pump_age: "Unknown" + assert result.sap_heating.main_heating_details[0].central_heating_pump_age_str == "Unknown" + + +class TestFromSiteNotesMiscTopLevel: + """ + Fixture: pashub_rdsap_site_notes_example1.json + Misc fields: 2 storeys, unheated rooms present, no hydro, no PV array, no WWHR. 
+ """ + + @pytest.fixture + def survey(self) -> PasHubRdSapSiteNotes: + return from_dict( + PasHubRdSapSiteNotes, load("pashub_rdsap_site_notes_example1.json") + ) + + @pytest.fixture + def result(self, survey: PasHubRdSapSiteNotes) -> EpcPropertyData: + return EpcPropertyDataMapper.from_site_notes(survey) + + def test_number_of_storeys(self, result: EpcPropertyData) -> None: + # general.number_of_storeys: 2 + assert result.number_of_storeys == 2 + + def test_any_unheated_rooms(self, result: EpcPropertyData) -> None: + # room_count_elements.any_unheated_rooms: true + assert result.any_unheated_rooms is True + + def test_waste_water_heat_recovery(self, result: EpcPropertyData) -> None: + # room_count_elements.waste_water_heat_recovery: "None" + assert result.waste_water_heat_recovery == "None" + + def test_hydro(self, result: EpcPropertyData) -> None: + # renewables.hydro: false + assert result.hydro is False + + def test_photovoltaic_array(self, result: EpcPropertyData) -> None: + # renewables.photovoltaic_array: false + assert result.photovoltaic_array is False diff --git a/datatypes/epc/schema/tests/helpers.py b/datatypes/epc/schema/tests/helpers.py index 677bd8b7..22f132d2 100644 --- a/datatypes/epc/schema/tests/helpers.py +++ b/datatypes/epc/schema/tests/helpers.py @@ -1,5 +1,6 @@ import dataclasses import typing +from datetime import date from typing import Any, Dict, Type, TypeVar T = TypeVar("T") @@ -70,4 +71,7 @@ def _coerce(value: Any, hint: Any) -> Any: if dataclasses.is_dataclass(hint) and isinstance(value, dict): return _from_dict_impl(hint, value) + if hint is date and isinstance(value, str): + return date.fromisoformat(value) + return value diff --git a/datatypes/epc/surveys/pashub_rdsap_site_notes.py b/datatypes/epc/surveys/pashub_rdsap_site_notes.py index c5b3dbe4..73a58aa6 100644 --- a/datatypes/epc/surveys/pashub_rdsap_site_notes.py +++ b/datatypes/epc/surveys/pashub_rdsap_site_notes.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from 
datetime import date from typing import List, Optional @@ -8,7 +9,7 @@ class InspectionMetadata: email_address: str report_reference: str created_on: str - date_of_inspection: str + date_of_inspection: date property_address: str property_photo: Optional[bool] = None @@ -17,7 +18,7 @@ class InspectionMetadata: class General: epc_checked_before_assessment: bool epc_exists_at_point_of_assessment: bool - inspection_date: str + inspection_date: date transaction_type: str tenure: str property_type: str @@ -178,6 +179,7 @@ class MainHeating: @dataclass class SecondaryHeating: secondary_fuel: str + secondary_system: Optional[str] = None @dataclass @@ -189,6 +191,7 @@ class WaterHeating: insulation_type: Optional[str] = None insulation_thickness_mm: Optional[int] = None has_thermostat: Optional[bool] = None + immersion_type: Optional[str] = None @dataclass @@ -226,6 +229,8 @@ class Renewables: photovoltaic_array: bool number_of_pv_batteries: int hydro: bool + pv_connection: Optional[str] = None + percent_roof_covered_pv: Optional[int] = None @dataclass diff --git a/datatypes/epc/surveys/tests/fixtures/pashub_rdsap_site_notes_example1.json b/datatypes/epc/surveys/tests/fixtures/pashub_rdsap_site_notes_example1.json index b5772e24..f19bea20 100644 --- a/datatypes/epc/surveys/tests/fixtures/pashub_rdsap_site_notes_example1.json +++ b/datatypes/epc/surveys/tests/fixtures/pashub_rdsap_site_notes_example1.json @@ -5,7 +5,7 @@ "report_reference": "49D422A9-0779-44DD-9665-464D35DFF1A8", "created_on": "2026-03-31", "date_of_inspection": "2026-03-31", - "property_address": "test" + "property_address": "1, Test Street, Test Town, Test County, TE1 1ST" }, "general": { "epc_checked_before_assessment": true, @@ -229,4 +229,4 @@ "hard_to_treat_cavity_high_exposure": false, "hard_to_treat_cavity_narrow_cavities": false } -} +} \ No newline at end of file diff --git a/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py 
b/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py index d89f989d..a1a126a2 100644 --- a/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py +++ b/datatypes/epc/surveys/tests/test_pashub_rdsap_site_notes_loading.py @@ -1,5 +1,6 @@ import json import os +from datetime import date from typing import Any, Dict import pytest @@ -224,7 +225,7 @@ class TestExample2: self, survey: PasHubRdSapSiteNotes ) -> None: assert survey.inspection_metadata.created_on == "2025-11-10" - assert survey.inspection_metadata.date_of_inspection == "2025-09-25" + assert survey.inspection_metadata.date_of_inspection == date(2025, 9, 25) # --- general --- diff --git a/pytest.ini b/pytest.ini index 8f8ceeef..33231c61 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,6 +3,6 @@ pythonpath = . log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/documents_parser/tests markers = integration: mark a test as an integration test diff --git a/test.requirements.txt b/test.requirements.txt index 936e2f7d..6c95f993 100644 --- a/test.requirements.txt +++ b/test.requirements.txt @@ 
-6,4 +6,5 @@ dotenv psycopg[binary] pytest-postgresql hubspot-api-client -fuzzywuzzy \ No newline at end of file +fuzzywuzzy +pymupdf \ No newline at end of file