mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Include inspection metadata in output
This commit is contained in:
parent
968f025bc3
commit
0a8b9e0767
10 changed files with 330 additions and 14 deletions
|
|
@ -3,6 +3,7 @@ from typing import List, Optional
|
|||
|
||||
from datatypes.epc.surveys.pashub_rdsap_site_notes import (
|
||||
BuildingConstruction,
|
||||
InspectionMetadata,
|
||||
BuildingMeasurements,
|
||||
Conservatories,
|
||||
CustomerResponse,
|
||||
|
|
@ -81,9 +82,41 @@ class PasHubRdSapSiteNotesExtractor:
|
|||
|
||||
# --- public extract methods ---
|
||||
|
||||
def extract_inspection_metadata(self) -> InspectionMetadata:
    """Build the ``InspectionMetadata`` header record from the extracted text.

    Labelled values (surveyor, e-mail, report reference, dates) are read with
    ``self._get``. The property address is assembled from every entry of
    ``self.text_list`` between the "Property Address:" label and the next
    "Property Photo" marker.

    Returns:
        InspectionMetadata where ``created_on`` is an ISO ``YYYY-MM-DD``
        string (empty when the label has no value), ``date_of_inspection``
        is a ``date``, ``property_address`` is a ", "-joined string (empty
        when the label or the closing marker is missing) and
        ``property_photo`` tells whether the "Property Photo" marker appears
        anywhere in the text.

    Raises:
        ValueError: if "Date of Inspection:" has no value, or if either date
            does not match ``%d %B %Y`` (e.g. "25 September 2025").
    """
    # NOTE(review): %B matches locale-dependent month names - confirm the
    # parser always runs under an English/C locale.
    date_format = "%d %B %Y"
    photo_marker = "Property Photo"

    # Keep the try body to the two lookups that can actually raise.
    try:
        addr_start = self.text_list.index("Property Address:") + 1
        addr_end = self.text_list.index(photo_marker, addr_start)
    except ValueError:
        # Label or closing marker missing: the extent of the address is unknown.
        property_address = ""
    else:
        # Drop surrounding whitespace and trailing commas from each fragment
        # and skip fragments that end up empty, so the joined address never
        # contains ", ," runs or stray spaces.
        fragments = (
            t.strip().rstrip(",").rstrip()
            for t in self.text_list[addr_start:addr_end]
        )
        property_address = ", ".join(part for part in fragments if part)

    created_on_raw = (self._get("Created On:") or "").strip()
    if created_on_raw:
        created_on = datetime.strptime(created_on_raw, date_format).strftime("%Y-%m-%d")
    else:
        created_on = ""

    date_of_inspection_raw = (self._get("Date of Inspection:") or "").strip()
    if not date_of_inspection_raw:
        raise ValueError("Date of Inspection not found in document")
    date_of_inspection = datetime.strptime(date_of_inspection_raw, date_format).date()

    return InspectionMetadata(
        inspection_surveyor=self._get("Inspection Surveyor:") or "",
        email_address=self._get("E-Mail Address:") or "",
        report_reference=self._get("Report Reference:") or "",
        created_on=created_on,
        date_of_inspection=date_of_inspection,
        property_address=property_address,
        property_photo=photo_marker in self.text_list,
    )
|
||||
|
||||
def extract(self) -> PasHubRdSapSiteNotes:
|
||||
return PasHubRdSapSiteNotes(
|
||||
inspection_metadata=None,
|
||||
inspection_metadata=self.extract_inspection_metadata(),
|
||||
general=self.extract_general(),
|
||||
building_construction=self.extract_building_construction(),
|
||||
building_measurements=self.extract_building_measurements(),
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,4 +1,24 @@
|
|||
[
|
||||
"SMART EPC: Record of",
|
||||
"Inspection & Site Notes",
|
||||
"Inspection Surveyor:",
|
||||
"Benjamin Burke",
|
||||
"E-Mail Address:",
|
||||
"ben@mbsolutionsgroup.co.uk",
|
||||
"Report Reference:",
|
||||
"6EA2A86D-94CE-4792-8D49-AB495C744EDD",
|
||||
"Created On:",
|
||||
"10 November 2025",
|
||||
"Date of Inspection:",
|
||||
"25 September 2025",
|
||||
"Property Address:",
|
||||
"40,",
|
||||
"Abbey Place,",
|
||||
"Crewe,",
|
||||
"Cheshire,",
|
||||
"CW1 4JR",
|
||||
"Property Photo",
|
||||
"Page 1",
|
||||
"",
|
||||
"Photo of electricity meter:",
|
||||
"Photo of electricity meter:",
|
||||
|
|
|
|||
236
backend/documents_parser/tests/test_end_to_end.py
Normal file
236
backend/documents_parser/tests/test_end_to_end.py
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
import os
|
||||
from datetime import date
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
|
||||
from backend.documents_parser.pdf import pdf_to_text_list
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EpcPropertyData,
|
||||
InstantaneousWwhrs,
|
||||
MainHeatingDetail,
|
||||
SapBuildingPart,
|
||||
SapEnergySource,
|
||||
SapFloorDimension,
|
||||
SapHeating,
|
||||
SapWindow,
|
||||
)
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
# Path to the sample site-notes PDF, resolved relative to this test module's
# own directory (its "fixtures" sub-folder).
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
|
||||
|
||||
|
||||
class TestPdfToEpcPropertyData:
    """End-to-end check: sample PDF -> text list -> site notes -> EpcPropertyData."""

    @staticmethod
    def _window(orientation: str, width: float, height: float, location: str) -> SapWindow:
        """Return an expected window; only the four arguments vary between windows."""
        return SapWindow(
            pvc_frame="Wooden or PVC",
            glazing_gap="16 mm or more",
            orientation=orientation,
            window_type="Window",
            glazing_type="Double glazing, Unknown install date",
            window_width=width,
            window_height=height,
            draught_proofed=True,
            window_location=location,
            window_wall_type="External wall",
            permanent_shutters_present=False,
        )

    @pytest.fixture
    def result(self) -> EpcPropertyData:
        """Parse the fixture PDF and map the extracted site notes to EpcPropertyData."""
        with open(PDF_PATH, "rb") as pdf_file:
            raw_pdf = pdf_file.read()
        text_list = pdf_to_text_list(raw_pdf)
        extractor = PasHubRdSapSiteNotesExtractor(text_list)
        return EpcPropertyDataMapper.from_site_notes(extractor.extract())

    def test_full_epc_property_data(self, result: EpcPropertyData) -> None:
        """The mapped record must equal the fully spelled-out expected value."""
        window = self._window

        expected_heating = SapHeating(
            instantaneous_wwhrs=InstantaneousWwhrs(),
            main_heating_details=[
                MainHeatingDetail(
                    has_fghrs=False,
                    main_fuel_type="Mains gas",
                    heat_emitter_type="Radiators",
                    emitter_temperature="Unknown",
                    main_heating_control="Programmer, room thermostat and TRVs",
                    fan_flue_present=True,
                )
            ],
            has_fixed_air_conditioning=False,
        )

        expected_windows = [
            window("North West", 2.3, 1.2, "Main Building"),
            window("North West", 1.0, 1.2, "Main Building"),
            window("North East", 1.0, 0.9, "Main Building"),
            window("North", 1.0, 0.9, "Extension 1"),
            window("North East", 1.7, 0.9, "Extension 1"),
            window("North West", 2.3, 0.9, "Extension 1"),
            window("North West", 1.2, 1.0, "Extension 1"),
            window("North East", 1.0, 0.9, "Extension 1"),
        ]

        expected_energy_source = SapEnergySource(
            mains_gas=True,
            meter_type="Single",
            pv_battery_count=0,
            wind_turbines_count=0,
            gas_smart_meter_present=True,
            is_dwelling_export_capable=True,
            wind_turbines_terrain_type="Suburban",
            electricity_smart_meter_present=True,
        )

        main_part = SapBuildingPart(
            identifier="main",
            construction_age_band="1950-1966",
            wall_construction="Cavity",
            wall_insulation_type="Filled Cavity",
            wall_thickness_measured=True,
            party_wall_construction="Cavity Masonry, Filled",
            sap_floor_dimensions=[
                SapFloorDimension(
                    room_height_m=2.19,
                    total_floor_area_m2=35.68,
                    party_wall_length_m=10.62,
                    heat_loss_perimeter_m=13.44,
                ),
                SapFloorDimension(
                    room_height_m=2.17,
                    total_floor_area_m2=35.68,
                    party_wall_length_m=10.62,
                    heat_loss_perimeter_m=11.0,
                ),
            ],
            wall_thickness_mm=310,
        )

        extension_part = SapBuildingPart(
            identifier="extension_1",
            construction_age_band="2003-2006",
            wall_construction="Cavity",
            wall_insulation_type="As built",
            wall_thickness_measured=True,
            party_wall_construction="Cavity Masonry, Filled",
            sap_floor_dimensions=[
                SapFloorDimension(
                    room_height_m=2.0,
                    total_floor_area_m2=3.8,
                    party_wall_length_m=0.0,
                    heat_loss_perimeter_m=5.7,
                ),
            ],
            wall_thickness_mm=310,
        )

        expected = EpcPropertyData(
            dwelling_type="Mid-terrace house",
            inspection_date=date(2025, 9, 25),
            tenure="Rented Social",
            transaction_type="Grant-Scheme (ECO, RHI, etc.)",
            roofs=[],
            walls=[],
            floors=[],
            main_heating=[],
            door_count=2,
            sap_heating=expected_heating,
            sap_windows=expected_windows,
            sap_energy_source=expected_energy_source,
            sap_building_parts=[main_part, extension_part],
            solar_water_heating=False,
            has_hot_water_cylinder=False,
            has_fixed_air_conditioning=False,
            wet_rooms_count=0,
            extensions_count=1,
            heated_rooms_count=0,
            open_chimneys_count=0,
            habitable_rooms_count=3,
            insulated_door_count=0,
            cfl_fixed_lighting_bulbs_count=1,
            led_fixed_lighting_bulbs_count=0,
            incandescent_fixed_lighting_bulbs_count=4,
            total_floor_area_m2=75.16,
            built_form="Mid-terrace",
            property_type="House",
            has_conservatory=False,
            blocked_chimneys_count=0,
            draughtproofed_door_count=2,
            address_line_1="40, Abbey Place",
            post_town="Crewe",
            postcode="CW1 4JR",
            report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD",
        )

        assert result == expected
|
||||
|
|
@ -17,6 +17,7 @@ from datatypes.epc.surveys.pashub_rdsap_site_notes import (
|
|||
FloorMeasurement,
|
||||
General,
|
||||
HeatingAndHotWater,
|
||||
InspectionMetadata,
|
||||
MainBuildingConstruction,
|
||||
MainBuildingMeasurements,
|
||||
MainHeating,
|
||||
|
|
@ -40,6 +41,20 @@ def load_text_fixture() -> list[str]:
|
|||
return json.load(f)
|
||||
|
||||
|
||||
class TestInspectionMetadata:
    """Header metadata extracted from the JSON text fixture."""

    def test_full_inspection_metadata(self) -> None:
        """Every metadata field is read from the fixture's first page."""
        expected = InspectionMetadata(
            inspection_surveyor="Benjamin Burke",
            email_address="ben@mbsolutionsgroup.co.uk",
            report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD",
            created_on="2025-11-10",
            date_of_inspection=date(2025, 9, 25),
            property_address="40, Abbey Place, Crewe, Cheshire, CW1 4JR",
            property_photo=True,
        )

        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_inspection_metadata()

        assert actual == expected
|
||||
|
||||
|
||||
class TestGeneral:
|
||||
@pytest.fixture
|
||||
def general(self) -> General:
|
||||
|
|
@ -530,7 +545,7 @@ class TestCustomerResponse:
|
|||
class TestExtract:
|
||||
def test_full_extract(self) -> None:
|
||||
result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract()
|
||||
assert result.inspection_metadata is None
|
||||
assert result.inspection_metadata.inspection_surveyor == "Benjamin Burke"
|
||||
assert result.general.inspection_date == date(2025, 9, 25)
|
||||
assert result.building_construction.main_building.wall_thickness_mm == 310
|
||||
assert result.building_measurements.main_building.floors[0].area_m2 == 35.68
|
||||
|
|
|
|||
|
|
@ -280,6 +280,7 @@ class EpcPropertyData:
|
|||
schema_type: Optional[str] = None
|
||||
schema_versions_original: Optional[str] = None
|
||||
report_type: Optional[str] = None # TODO: make enum?
|
||||
report_reference: Optional[str] = None
|
||||
uprn_source: Optional[str] = None
|
||||
address_line_2: Optional[str] = None
|
||||
region_code: Optional[str] = None # TODO: make enum?
|
||||
|
|
|
|||
|
|
@ -78,6 +78,12 @@ class EpcPropertyDataMapper:
|
|||
@staticmethod
|
||||
def from_site_notes(survey: PasHubRdSapSiteNotes) -> EpcPropertyData:
|
||||
general = survey.general
|
||||
metadata = survey.inspection_metadata
|
||||
address_parts = [p.strip() for p in metadata.property_address.split(", ")]
|
||||
postcode = address_parts[-1] if len(address_parts) >= 1 else None
|
||||
post_town = address_parts[-3] if len(address_parts) >= 4 else (address_parts[-2] if len(address_parts) >= 3 else None)
|
||||
address_line_1 = ", ".join(address_parts[:-3]) if len(address_parts) >= 4 else ", ".join(address_parts[:-2]) if len(address_parts) >= 3 else address_parts[0] if address_parts else None
|
||||
|
||||
construction = survey.building_construction
|
||||
measurements = survey.building_measurements
|
||||
heating = survey.heating_and_hot_water
|
||||
|
|
@ -145,6 +151,10 @@ class EpcPropertyDataMapper:
|
|||
has_conservatory=survey.conservatories.has_conservatory,
|
||||
blocked_chimneys_count=room_counts.number_of_blocked_chimneys,
|
||||
draughtproofed_door_count=room_counts.number_of_draughtproofed_external_doors,
|
||||
address_line_1=address_line_1,
|
||||
post_town=post_town,
|
||||
postcode=postcode,
|
||||
report_reference=metadata.report_reference,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -330,14 +330,14 @@ class TestFromSiteNotesExample1:
|
|||
def test_uprn_absent(self, result: EpcPropertyData) -> None:
|
||||
assert result.uprn is None
|
||||
|
||||
def test_address_absent(self, result: EpcPropertyData) -> None:
|
||||
assert result.address_line_1 is None
|
||||
def test_address_line_1(self, result: EpcPropertyData) -> None:
|
||||
assert result.address_line_1 == "1, Test Street"
|
||||
|
||||
def test_postcode_absent(self, result: EpcPropertyData) -> None:
|
||||
assert result.postcode is None
|
||||
def test_postcode(self, result: EpcPropertyData) -> None:
|
||||
assert result.postcode == "TE1 1ST"
|
||||
|
||||
def test_post_town_absent(self, result: EpcPropertyData) -> None:
|
||||
assert result.post_town is None
|
||||
def test_post_town(self, result: EpcPropertyData) -> None:
|
||||
assert result.post_town == "Test Town"
|
||||
|
||||
def test_status_absent(self, result: EpcPropertyData) -> None:
|
||||
assert result.status is None
|
||||
|
|
@ -352,9 +352,9 @@ class TestFromSiteNotesExample1:
|
|||
sap_version=None,
|
||||
dwelling_type="Mid-terrace house",
|
||||
uprn=None,
|
||||
address_line_1=None,
|
||||
postcode=None,
|
||||
post_town=None,
|
||||
address_line_1="1, Test Street",
|
||||
postcode="TE1 1ST",
|
||||
post_town="Test Town",
|
||||
inspection_date=date(2026, 3, 31),
|
||||
status=None,
|
||||
tenure="Rented Social",
|
||||
|
|
@ -495,5 +495,6 @@ class TestFromSiteNotesExample1:
|
|||
has_conservatory=False,
|
||||
blocked_chimneys_count=0,
|
||||
draughtproofed_door_count=2,
|
||||
report_reference="49D422A9-0779-44DD-9665-464D35DFF1A8",
|
||||
)
|
||||
assert result == expected
|
||||
|
|
|
|||
|
|
@ -276,7 +276,7 @@ class SurveyAddendum:
|
|||
|
||||
@dataclass
|
||||
class PasHubRdSapSiteNotes:
|
||||
inspection_metadata: Optional[InspectionMetadata]
|
||||
inspection_metadata: InspectionMetadata
|
||||
general: General
|
||||
building_construction: BuildingConstruction
|
||||
building_measurements: BuildingMeasurements
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
"report_reference": "49D422A9-0779-44DD-9665-464D35DFF1A8",
|
||||
"created_on": "2026-03-31",
|
||||
"date_of_inspection": "2026-03-31",
|
||||
"property_address": "test"
|
||||
"property_address": "1, Test Street, Test Town, Test County, TE1 1ST"
|
||||
},
|
||||
"general": {
|
||||
"epc_checked_before_assessment": true,
|
||||
|
|
@ -229,4 +229,4 @@
|
|||
"hard_to_treat_cavity_high_exposure": false,
|
||||
"hard_to_treat_cavity_narrow_cavities": false
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue