Include inspection metadata in output

This commit is contained in:
Daniel Roth 2026-04-20 09:04:54 +00:00
parent 968f025bc3
commit 0a8b9e0767
10 changed files with 330 additions and 14 deletions

View file

@ -3,6 +3,7 @@ from typing import List, Optional
from datatypes.epc.surveys.pashub_rdsap_site_notes import (
BuildingConstruction,
InspectionMetadata,
BuildingMeasurements,
Conservatories,
CustomerResponse,
@ -81,9 +82,41 @@ class PasHubRdSapSiteNotesExtractor:
# --- public extract methods ---
def extract_inspection_metadata(self) -> InspectionMetadata:
    """Build the ``InspectionMetadata`` record from the document's token list.

    The property address is assembled from the tokens that sit between the
    ``"Property Address:"`` label and the next ``"Property Photo"`` token;
    a trailing comma is removed from each token and the pieces are joined
    with ``", "``. If either marker is missing the address is ``""``.

    ``created_on`` is optional: when a value is found it is parsed as
    ``"%d %B %Y"`` and re-emitted as ``"%Y-%m-%d"``, otherwise it is ``""``.
    ``date_of_inspection`` is mandatory and is returned as a ``date``.

    Returns:
        The populated ``InspectionMetadata``.

    Raises:
        ValueError: if no "Date of Inspection:" value is found, or if either
            date string does not match the ``"%d %B %Y"`` format.
    """
    source_format = "%d %B %Y"
    tokens = self.text_list

    # Locate the address span; list.index raises ValueError for a missing marker.
    try:
        first = tokens.index("Property Address:") + 1
        last = tokens.index("Property Photo", first)
    except ValueError:
        property_address = ""
    else:
        property_address = ", ".join(piece.rstrip(",") for piece in tokens[first:last])

    # NOTE(review): self._get presumably returns the value that follows a label,
    # or a falsy value when the label is absent — confirm against its definition.
    created_text = self._get("Created On:")
    if created_text:
        created_on = datetime.strptime(created_text, source_format).strftime("%Y-%m-%d")
    else:
        created_on = ""

    inspected_text = self._get("Date of Inspection:")
    if not inspected_text:
        raise ValueError("Date of Inspection not found in document")
    date_of_inspection = datetime.strptime(inspected_text, source_format).date()

    return InspectionMetadata(
        inspection_surveyor=self._get("Inspection Surveyor:") or "",
        email_address=self._get("E-Mail Address:") or "",
        report_reference=self._get("Report Reference:") or "",
        created_on=created_on,
        date_of_inspection=date_of_inspection,
        property_address=property_address,
        # True whenever the "Property Photo" token appears anywhere in the list.
        property_photo="Property Photo" in tokens,
    )
def extract(self) -> PasHubRdSapSiteNotes:
return PasHubRdSapSiteNotes(
inspection_metadata=None,
inspection_metadata=self.extract_inspection_metadata(),
general=self.extract_general(),
building_construction=self.extract_building_construction(),
building_measurements=self.extract_building_measurements(),

View file

@ -1,4 +1,24 @@
[
"SMART EPC: Record of",
"Inspection & Site Notes",
"Inspection Surveyor:",
"Benjamin Burke",
"E-Mail Address:",
"ben@mbsolutionsgroup.co.uk",
"Report Reference:",
"6EA2A86D-94CE-4792-8D49-AB495C744EDD",
"Created On:",
"10 November 2025",
"Date of Inspection:",
"25 September 2025",
"Property Address:",
"40,",
"Abbey Place,",
"Crewe,",
"Cheshire,",
"CW1 4JR",
"Property Photo",
"Page 1",
"",
"Photo of electricity meter:",
"Photo of electricity meter:",

View file

@ -0,0 +1,236 @@
import os
from datetime import date
import pytest
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_text_list
from datatypes.epc.domain.epc_property_data import (
EpcPropertyData,
InstantaneousWwhrs,
MainHeatingDetail,
SapBuildingPart,
SapEnergySource,
SapFloorDimension,
SapHeating,
SapWindow,
)
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
PDF_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "ExampleSiteNotes.pdf")
class TestPdfToEpcPropertyData:
    """End-to-end check: example PDF -> extracted site notes -> mapped ``EpcPropertyData``."""

    @staticmethod
    def _window(*, orientation: str, width: float, height: float, location: str) -> SapWindow:
        """Build one expected ``SapWindow``.

        The expected windows differ only in orientation, size and location;
        every other field has the same value for all of them.
        """
        return SapWindow(
            pvc_frame="Wooden or PVC",
            glazing_gap="16 mm or more",
            orientation=orientation,
            window_type="Window",
            glazing_type="Double glazing, Unknown install date",
            window_width=width,
            window_height=height,
            draught_proofed=True,
            window_location=location,
            window_wall_type="External wall",
            permanent_shutters_present=False,
        )

    @pytest.fixture
    def result(self) -> EpcPropertyData:
        """Read the PDF at ``PDF_PATH``, extract the site notes and map them."""
        with open(PDF_PATH, "rb") as pdf_file:
            raw_pdf = pdf_file.read()
        text_list = pdf_to_text_list(raw_pdf)
        extractor = PasHubRdSapSiteNotesExtractor(text_list)
        return EpcPropertyDataMapper.from_site_notes(extractor.extract())

    def test_full_epc_property_data(self, result: EpcPropertyData) -> None:
        """The mapped record equals the fully spelled-out expected record."""
        expected_heating = SapHeating(
            instantaneous_wwhrs=InstantaneousWwhrs(),
            main_heating_details=[
                MainHeatingDetail(
                    has_fghrs=False,
                    main_fuel_type="Mains gas",
                    heat_emitter_type="Radiators",
                    emitter_temperature="Unknown",
                    main_heating_control="Programmer, room thermostat and TRVs",
                    fan_flue_present=True,
                )
            ],
            has_fixed_air_conditioning=False,
        )

        main = "Main Building"
        ext = "Extension 1"
        expected_windows = [
            self._window(orientation="North West", width=2.3, height=1.2, location=main),
            self._window(orientation="North West", width=1.0, height=1.2, location=main),
            self._window(orientation="North East", width=1.0, height=0.9, location=main),
            self._window(orientation="North", width=1.0, height=0.9, location=ext),
            self._window(orientation="North East", width=1.7, height=0.9, location=ext),
            self._window(orientation="North West", width=2.3, height=0.9, location=ext),
            self._window(orientation="North West", width=1.2, height=1.0, location=ext),
            self._window(orientation="North East", width=1.0, height=0.9, location=ext),
        ]

        expected_energy_source = SapEnergySource(
            mains_gas=True,
            meter_type="Single",
            pv_battery_count=0,
            wind_turbines_count=0,
            gas_smart_meter_present=True,
            is_dwelling_export_capable=True,
            wind_turbines_terrain_type="Suburban",
            electricity_smart_meter_present=True,
        )

        expected_main_part = SapBuildingPart(
            identifier="main",
            construction_age_band="1950-1966",
            wall_construction="Cavity",
            wall_insulation_type="Filled Cavity",
            wall_thickness_measured=True,
            party_wall_construction="Cavity Masonry, Filled",
            sap_floor_dimensions=[
                SapFloorDimension(
                    room_height_m=2.19,
                    total_floor_area_m2=35.68,
                    party_wall_length_m=10.62,
                    heat_loss_perimeter_m=13.44,
                ),
                SapFloorDimension(
                    room_height_m=2.17,
                    total_floor_area_m2=35.68,
                    party_wall_length_m=10.62,
                    heat_loss_perimeter_m=11.0,
                ),
            ],
            wall_thickness_mm=310,
        )
        expected_extension_part = SapBuildingPart(
            identifier="extension_1",
            construction_age_band="2003-2006",
            wall_construction="Cavity",
            wall_insulation_type="As built",
            wall_thickness_measured=True,
            party_wall_construction="Cavity Masonry, Filled",
            sap_floor_dimensions=[
                SapFloorDimension(
                    room_height_m=2.0,
                    total_floor_area_m2=3.8,
                    party_wall_length_m=0.0,
                    heat_loss_perimeter_m=5.7,
                ),
            ],
            wall_thickness_mm=310,
        )

        expected = EpcPropertyData(
            dwelling_type="Mid-terrace house",
            inspection_date=date(2025, 9, 25),
            tenure="Rented Social",
            transaction_type="Grant-Scheme (ECO, RHI, etc.)",
            roofs=[],
            walls=[],
            floors=[],
            main_heating=[],
            door_count=2,
            sap_heating=expected_heating,
            sap_windows=expected_windows,
            sap_energy_source=expected_energy_source,
            sap_building_parts=[expected_main_part, expected_extension_part],
            solar_water_heating=False,
            has_hot_water_cylinder=False,
            has_fixed_air_conditioning=False,
            wet_rooms_count=0,
            extensions_count=1,
            heated_rooms_count=0,
            open_chimneys_count=0,
            habitable_rooms_count=3,
            insulated_door_count=0,
            cfl_fixed_lighting_bulbs_count=1,
            led_fixed_lighting_bulbs_count=0,
            incandescent_fixed_lighting_bulbs_count=4,
            total_floor_area_m2=75.16,
            built_form="Mid-terrace",
            property_type="House",
            has_conservatory=False,
            blocked_chimneys_count=0,
            draughtproofed_door_count=2,
            address_line_1="40, Abbey Place",
            post_town="Crewe",
            postcode="CW1 4JR",
            report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD",
        )

        assert result == expected

View file

@ -17,6 +17,7 @@ from datatypes.epc.surveys.pashub_rdsap_site_notes import (
FloorMeasurement,
General,
HeatingAndHotWater,
InspectionMetadata,
MainBuildingConstruction,
MainBuildingMeasurements,
MainHeating,
@ -40,6 +41,20 @@ def load_text_fixture() -> list[str]:
return json.load(f)
class TestInspectionMetadata:
    """Checks for ``PasHubRdSapSiteNotesExtractor.extract_inspection_metadata``."""

    def test_full_inspection_metadata(self) -> None:
        """Every metadata field extracted from the text fixture matches the expected value."""
        extractor = PasHubRdSapSiteNotesExtractor(load_text_fixture())
        actual = extractor.extract_inspection_metadata()

        expected = InspectionMetadata(
            inspection_surveyor="Benjamin Burke",
            email_address="ben@mbsolutionsgroup.co.uk",
            report_reference="6EA2A86D-94CE-4792-8D49-AB495C744EDD",
            created_on="2025-11-10",
            date_of_inspection=date(2025, 9, 25),
            property_address="40, Abbey Place, Crewe, Cheshire, CW1 4JR",
            property_photo=True,
        )
        assert actual == expected
class TestGeneral:
@pytest.fixture
def general(self) -> General:
@ -530,7 +545,7 @@ class TestCustomerResponse:
class TestExtract:
def test_full_extract(self) -> None:
result = PasHubRdSapSiteNotesExtractor(load_text_fixture()).extract()
assert result.inspection_metadata is None
assert result.inspection_metadata.inspection_surveyor == "Benjamin Burke"
assert result.general.inspection_date == date(2025, 9, 25)
assert result.building_construction.main_building.wall_thickness_mm == 310
assert result.building_measurements.main_building.floors[0].area_m2 == 35.68

View file

@ -280,6 +280,7 @@ class EpcPropertyData:
schema_type: Optional[str] = None
schema_versions_original: Optional[str] = None
report_type: Optional[str] = None # TODO: make enum?
report_reference: Optional[str] = None
uprn_source: Optional[str] = None
address_line_2: Optional[str] = None
region_code: Optional[str] = None # TODO: make enum?

View file

@ -78,6 +78,12 @@ class EpcPropertyDataMapper:
@staticmethod
def from_site_notes(survey: PasHubRdSapSiteNotes) -> EpcPropertyData:
general = survey.general
metadata = survey.inspection_metadata
address_parts = [p.strip() for p in metadata.property_address.split(", ")]
postcode = address_parts[-1] if len(address_parts) >= 1 else None
post_town = address_parts[-3] if len(address_parts) >= 4 else (address_parts[-2] if len(address_parts) >= 3 else None)
address_line_1 = ", ".join(address_parts[:-3]) if len(address_parts) >= 4 else ", ".join(address_parts[:-2]) if len(address_parts) >= 3 else address_parts[0] if address_parts else None
construction = survey.building_construction
measurements = survey.building_measurements
heating = survey.heating_and_hot_water
@ -145,6 +151,10 @@ class EpcPropertyDataMapper:
has_conservatory=survey.conservatories.has_conservatory,
blocked_chimneys_count=room_counts.number_of_blocked_chimneys,
draughtproofed_door_count=room_counts.number_of_draughtproofed_external_doors,
address_line_1=address_line_1,
post_town=post_town,
postcode=postcode,
report_reference=metadata.report_reference,
)
@staticmethod

View file

@ -330,14 +330,14 @@ class TestFromSiteNotesExample1:
def test_uprn_absent(self, result: EpcPropertyData) -> None:
    """Check that ``uprn`` is ``None`` on the mapped record."""
    assert result.uprn is None
def test_address_absent(self, result: EpcPropertyData) -> None:
    """Check that ``address_line_1`` is ``None`` on the mapped record."""
    # NOTE(review): the neighbouring test_address_line_1 expects "1, Test Street"
    # from what looks like the same fixture, so this is presumably the pre-change
    # side of the diff hunk — confirm it is removed rather than kept alongside it.
    assert result.address_line_1 is None
def test_address_line_1(self, result: EpcPropertyData) -> None:
    """Check that ``address_line_1`` on the mapped record is "1, Test Street"."""
    assert result.address_line_1 == "1, Test Street"
def test_postcode_absent(self, result: EpcPropertyData) -> None:
    """Check that ``postcode`` is ``None`` on the mapped record."""
    # NOTE(review): the neighbouring test_postcode expects "TE1 1ST" from what
    # looks like the same fixture, so this is presumably the pre-change side of
    # the diff hunk — confirm it is removed rather than kept alongside it.
    assert result.postcode is None
def test_postcode(self, result: EpcPropertyData) -> None:
    """Check that ``postcode`` on the mapped record is "TE1 1ST"."""
    assert result.postcode == "TE1 1ST"
def test_post_town_absent(self, result: EpcPropertyData) -> None:
    """Check that ``post_town`` is ``None`` on the mapped record."""
    # NOTE(review): the neighbouring test_post_town expects "Test Town" from what
    # looks like the same fixture, so this is presumably the pre-change side of
    # the diff hunk — confirm it is removed rather than kept alongside it.
    assert result.post_town is None
def test_post_town(self, result: EpcPropertyData) -> None:
    """Check that ``post_town`` on the mapped record is "Test Town"."""
    assert result.post_town == "Test Town"
def test_status_absent(self, result: EpcPropertyData) -> None:
    """Check that ``status`` is ``None`` on the mapped record."""
    assert result.status is None
@ -352,9 +352,9 @@ class TestFromSiteNotesExample1:
sap_version=None,
dwelling_type="Mid-terrace house",
uprn=None,
address_line_1=None,
postcode=None,
post_town=None,
address_line_1="1, Test Street",
postcode="TE1 1ST",
post_town="Test Town",
inspection_date=date(2026, 3, 31),
status=None,
tenure="Rented Social",
@ -495,5 +495,6 @@ class TestFromSiteNotesExample1:
has_conservatory=False,
blocked_chimneys_count=0,
draughtproofed_door_count=2,
report_reference="49D422A9-0779-44DD-9665-464D35DFF1A8",
)
assert result == expected

View file

@ -276,7 +276,7 @@ class SurveyAddendum:
@dataclass
class PasHubRdSapSiteNotes:
inspection_metadata: Optional[InspectionMetadata]
inspection_metadata: InspectionMetadata
general: General
building_construction: BuildingConstruction
building_measurements: BuildingMeasurements

View file

@ -5,7 +5,7 @@
"report_reference": "49D422A9-0779-44DD-9665-464D35DFF1A8",
"created_on": "2026-03-31",
"date_of_inspection": "2026-03-31",
"property_address": "test"
"property_address": "1, Test Street, Test Town, Test County, TE1 1ST"
},
"general": {
"epc_checked_before_assessment": true,
@ -229,4 +229,4 @@
"hard_to_treat_cavity_high_exposure": false,
"hard_to_treat_cavity_narrow_cavities": false
}
}
}