Map to RdSapSiteNotes from site notes JSON 🟥

This commit is contained in:
Daniel Roth 2026-04-16 13:54:03 +00:00
parent 6d210849f4
commit 4f3c7894ae
7 changed files with 790 additions and 1 deletions

View file

View file

@ -0,0 +1,9 @@
from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes
class PasHubRdSapSiteNotesExtractor:
    """Build a ``PasHubRdSapSiteNotes`` from a list of extracted text lines.

    The extraction logic is not written yet: ``extract`` currently always
    raises ``NotImplementedError``.
    """

    def __init__(self, text_list: list[str]) -> None:
        """Store the text lines that ``extract`` will work from.

        Args:
            text_list: Lines of text taken from a site-notes document.
        """
        self.text_list = text_list

    def extract(self) -> PasHubRdSapSiteNotes:
        """Return the structured site notes for ``self.text_list``.

        Raises:
            NotImplementedError: Always, until the mapping is implemented.
        """
        raise NotImplementedError

View file

@ -0,0 +1,101 @@
import json
import os
import time
from typing import Any, List, Mapping
import boto3
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
# Module-level logger shared by the helper functions in this script.
logger = setup_logger()
# S3 bucket the handler uploads the PDF to and points Textract at.
BUCKET = "retrofit-energy-assessments-dev"
# Object key under which the example PDF is stored in BUCKET.
PDF_S3_KEY = "example/SiteNotesExample.pdf"
# Local copy of the example PDF, resolved relative to this file:
# two directories up, then tests/test_data/SiteNotesExample.pdf.
PDF_LOCAL_PATH = os.path.join(
os.path.dirname(__file__),
"..",
"..",
"tests",
"test_data",
"SiteNotesExample.pdf",
)
def upload_pdf(local_path: str, bucket: str, key: str) -> None:
    """Upload a local file to S3, logging before and after the transfer.

    Args:
        local_path: Path of the file on disk.
        bucket: Destination S3 bucket name.
        key: Destination object key inside ``bucket``.
    """
    destination = f"s3://{bucket}/{key}"
    logger.info(f"Uploading {local_path} to {destination}")
    upload_file_to_s3(local_path, bucket, key)
    logger.info("Upload complete")
def start_textract_job(bucket: str, key: str) -> str:
    """Start a Textract document-analysis job for an object stored in S3.

    The job is requested with the ``FORMS`` feature type only.

    Args:
        bucket: S3 bucket containing the document.
        key: Object key of the document inside ``bucket``.

    Returns:
        The ``JobId`` reported by Textract for the started job.
    """
    textract = boto3.client("textract")
    document = {"S3Object": {"Bucket": bucket, "Name": key}}
    started = textract.start_document_analysis(
        DocumentLocation=document,
        FeatureTypes=["FORMS"],
    )
    job_id: str = started["JobId"]
    logger.info(f"Started Textract job {job_id}")
    return job_id
def wait_for_job(job_id: str, poll_interval_seconds: int = 5) -> None:
    """Poll a Textract analysis job until it reaches a terminal status.

    Args:
        job_id: Identifier returned when the job was started.
        poll_interval_seconds: Seconds to sleep between status checks.

    Raises:
        RuntimeError: If Textract reports the job as ``FAILED``.
    """
    client = boto3.client("textract")
    logger.info(f"Polling Textract job {job_id}...")
    while True:
        # MaxResults=1 keeps the status probe small; the blocks themselves
        # are fetched separately once the job has finished.
        response = client.get_document_analysis(JobId=job_id, MaxResults=1)
        status = response["JobStatus"]
        logger.info(f"Status: {status}")
        if status == "SUCCEEDED":
            return
        if status == "PARTIAL_SUCCESS":
            # PARTIAL_SUCCESS is also terminal: the job will never move on
            # to SUCCEEDED, so continuing to poll would loop forever.
            logger.warning(
                f"Textract job {job_id} finished with partial results: "
                f"{response.get('StatusMessage')}"
            )
            return
        if status == "FAILED":
            raise RuntimeError(
                f"Textract job {job_id} failed: {response.get('StatusMessage')}"
            )
        time.sleep(poll_interval_seconds)
def collect_blocks(job_id: str) -> List[Any]:
    """Fetch every result block of a Textract analysis job.

    Pages through ``get_document_analysis`` (up to 1000 results per call),
    following ``NextToken`` until the response no longer carries one.

    Args:
        job_id: Identifier of the Textract job to read.

    Returns:
        All ``Blocks`` entries from every page, in the order received.
    """
    textract = boto3.client("textract")
    collected: List[Any] = []
    request: dict = {"JobId": job_id, "MaxResults": 1000}
    while True:
        page = textract.get_document_analysis(**request)
        collected.extend(page.get("Blocks", []))
        token = page.get("NextToken")
        if not token:
            break
        # Only follow-up requests carry a token; the first one has none.
        request["NextToken"] = token
    logger.info(f"Collected {len(collected)} blocks")
    return collected
def save_blocks(blocks: List[Any], output_path: str) -> None:
    """Write the blocks to ``output_path`` as indented JSON.

    Values that ``json`` cannot serialise natively are converted with
    ``str`` (``default=str``).

    Args:
        blocks: Blocks to serialise.
        output_path: File to create or overwrite.
    """
    with open(output_path, "w") as out_file:
        json.dump(blocks, out_file, indent=2, default=str)
    logger.info(f"Saved blocks to {output_path}")
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Run the whole flow: upload the PDF, analyse it, save the blocks.

    The resulting blocks are written to ``textract_blocks.json`` one
    directory above this file. Neither argument is read by the function.

    Args:
        event: Invocation payload (unused).
        context: Invocation context (unused).
    """
    logger.info("Entered handler")
    module_dir = os.path.dirname(__file__)
    output_path = os.path.join(module_dir, "..", "textract_blocks.json")

    upload_pdf(PDF_LOCAL_PATH, BUCKET, PDF_S3_KEY)
    job_id = start_textract_job(BUCKET, PDF_S3_KEY)
    wait_for_job(job_id)
    save_blocks(collect_blocks(job_id), output_path)
    logger.info("Done")


# Allow running the flow directly as a script with an empty event.
if __name__ == "__main__":
    handler({}, None)

View file

@ -0,0 +1,623 @@
[
"",
"Photo of electricity meter:",
"Photo of electricity meter:",
"Photo of electricity meter:",
"Photo of electricity meter:",
"RdSAP Assessment",
"General",
"Confirm you have checked for the existence of an",
"EPC before carrying out another energy assessment.",
"Yes",
"Does an EPC exist at the point of carrying out this",
"energy assessment?",
"No",
"Inspection Date:",
"25/09/2025",
"Transaction Type:",
"Grant-Scheme (ECO, RHI, etc.)",
"Tenure:",
"Rented Social",
"Type of Property:",
"House",
"Detachment Type:",
"Mid-terrace",
"Number of storeys:",
"2 Storeys",
"Terrain Type:",
"Suburban",
"Number of Extensions:",
"1 Extension",
"Is an electricity smart meter present?",
"Yes",
"Electric meter type:",
"Single",
"Is the dwelling export-capable?",
"Yes",
"Is mains gas available?",
"Yes",
"Is there a gas smart meter?",
"Yes",
"Is the gas meter accessible?",
"Yes",
"Page 2",
"",
"Photo of Gas Meter:",
"External indicators of Cavity Wall Construction:",
"External indicators of Cavity Wall Construction:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Select Measurements Location:",
"Internal",
"Building Construction",
"Main Building",
"Age Range:",
"1950-1966",
"Record indicators of property age:",
"local knowledge, enquiries of owner",
"Walls - Construction Type:",
"Cavity",
"Record external indicators of Cavity Construction:",
"wall thickness over 270 mm",
"Walls - Insulation Type:",
"Filled Cavity",
"Record indicators of filled cavity:",
"evidence of cavity fill drill holes",
"Page 3",
"",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Page 4",
"",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Photo indicators of filled cavity insulation:",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"310 mm",
"Page 5",
"",
"Photo wall thickness:",
"External indicators of Cavity Wall Construction:",
"Party wall construction type:",
"Cavity Masonry, Filled",
"Floor type:",
"Ground Floor",
"Floor Construction:",
"Solid",
"Floor Insulation Type:",
"As Built",
"Floor U-Value known?",
"Not Known",
"Extension 1",
"Age Range:",
"2003-2006",
"Record indicators of property age:",
"local knowledge, enquiries of owner",
"Walls - Construction Type:",
"Cavity",
"Record external indicators of Cavity Construction:",
"wall thickness over 270 mm",
"Walls - Insulation Type:",
"As built",
"Thermal conductivity of wall insulation:",
"Unknown",
"Wall U-Value known?",
"Not Known",
"Wall thickness:",
"310 mm",
"Page 6",
"",
"Photo wall thickness:",
"Party wall construction type:",
"Cavity Masonry, Filled",
"Floor type:",
"Ground Floor",
"Floor Construction:",
"Solid",
"Floor Insulation Type:",
"As Built",
"Floor U-Value known?",
"Not Known",
"Building Measurements",
"Area (m2)",
"Height (m)",
"Heat Loss Perimeter (m)",
"PWL (m)",
"Main Building",
"Floor 1",
"35.68",
"2.19",
"13.44",
"10.62",
"Floor 0",
"35.68",
"2.17",
"11",
"10.62",
"Extension 1",
"Floor 0",
"3.8",
"2",
"5.7",
"0",
"Roof Space",
"Main Building",
"Roofs - Construction Type:",
"Pitched roof (Slates or tiles), Access to loft",
"Roofs - Insulation At:",
"Joists",
"Roof U-Value:",
"Not Known",
"Roofs - Insulation Thickness:",
"100 mm",
"Page 7",
"",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Page 8",
"",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Loft insulation:",
"Page 9",
"",
"Loft insulation:",
"Indicators of Cavity Wall Construction in roof space:",
"Indicators of Cavity Wall Construction in roof space:",
"Record indicators of party wall construction in roof space:",
"Record indicators of party wall construction in roof space:",
"Record indicators of Cavity Wall Construction in roof",
"space:",
"cavity visible in roof space",
"Are there rooms in the roof?",
"No",
"Extension 1",
"Roofs - Construction Type:",
"Pitched roof, Sloping ceiling",
"Page 10",
"",
"Photo of glazing type:",
"Photo of glazing type:",
"Roofs - Insulation At:",
"Sloping ceiling insulation",
"Roof U-Value:",
"Not Known",
"Roofs - Insulation Thickness:",
"As built",
"Record indicators of Cavity Wall Construction in roof",
"space:",
"No indicator of construction visible",
"Are there rooms in the roof?",
"No",
"Windows",
"Window 1",
"Window location:",
"Main Building",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1.2 m",
"Window width:",
"2.3 m",
"Orientation:",
"North West",
"Window 2",
"Window location:",
"Main Building",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Page 11",
"",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"Window height:",
"1.2 m",
"Window width:",
"1 m",
"Orientation:",
"North West",
"Window 3",
"Window location:",
"Main Building",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"0.9 m",
"Window width:",
"1 m",
"Orientation:",
"North East",
"Window 4",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Page 12",
"",
"Photo of glazing type:",
"Photo of glazing type:",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"0.9 m",
"Window width:",
"1 m",
"Orientation:",
"North",
"Window 5",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"0.9 m",
"Window width:",
"1.7 m",
"Orientation:",
"North East",
"Page 13",
"",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"Window 6",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"0.9 m",
"Window width:",
"2.3 m",
"Orientation:",
"North West",
"Window 7",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"Page 14",
"",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"Photo of glazing type:",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"1 m",
"Window width:",
"1.2 m",
"Orientation:",
"North West",
"Window 8",
"Window location:",
"Extension 1",
"Window wall type:",
"External wall",
"Glazing Type:",
"Double glazing, Unknown install date",
"Window type:",
"Window",
"Window frame type:",
"Wooden or PVC",
"What size is the glazing gap?",
"16 mm or more",
"Is the window draught proofed?",
"Yes",
"Are there permanent shutters present?",
"No",
"Window height:",
"0.9 m",
"Window width:",
"1 m",
"Orientation:",
"North East",
"Page 15",
"",
"Photo of heating system:",
"Heating & Hot Water",
"Main Heating Systems",
"Main Heating 1",
"How would you like to select the Heating System?",
"PCDF Search",
"System type:",
"Boiler with radiators or underfloor heating",
"Product Id",
"16839",
"Manufacturer",
"Vaillant",
"Model",
"ecoTEC pro 28",
"Orig Manuf",
"Vaillant",
"Fuel",
"Mains gas",
"S. Efficiency",
"0",
"Type",
"Combi",
"Condensing",
"Yes",
"Year",
"2005 - 2015",
"Mount",
"Wall",
"Open Flue",
"Room-sealed",
"Fan Assist",
"Yes",
"Status",
"Normal status for an actual product",
"Central heating pump age:",
"Unknown",
"Controls:",
"Programmer, room thermostat and TRVs",
"Does the boiler have a Flue Gas Heat Recover",
"System (FGHRS)?",
"No",
"Is there a weather compensator?",
"No",
"Emitter:",
"Radiators",
"Emitter Temperature:",
"Unknown",
"Page 16",
"",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Page 17",
"",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating system:",
"Photo of heating controls:",
"Photo of heating controls:",
"Secondary Heating System",
"Secondary Fuel",
"No Secondary Heating",
"Water Heating & Cylinder",
"Water Heating Type:",
"Regular",
"Water Heating System:",
"From main heating 1",
"Cylinder Size:",
"No Cylinder",
"Ventilation",
"Ventilation type:",
"Mechanical Extract - Decentralised",
"Page 18",
"",
"Photo of ventilation type:",
"Has fixed air conditioning?",
"No",
"Is the ventilation in the PCDF database?",
"No",
"Number of open flues:",
"0",
"Number of closed flues:",
"0",
"Number of boiler flues:",
"0",
"Number of other flues:",
"0",
"Number of extract fans:",
"0",
"Number of passive vents:",
"0",
"Number of flueless gas fires:",
"0",
"Pressure test:",
"No test",
"Is there a draught lobby?",
"No",
"Conservatories",
"Is there conservatory?",
"No conservatory",
"Page 19",
"",
"Photo of incandescent bulbs:",
"Photo of incandescent bulbs:",
"Renewables",
"Wind Turbines",
"Has wind turbines?",
"No",
"Solar hot water",
"Has solar hot water?",
"No",
"Photovoltaics",
"Has photovoltaic array?",
"No",
"Number of PV batteries:",
"None",
"Hydro",
"Is the dwelling connected to Hydro?",
"No",
"Room Count Elements",
"Number of habitable rooms?",
"3",
"Are any of these rooms unheated?",
"No",
"Number of external doors?",
"2",
"Number of insulated external doors?",
"0",
"Number of draughtproofed external doors?",
"2",
"Number of open chimneys?",
"0",
"Number of blocked chimneys?",
"0",
"Number of fixed incandescent bulbs:",
"4",
"Page 20",
"",
"Photo of incandescent bulbs:",
"Photo of incandescent bulbs:",
"Photo of CFL bulbs:",
"Is the exact number of LED and CFL bulbs known?",
"Yes",
"Number of fixed LED bulbs:",
"0",
"Number of fixed CFL bulbs:",
"1",
"Are there any waste water heat recovery systems?",
"None",
"Number of baths:",
"1",
"How many special features are there at the",
"property?",
"0",
"Showers",
"Shower 1",
"Shower outlet type:",
"Non-Electric Shower",
"Page 21",
"",
"Photo of shower:",
"Photo of shower:",
"General Photos:",
"Customer Response",
"Customer present?",
"Yes",
"Customer willing to answer satisfaction survey?",
"No",
"Addendum + Related Party Disclosure",
"Addendum",
"None",
"Related party disclosure",
"No related party",
"Hard to treat cavity walls: Property has access",
"issues?",
"No",
"Hard to treat cavity walls: Property has high",
"exposure?",
"No",
"Hard to treat cavity walls: Property has narrow",
"cavities?",
"No",
"Photographs Required",
"Page 22",
"",
"External Elevations:",
"External Elevations:",
"External Elevations:",
"External Elevations:",
"External Elevations:",
"External Elevations:",
"External Elevations:",
"Page 23",
"",
"Page 24",
"",
"Page 25",
"",
"Page 26",
"",
"Page 27",
""
]

View file

@ -0,0 +1,56 @@
import json
import os
import pytest
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from datatypes.epc.surveys.pashub_rdsap_site_notes import General, PasHubRdSapSiteNotes
# Directory named "fixtures" located next to this test module.
FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
def load_text_fixture() -> list[str]:
    """Load ``site_notes_example_text.json`` from the fixtures directory.

    Returns:
        The parsed JSON content of the fixture file.
    """
    fixture_path = os.path.join(FIXTURES, "site_notes_example_text.json")
    with open(fixture_path) as fixture_file:
        return json.load(fixture_file)
class TestGeneral:
    """Checks on the ``general`` section extracted from the text fixture."""

    @pytest.fixture
    def general(self) -> General:
        """Run the extractor on the text fixture and return ``general``."""
        text_lines = load_text_fixture()
        site_notes = PasHubRdSapSiteNotesExtractor(text_lines).extract()
        return site_notes.general

    def test_epc_checked_before_assessment(self, general: General) -> None:
        """The EPC-check confirmation is answered "Yes" in the fixture."""
        assert general.epc_checked_before_assessment is True

    def test_epc_exists_at_point_of_assessment(self, general: General) -> None:
        """The EPC-exists question is answered "No" in the fixture."""
        assert general.epc_exists_at_point_of_assessment is False

    def test_inspection_date(self, general: General) -> None:
        """The fixture's "25/09/2025" is expected as "2025-09-25"."""
        assert general.inspection_date == "2025-09-25"

    def test_transaction_type(self, general: General) -> None:
        """Transaction type is carried over as written in the fixture."""
        assert general.transaction_type == "Grant-Scheme (ECO, RHI, etc.)"

    def test_tenure(self, general: General) -> None:
        """Tenure is carried over as written in the fixture."""
        assert general.tenure == "Rented Social"

    def test_property_type(self, general: General) -> None:
        """Property type is carried over as written in the fixture."""
        assert general.property_type == "House"

    def test_detachment_type(self, general: General) -> None:
        """Detachment type is carried over as written in the fixture."""
        assert general.detachment_type == "Mid-terrace"

    def test_number_of_storeys(self, general: General) -> None:
        """The fixture's "2 Storeys" is expected as the number 2."""
        assert general.number_of_storeys == 2

    def test_number_of_extensions(self, general: General) -> None:
        """The fixture's "1 Extension" is expected as the number 1."""
        assert general.number_of_extensions == 1

    def test_electricity_smart_meter(self, general: General) -> None:
        """The electricity smart meter question is answered "Yes"."""
        assert general.electricity_smart_meter is True

    def test_mains_gas_available(self, general: General) -> None:
        """The mains gas question is answered "Yes"."""
        assert general.mains_gas_available is True

    def test_measurements_location(self, general: General) -> None:
        """Measurements location is carried over as written in the fixture."""
        assert general.measurements_location == "Internal"

View file

@ -3,6 +3,6 @@ pythonpath = .
log_cli = true log_cli = true
log_cli_level = INFO log_cli_level = INFO
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/documents_parser/tests
markers = markers =
integration: mark a test as an integration test integration: mark a test as an integration test