#!/usr/bin/env python3
# Origin: patch 1d4655c3c460f9a76d48aa4fe940319ea23af631 ("local runner",
# Daniel Roth, Fri, 24 Apr 2026 12:49:06 +0000), which creates local_runner.py.
"""
Parse a local site-notes PDF and load the result into the database.

Usage:
    python local_runner.py [PDF_PATH]

When PDF_PATH is omitted, the example fixture path defined in
``_DEFAULT_PDF_PATH`` is used.
"""
import sys
from typing import List, Optional, Tuple

from backend.app.db.connection import db_session
from backend.app.db.models.epc_property import (
    EpcBuildingPartModel,
    EpcEnergyElementModel,
    EpcFlatDetailsModel,
    EpcFloorDimensionModel,
    EpcMainHeatingDetailModel,
    EpcPropertyEnergyPerformanceModel,
    EpcPropertyModel,
    EpcWindowModel,
)
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_text_list
from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes

# Path used when the script is started without a command-line argument.
_DEFAULT_PDF_PATH = "backend/documents_parser/tests/fixtures/ExampleSiteNotes_6.pdf"


def _parse_pdf(pdf_path: str) -> EpcPropertyData:
    """Read the PDF at *pdf_path* and map it to an ``EpcPropertyData``.

    The file is read as bytes, converted to a list of page texts with
    ``pdf_to_text_list``, extracted into ``PasHubRdSapSiteNotes`` and finally
    mapped with ``EpcPropertyDataMapper.from_site_notes``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(pdf_path, "rb") as f:
        pdf_bytes: bytes = f.read()
    pages: List[str] = pdf_to_text_list(pdf_bytes)
    site_notes: PasHubRdSapSiteNotes = PasHubRdSapSiteNotesExtractor(pages).extract()
    return EpcPropertyDataMapper.from_site_notes(site_notes)


def _insert_energy_elements(
    session,
    elements: List[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add one ``EpcEnergyElementModel`` to *session* per item in *elements*.

    Every row is built with the same *element_type* label and
    *epc_property_id*. An empty list adds nothing.
    """
    for el in elements:
        session.add(
            EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
        )


def _insert_optional_energy_element(
    session,
    el: Optional[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add a single ``EpcEnergyElementModel`` to *session* unless *el* is None."""
    if el is not None:
        session.add(
            EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
        )


def run(pdf_path: str) -> None:
    """Parse the PDF at *pdf_path* and add its contents to a database session.

    Progress is reported on stdout: a confirmation once the PDF is mapped,
    then the new property id and its address.

    Raises:
        OSError: if the PDF cannot be read.
        RuntimeError: if a row still has no ``id`` after ``session.flush()``.
    """
    data: EpcPropertyData = _parse_pdf(pdf_path)
    print("successfully mapped pdf")

    with db_session() as session:
        epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data)
        session.add(epc_prop)
        # The id is read right after flush() - presumably the flush is what
        # makes the database assign the primary key; confirm against the ORM.
        session.flush()
        # Explicit check instead of ``assert`` so it is not stripped by -O.
        if epc_prop.id is None:
            raise RuntimeError("EpcPropertyModel has no id after flush")
        epc_property_id: int = epc_prop.id

        session.add(
            EpcPropertyEnergyPerformanceModel.from_epc_property_data(
                data, epc_property_id=epc_property_id
            )
        )

        for detail in data.sap_heating.main_heating_details:
            session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))

        for part in data.sap_building_parts:
            bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
                part, epc_property_id
            )
            session.add(bp)
            # Floor dimensions reference the building part's id, so each part
            # is flushed before its dimensions are added.
            session.flush()
            if bp.id is None:
                raise RuntimeError("EpcBuildingPartModel has no id after flush")
            for dim in part.sap_floor_dimensions:
                session.add(EpcFloorDimensionModel.from_domain(dim, bp.id))

        for window in data.sap_windows:
            session.add(EpcWindowModel.from_domain(window, epc_property_id))

        # Energy elements that come as lists, paired with their type label.
        list_elements: List[Tuple[List[EnergyElement], str]] = [
            (data.roofs, "roof"),
            (data.walls, "wall"),
            (data.floors, "floor"),
            (data.main_heating, "main_heating"),
        ]
        for elements, etype in list_elements:
            _insert_energy_elements(session, elements, etype, epc_property_id)

        # Energy elements that are single values and may be absent.
        optional_elements: List[Tuple[Optional[EnergyElement], str]] = [
            (data.window, "window"),
            (data.lighting, "lighting"),
            (data.hot_water, "hot_water"),
            (data.secondary_heating, "secondary_heating"),
            (data.main_heating_controls, "main_heating_controls"),
        ]
        for el, etype in optional_elements:
            _insert_optional_energy_element(session, el, etype, epc_property_id)

        if data.sap_flat_details is not None:
            session.add(
                EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id)
            )

    # NOTE(review): reported after the session context has exited; presumably
    # db_session() commits on a clean exit - confirm against its definition.
    print(f"epc_property_id={epc_property_id}")
    print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}")


if __name__ == "__main__":
    # An optional first argument overrides the default fixture path.
    run(sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_PDF_PATH)