Model/backend/documents_parser/local_runner.py

122 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
Parse a local site-notes PDF and load the result into the database.
Usage:
python local_runner.py <pdf_path>
"""
from typing import List, Optional, Tuple
from backend.app.db.connection import db_session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_text_list
from datatypes.epc.domain.epc_property_data import EnergyElement, EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from datatypes.epc.surveys.pashub_rdsap_site_notes import PasHubRdSapSiteNotes
def _parse_pdf(pdf_path: str) -> EpcPropertyData:
    """Read a site-notes PDF from disk and map it to ``EpcPropertyData``.

    The file is read as raw bytes, converted to a list of page texts with
    ``pdf_to_text_list``, run through ``PasHubRdSapSiteNotesExtractor`` and
    finally mapped with ``EpcPropertyDataMapper.from_site_notes``.

    Args:
        pdf_path: Filesystem path of the PDF to parse.

    Returns:
        The mapped property data.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(pdf_path, "rb") as pdf_file:
        raw_pdf: bytes = pdf_file.read()

    page_texts: List[str] = pdf_to_text_list(raw_pdf)
    extractor = PasHubRdSapSiteNotesExtractor(page_texts)
    return EpcPropertyDataMapper.from_site_notes(extractor.extract())
def _insert_energy_elements(
    session,
    elements: List[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add one ``EpcEnergyElementModel`` to ``session`` per element.

    Args:
        session: Object exposing ``add``; receives each built model.
        elements: Domain energy elements to convert, in order.
        element_type: Type label passed through to ``from_domain``.
        epc_property_id: Id of the owning property, passed to ``from_domain``.
    """
    for element in elements:
        row = EpcEnergyElementModel.from_domain(
            element, element_type, epc_property_id
        )
        session.add(row)
def _insert_optional_energy_element(
    session,
    el: Optional[EnergyElement],
    element_type: str,
    epc_property_id: int,
) -> None:
    """Add a single energy element to ``session`` when one is present.

    Args:
        session: Object exposing ``add``; receives the built model.
        el: Domain energy element, or ``None`` to do nothing.
        element_type: Type label passed through to ``from_domain``.
        epc_property_id: Id of the owning property, passed to ``from_domain``.
    """
    # Guard clause: nothing to insert for an absent element.
    if el is None:
        return

    row = EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
    session.add(row)
def run(pdf_path: str) -> None:
    """Parse the PDF at ``pdf_path`` and add the mapped records to the database.

    The property row is added and flushed first so that its ``id`` (expected
    to be populated by the flush) can be handed to every dependent model.
    Each building part is likewise flushed before its floor dimensions are
    added, because those reference the building part's ``id``.

    Args:
        pdf_path: Filesystem path of the site-notes PDF to parse.

    Raises:
        RuntimeError: If the property row or a building-part row still has no
            ``id`` after being flushed.
    """
    data: EpcPropertyData = _parse_pdf(pdf_path)
    print("successfully mapped pdf")

    with db_session() as session:
        epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data)
        session.add(epc_prop)
        session.flush()
        # Explicit check instead of `assert`: asserts are removed under
        # `python -O`, which would let a None id flow into every child row.
        if epc_prop.id is None:
            raise RuntimeError("EpcPropertyModel has no id after flush")
        epc_property_id: int = epc_prop.id

        session.add(
            EpcPropertyEnergyPerformanceModel.from_epc_property_data(
                data, epc_property_id=epc_property_id
            )
        )

        for detail in data.sap_heating.main_heating_details:
            session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))

        for part in data.sap_building_parts:
            bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
                part, epc_property_id
            )
            session.add(bp)
            # Flush per part so bp.id is available for its floor dimensions.
            session.flush()
            if bp.id is None:
                raise RuntimeError("EpcBuildingPartModel has no id after flush")
            for dim in part.sap_floor_dimensions:
                session.add(EpcFloorDimensionModel.from_domain(dim, bp.id))

        for window in data.sap_windows:
            session.add(EpcWindowModel.from_domain(window, epc_property_id))

        # Element collections that may hold any number of entries.
        list_elements: List[Tuple[List[EnergyElement], str]] = [
            (data.roofs, "roof"),
            (data.walls, "wall"),
            (data.floors, "floor"),
            (data.main_heating, "main_heating"),
        ]
        for elements, etype in list_elements:
            _insert_energy_elements(session, elements, etype, epc_property_id)

        # Single elements that may be absent (None).
        optional_elements: List[Tuple[Optional[EnergyElement], str]] = [
            (data.window, "window"),
            (data.lighting, "lighting"),
            (data.hot_water, "hot_water"),
            (data.secondary_heating, "secondary_heating"),
            (data.main_heating_controls, "main_heating_controls"),
        ]
        for el, etype in optional_elements:
            _insert_optional_energy_element(session, el, etype, epc_property_id)

        if data.sap_flat_details is not None:
            session.add(
                EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id)
            )

    # Report only once the session context has exited without raising.
    print(f"epc_property_id={epc_property_id}")
    print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}")
if __name__ == "__main__":
    # Imported here because `sys` is only needed when run as a script.
    import sys

    # Honour the documented usage (`python local_runner.py <pdf_path>`).
    # With no argument, fall back to the fixture path that was previously
    # hard-coded so the zero-argument invocation keeps working.
    _default_pdf_path = "backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf"
    run(sys.argv[1] if len(sys.argv) > 1 else _default_pdf_path)