mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
131 lines
4.4 KiB
Python
131 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Parse a local site-notes PDF and load the result into the database.
|
|
|
|
Usage:
|
|
python local_runner.py <pdf_path>
|
|
"""
|
|
from typing import List, Optional, Tuple
|
|
|
|
from backend.app.db.connection import db_session
|
|
from backend.app.db.models.epc_property import (
|
|
EpcBuildingPartModel,
|
|
EpcEnergyElementModel,
|
|
EpcFlatDetailsModel,
|
|
EpcFloorDimensionModel,
|
|
EpcMainHeatingDetailModel,
|
|
EpcPropertyEnergyPerformanceModel,
|
|
EpcPropertyModel,
|
|
EpcWindowModel,
|
|
)
|
|
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
|
|
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
|
|
from backend.documents_parser.pdf import pdf_to_pages, pdf_to_text_list
|
|
from domain.epc.epc_property_data import EnergyElement, EpcPropertyData
|
|
from domain.epc.mapper import EpcPropertyDataMapper
|
|
|
|
|
|
def _parse_pdf(pdf_path: str) -> EpcPropertyData:
|
|
with open(pdf_path, "rb") as f:
|
|
pdf_bytes: bytes = f.read()
|
|
|
|
pages: List[str] = pdf_to_pages(pdf_bytes)
|
|
full_text: str = "\n".join(pages)
|
|
|
|
if "Elmhurst Energy Systems" in full_text:
|
|
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
|
|
return EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
|
|
|
|
tokens: List[str] = pdf_to_text_list(pdf_bytes)
|
|
pashub_notes = PasHubRdSapSiteNotesExtractor(tokens).extract()
|
|
return EpcPropertyDataMapper.from_site_notes(pashub_notes)
|
|
|
|
|
|
def _insert_energy_elements(
|
|
session,
|
|
elements: List[EnergyElement],
|
|
element_type: str,
|
|
epc_property_id: int,
|
|
) -> None:
|
|
for el in elements:
|
|
session.add(
|
|
EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
|
|
)
|
|
|
|
|
|
def _insert_optional_energy_element(
|
|
session,
|
|
el: Optional[EnergyElement],
|
|
element_type: str,
|
|
epc_property_id: int,
|
|
) -> None:
|
|
if el is not None:
|
|
session.add(
|
|
EpcEnergyElementModel.from_domain(el, element_type, epc_property_id)
|
|
)
|
|
|
|
|
|
def run(pdf_path: str) -> None:
    """Parse the PDF at *pdf_path* and insert the mapped data into the database.

    All rows are added inside a single ``db_session()`` block. The property
    row and each building-part row are flushed right after being added so
    that their ``id`` can be passed to the rows that reference them.

    Args:
        pdf_path: Path of the site-notes PDF to load.

    Raises:
        RuntimeError: If a flushed property or building-part row still has
            no ``id``.
    """
    data: EpcPropertyData = _parse_pdf(pdf_path)
    print("successfully mapped pdf")

    with db_session() as session:
        epc_prop: EpcPropertyModel = EpcPropertyModel.from_epc_property_data(data)
        session.add(epc_prop)
        session.flush()
        # Explicit check rather than `assert`: asserts are removed under
        # `python -O`, which would let a missing id reach every child row.
        if epc_prop.id is None:
            raise RuntimeError("EpcPropertyModel has no id after flush")
        epc_property_id: int = epc_prop.id

        session.add(
            EpcPropertyEnergyPerformanceModel.from_epc_property_data(
                data, epc_property_id=epc_property_id
            )
        )

        for detail in data.sap_heating.main_heating_details:
            session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))

        for part in data.sap_building_parts:
            bp: EpcBuildingPartModel = EpcBuildingPartModel.from_domain(
                part, epc_property_id
            )
            session.add(bp)
            # Flush per building part so its id is available for the floor
            # dimension rows added just below.
            session.flush()
            if bp.id is None:
                raise RuntimeError("EpcBuildingPartModel has no id after flush")
            building_part_id: int = bp.id
            for dim in part.sap_floor_dimensions:
                session.add(EpcFloorDimensionModel.from_domain(dim, building_part_id))

        for window in data.sap_windows:
            session.add(EpcWindowModel.from_domain(window, epc_property_id))

        # Energy elements that come as lists, paired with their type label.
        list_elements: List[Tuple[List[EnergyElement], str]] = [
            (data.roofs, "roof"),
            (data.walls, "wall"),
            (data.floors, "floor"),
            (data.main_heating, "main_heating"),
        ]
        for elements, etype in list_elements:
            _insert_energy_elements(session, elements, etype, epc_property_id)

        # Energy elements that may be absent (None), paired with their label.
        optional_elements: List[Tuple[Optional[EnergyElement], str]] = [
            (data.window, "window"),
            (data.lighting, "lighting"),
            (data.hot_water, "hot_water"),
            (data.secondary_heating, "secondary_heating"),
            (data.main_heating_controls, "main_heating_controls"),
        ]
        for el, etype in optional_elements:
            _insert_optional_energy_element(session, el, etype, epc_property_id)

        if data.sap_flat_details is not None:
            session.add(
                EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id)
            )

        # NOTE(review): the indentation of this file was lost in the copy this
        # was reconstructed from; these prints are placed inside the session
        # block — confirm against the upstream file.
        print(f"epc_property_id={epc_property_id}")
        print(f"address: {data.address_line_1}, {data.post_town}, {data.postcode}")
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Fixture loaded when no path is given on the command line; this keeps
    # the previous no-argument behaviour.
    # Alternative fixture:
    #   backend/documents_parser/tests/fixtures/PasHubSiteNotes_6.pdf
    default_pdf = "backend/documents_parser/tests/fixtures/ElmhurstSiteNotes.pdf"

    # Honour the usage documented in the module docstring:
    #   python local_runner.py <pdf_path>
    run(sys.argv[1] if len(sys.argv) > 1 else default_pdf)
|