Include updating epc_property_data in the pashub-to-ara workflow

This commit is contained in:
Daniel Roth 2026-04-29 09:55:14 +00:00
parent 1bc8343738
commit 252657a374
3 changed files with 114 additions and 25 deletions

View file

@ -0,0 +1,76 @@
from typing import Optional
from sqlmodel import Session
from backend.app.db.models.epc_property import (
EpcBuildingPartModel,
EpcEnergyElementModel,
EpcFlatDetailsModel,
EpcFloorDimensionModel,
EpcMainHeatingDetailModel,
EpcPropertyEnergyPerformanceModel,
EpcPropertyModel,
EpcWindowModel,
)
from datatypes.epc.domain.epc_property_data import EpcPropertyData
def save_epc_property_data(
    session: Session,
    data: EpcPropertyData,
    uploaded_file_id: Optional[int] = None,
    property_id: Optional[int] = None,
    portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
    """Stage an EPC property record and all of its child rows on ``session``.

    The parent ``EpcPropertyModel`` is built from ``data``, added and flushed
    so that its ``id`` is available; every child model is then built against
    that id and added to the same session. This function flushes but never
    commits -- committing is left to the caller.

    Args:
        session: Session that receives every new row.
        data: Domain object the rows are derived from.
        uploaded_file_id: Stored on the parent row as ``uploaded_file_id``.
        property_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.
        portfolio_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.

    Returns:
        The parent ``EpcPropertyModel`` that was added to the session.
    """
    property_row = EpcPropertyModel.from_epc_property_data(
        data, property_id=property_id, portfolio_id=portfolio_id
    )
    property_row.uploaded_file_id = uploaded_file_id
    session.add(property_row)
    # Flush now: every child row below is keyed on the parent's id.
    session.flush()
    assert property_row.id is not None
    epc_property_id: int = property_row.id

    performance_row = EpcPropertyEnergyPerformanceModel.from_epc_property_data(
        data, epc_property_id=epc_property_id
    )
    session.add(performance_row)

    for heating_detail in data.sap_heating.main_heating_details:
        session.add(
            EpcMainHeatingDetailModel.from_domain(heating_detail, epc_property_id)
        )

    for building_part in data.sap_building_parts:
        part_row = EpcBuildingPartModel.from_domain(building_part, epc_property_id)
        session.add(part_row)
        # Each building part is flushed individually because its floor
        # dimensions are keyed on the building part's own id.
        session.flush()
        assert part_row.id is not None
        building_part_id = part_row.id
        for floor_dimension in building_part.sap_floor_dimensions:
            session.add(
                EpcFloorDimensionModel.from_domain(floor_dimension, building_part_id)
            )

    for sap_window in data.sap_windows:
        session.add(EpcWindowModel.from_domain(sap_window, epc_property_id))

    # Energy elements that arrive as collections, paired with their type tag.
    element_collections = (
        (data.roofs, "roof"),
        (data.walls, "wall"),
        (data.floors, "floor"),
        (data.main_heating, "main_heating"),
    )
    for elements, element_type in element_collections:
        for element in elements:
            session.add(
                EpcEnergyElementModel.from_domain(
                    element, element_type, epc_property_id
                )
            )

    # Energy elements that are single values; a None value is skipped.
    single_elements = (
        (data.window, "window"),
        (data.lighting, "lighting"),
        (data.hot_water, "hot_water"),
        (data.secondary_heating, "secondary_heating"),
        (data.main_heating_controls, "main_heating_controls"),
    )
    for element, element_type in single_elements:
        if element is None:
            continue
        session.add(
            EpcEnergyElementModel.from_domain(element, element_type, epc_property_id)
        )

    flat_details = data.sap_flat_details
    if flat_details is not None:
        session.add(EpcFlatDetailsModel.from_domain(flat_details, epc_property_id))

    return property_row

View file

@ -0,0 +1,13 @@
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
from backend.documents_parser.pdf import pdf_to_text_list
def parse_pashub_site_notes(file_path: str) -> EpcPropertyData:
    """Parse the site-notes file at ``file_path`` into ``EpcPropertyData``.

    The file is read as raw bytes, converted to a text list with
    ``pdf_to_text_list``, run through ``PasHubRdSapSiteNotesExtractor`` and
    finally mapped with ``EpcPropertyDataMapper.from_site_notes``.

    Args:
        file_path: Path of the file to read (opened in binary mode).

    Returns:
        The ``EpcPropertyData`` produced by the mapper.
    """
    with open(file_path, "rb") as source_file:
        raw_bytes = source_file.read()

    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(raw_bytes))
    extracted_notes = extractor.extract()
    return EpcPropertyDataMapper.from_site_notes(extracted_notes)

View file

@ -1,7 +1,7 @@
from datetime import datetime, timezone
import os
import re
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple, cast
from openpyxl import load_workbook
from backend.app.config import get_settings
@ -11,7 +11,10 @@ from backend.app.db.models.uploaded_file import (
FileTypeEnum,
UploadedFile,
)
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_pashub_site_notes
from backend.pashub_fetcher.core_files import infer_file_type
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
@ -119,6 +122,7 @@ def upload_job_to_s3_and_update_db(
)
uploaded_files: List[UploadedFile] = []
site_notes_pairs: List[Tuple[UploadedFile, EpcPropertyData]] = []
for file_path in job_files:
filename = os.path.basename(file_path)
@ -126,10 +130,8 @@ def upload_job_to_s3_and_update_db(
upload_file_to_s3(file_path, bucket, file_key)
# load row to db
# TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does
uploaded_files.append(
UploadedFile(
uploaded_file = UploadedFile(
s3_file_bucket=bucket,
s3_file_key=file_key,
s3_upload_timestamp=datetime.now(timezone.utc),
@ -138,28 +140,26 @@ def upload_job_to_s3_and_update_db(
file_source=FileSourceEnum.PAS_HUB.value,
file_type=infer_file_type(filename),
)
)
uploaded_files.append(uploaded_file)
file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
if file_type is not None and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE:
try:
site_notes_pairs.append((uploaded_file, parse_pashub_site_notes(file_path)))
except Exception:
logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
with db_session() as session:
session.add_all(uploaded_files)
# Ensure IDs are generated
session.flush()
results = [
{"file": file.s3_file_key, "type": file.file_type, "id": file.id}
for file in uploaded_files
]
for uploaded_file, epc_data in site_notes_pairs:
save_epc_property_data(
session, epc_data, uploaded_file_id=cast(int, uploaded_file.id)
)
session.commit()
for result in results:
if FileTypeEnum(result["type"]) == FileTypeEnum.RD_SAP_SITE_NOTE:
# upload site notes to epc_property table
continue
pass
def process_job(
job: PashubToAraTriggerRequest,