mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Include updating epc_property_data in the PasHub-to-ARA workflow
This commit is contained in:
parent
1bc8343738
commit
252657a374
3 changed files with 114 additions and 25 deletions
76
backend/documents_parser/db_writer.py
Normal file
76
backend/documents_parser/db_writer.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
from typing import Optional
|
||||
|
||||
from sqlmodel import Session
|
||||
|
||||
from backend.app.db.models.epc_property import (
|
||||
EpcBuildingPartModel,
|
||||
EpcEnergyElementModel,
|
||||
EpcFlatDetailsModel,
|
||||
EpcFloorDimensionModel,
|
||||
EpcMainHeatingDetailModel,
|
||||
EpcPropertyEnergyPerformanceModel,
|
||||
EpcPropertyModel,
|
||||
EpcWindowModel,
|
||||
)
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
|
||||
|
||||
def save_epc_property_data(
    session: Session,
    data: EpcPropertyData,
    uploaded_file_id: Optional[int] = None,
    property_id: Optional[int] = None,
    portfolio_id: Optional[int] = None,
) -> EpcPropertyModel:
    """Stage an EPC property and all of its child rows on ``session``.

    The parent ``EpcPropertyModel`` is added and flushed first so that its
    generated id can be handed to every child model. Each building part is
    flushed as well, because its floor dimensions need the building part id.
    This function only calls ``session.add`` and ``session.flush``; it never
    commits, so the caller decides when (and whether) to commit.

    Args:
        session: Open session the rows are added to.
        data: Domain object the rows are built from.
        uploaded_file_id: Optional id stored on the parent row's
            ``uploaded_file_id`` attribute.
        property_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.
        portfolio_id: Forwarded to ``EpcPropertyModel.from_epc_property_data``.

    Returns:
        The flushed ``EpcPropertyModel`` instance.

    Raises:
        RuntimeError: If a flushed parent or building-part row still has no
            id. This is raised explicitly (instead of ``assert``) so the
            check survives ``python -O``.
    """
    epc_prop = EpcPropertyModel.from_epc_property_data(
        data, property_id=property_id, portfolio_id=portfolio_id
    )
    epc_prop.uploaded_file_id = uploaded_file_id
    session.add(epc_prop)
    # Flush so the parent's id is available to the child rows below.
    session.flush()
    if epc_prop.id is None:
        raise RuntimeError("EpcPropertyModel has no id after flush")
    epc_property_id: int = epc_prop.id

    session.add(
        EpcPropertyEnergyPerformanceModel.from_epc_property_data(
            data, epc_property_id=epc_property_id
        )
    )

    for detail in data.sap_heating.main_heating_details:
        session.add(EpcMainHeatingDetailModel.from_domain(detail, epc_property_id))

    for part in data.sap_building_parts:
        bp = EpcBuildingPartModel.from_domain(part, epc_property_id)
        session.add(bp)
        # Floor dimensions reference the building part, so its id is needed now.
        session.flush()
        if bp.id is None:
            raise RuntimeError("EpcBuildingPartModel has no id after flush")
        for dim in part.sap_floor_dimensions:
            session.add(EpcFloorDimensionModel.from_domain(dim, bp.id))

    for window in data.sap_windows:
        session.add(EpcWindowModel.from_domain(window, epc_property_id))

    # Energy elements that come as collections, paired with their type label.
    multi_elements = (
        (data.roofs, "roof"),
        (data.walls, "wall"),
        (data.floors, "floor"),
        (data.main_heating, "main_heating"),
    )
    for elements, etype in multi_elements:
        for el in elements:
            session.add(EpcEnergyElementModel.from_domain(el, etype, epc_property_id))

    # Energy elements that are single optional values; absent ones are skipped.
    single_elements = (
        (data.window, "window"),
        (data.lighting, "lighting"),
        (data.hot_water, "hot_water"),
        (data.secondary_heating, "secondary_heating"),
        (data.main_heating_controls, "main_heating_controls"),
    )
    for el, etype in single_elements:
        if el is not None:
            session.add(EpcEnergyElementModel.from_domain(el, etype, epc_property_id))

    if data.sap_flat_details is not None:
        session.add(EpcFlatDetailsModel.from_domain(data.sap_flat_details, epc_property_id))

    return epc_prop
|
||||
13
backend/documents_parser/parser.py
Normal file
13
backend/documents_parser/parser.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
|
||||
from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
|
||||
from backend.documents_parser.pdf import pdf_to_text_list
|
||||
|
||||
|
||||
def parse_pashub_site_notes(file_path: str) -> EpcPropertyData:
    """Parse a PasHub site-notes PDF on disk into an ``EpcPropertyData``.

    The file is read in binary mode, turned into a token list with
    ``pdf_to_text_list``, run through ``PasHubRdSapSiteNotesExtractor`` and
    finally mapped with ``EpcPropertyDataMapper.from_site_notes``.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The mapped ``EpcPropertyData``.
    """
    with open(file_path, "rb") as pdf_file:
        raw_pdf = pdf_file.read()

    extractor = PasHubRdSapSiteNotesExtractor(pdf_to_text_list(raw_pdf))
    return EpcPropertyDataMapper.from_site_notes(extractor.extract())
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
from datetime import datetime, timezone
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple, cast
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from backend.app.config import get_settings
|
||||
|
|
@ -11,7 +11,10 @@ from backend.app.db.models.uploaded_file import (
|
|||
FileTypeEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
from backend.documents_parser.db_writer import save_epc_property_data
|
||||
from backend.documents_parser.parser import parse_pashub_site_notes
|
||||
from backend.pashub_fetcher.core_files import infer_file_type
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
|
||||
from backend.pashub_fetcher.job import Job
|
||||
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
||||
|
|
@ -119,6 +122,7 @@ def upload_job_to_s3_and_update_db(
|
|||
)
|
||||
|
||||
uploaded_files: List[UploadedFile] = []
|
||||
site_notes_pairs: List[Tuple[UploadedFile, EpcPropertyData]] = []
|
||||
|
||||
for file_path in job_files:
|
||||
filename = os.path.basename(file_path)
|
||||
|
|
@ -126,10 +130,8 @@ def upload_job_to_s3_and_update_db(
|
|||
|
||||
upload_file_to_s3(file_path, bucket, file_key)
|
||||
|
||||
# load row to db
|
||||
# TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does
|
||||
uploaded_files.append(
|
||||
UploadedFile(
|
||||
uploaded_file = UploadedFile(
|
||||
s3_file_bucket=bucket,
|
||||
s3_file_key=file_key,
|
||||
s3_upload_timestamp=datetime.now(timezone.utc),
|
||||
|
|
@ -138,28 +140,26 @@ def upload_job_to_s3_and_update_db(
|
|||
file_source=FileSourceEnum.PAS_HUB.value,
|
||||
file_type=infer_file_type(filename),
|
||||
)
|
||||
)
|
||||
uploaded_files.append(uploaded_file)
|
||||
|
||||
file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
|
||||
if file_type is not None and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE:
|
||||
try:
|
||||
site_notes_pairs.append((uploaded_file, parse_pashub_site_notes(file_path)))
|
||||
except Exception:
|
||||
logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
|
||||
|
||||
with db_session() as session:
|
||||
session.add_all(uploaded_files)
|
||||
|
||||
# Ensure IDs are generated
|
||||
session.flush()
|
||||
|
||||
results = [
|
||||
{"file": file.s3_file_key, "type": file.file_type, "id": file.id}
|
||||
for file in uploaded_files
|
||||
]
|
||||
for uploaded_file, epc_data in site_notes_pairs:
|
||||
save_epc_property_data(
|
||||
session, epc_data, uploaded_file_id=cast(int, uploaded_file.id)
|
||||
)
|
||||
|
||||
session.commit()
|
||||
|
||||
for result in results:
|
||||
if FileTypeEnum(result["type"]) == FileTypeEnum.RD_SAP_SITE_NOTE:
|
||||
# upload site notes to epc_property table
|
||||
continue
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def process_job(
|
||||
job: PashubToAraTriggerRequest,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue