Working upload of documents to backend

This commit is contained in:
Khalim Conn-Kowlessar 2024-09-04 19:39:31 +01:00
parent 9bac1e7132
commit 984abe7292
3 changed files with 126 additions and 54 deletions

View file

@ -1,19 +1,26 @@
from backend.app.db.models.energy_assessments import ( from backend.app.db.models.energy_assessments import (
EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum
) )
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from typing import Optional, List from typing import Optional, List, Dict
from sqlalchemy import desc from sqlalchemy import desc
from utils.logger import setup_logger
logger = setup_logger()
def bulk_insert_energy_assessments(session: Session, data_list): def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> Dict[int, int]:
""" """
This function inserts or updates multiple energy assessment records into the database. This function inserts or updates multiple energy assessment records into the database and returns a mapping of
uprn to energy_assessment_id.
:param session: The SQLAlchemy session. :param session: The SQLAlchemy session.
:param data_list: A list of dictionaries containing energy assessment data. :param data_list: A list of dictionaries containing energy assessment data.
:return: A dictionary mapping each uprn to its corresponding energy_assessment_id.
""" """
uprn_to_assessment_id = {}
try: try:
for data in data_list: for data in data_list:
uprn = data.get('uprn') uprn = data.get('uprn')
@ -30,19 +37,30 @@ def bulk_insert_energy_assessments(session: Session, data_list):
for key, value in data.items(): for key, value in data.items():
setattr(existing_record, key, value) setattr(existing_record, key, value)
session.add(existing_record) session.add(existing_record)
# Map the uprn to the existing record's ID
uprn_to_assessment_id[uprn] = existing_record.id
else: else:
# Insert a new record # Insert a new record
new_assessment = EnergyAssessment(**data) new_assessment = EnergyAssessment(**data)
session.add(new_assessment) session.add(new_assessment)
# Flush the session to get the newly created ID before commit
session.flush()
# Map the uprn to the new record's ID
uprn_to_assessment_id[uprn] = new_assessment.id
# Commit the transaction # Commit the transaction
session.commit() session.commit()
print("All records inserted or updated successfully.") logger.info("All records inserted or updated successfully.")
except IntegrityError as e: except IntegrityError as e:
# Rollback the session in case of error # Rollback the session in case of error
session.rollback() session.rollback()
print(f"Error occurred: {e}") logger.info(f"Error occurred: {e}")
return uprn_to_assessment_id
def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]: def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]:
@ -60,77 +78,81 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ
return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response() return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
except Exception as e: except Exception as e:
print(f"An error occurred: {e}") logger.info(f"An error occurred: {e}")
return None return None
def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict):
    """
    Creates any missing scenarios for the given documents and replaces each document's scenario name
    with the database ID of its scenario.

    On entry, a document's 'scenario_id' key holds the scenario *name* (or a falsy value when the
    document does not belong to a scenario). For every document with a scenario name, the scenario is
    looked up by (scenario_name, energy_assessment_id), created if absent, and the document's
    'scenario_id' is overwritten in place with the scenario's ID.

    Documents whose UPRN has no entry in uprn_to_assessment_id are left untouched and logged: a scenario
    cannot be created without an energy_assessment_id (the column is NOT NULL), and attempting it would
    roll back every other scenario in the batch.

    :param session: The SQLAlchemy session.
    :param document_list: A list of dictionaries containing document data. Mutated in place.
    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
    """
    # (scenario_name, energy_assessment_id) -> scenario ID, so several documents belonging to the same
    # scenario only query the database once.
    resolved_scenario_ids = {}

    try:
        for document in document_list:
            scenario_name = document.get('scenario_id')
            if not scenario_name:
                # Document is not attached to a scenario
                continue

            uprn = document.get('uprn')
            energy_assessment_id = uprn_to_assessment_id.get(uprn)
            if energy_assessment_id is None:
                logger.warning(
                    f"No energy_assessment_id found for UPRN: {uprn}. "
                    f"Skipping scenario '{scenario_name}'."
                )
                continue

            scenario_key = (scenario_name, energy_assessment_id)
            if scenario_key not in resolved_scenario_ids:
                # Check if the scenario already exists
                scenario = session.query(EnergyAssessmentScenarios).filter_by(
                    scenario_name=scenario_name,
                    energy_assessment_id=energy_assessment_id
                ).first()

                if not scenario:
                    # Create the scenario
                    scenario = EnergyAssessmentScenarios(
                        scenario_name=scenario_name,
                        energy_assessment_id=energy_assessment_id
                    )
                    session.add(scenario)
                    session.flush()  # Get the new scenario ID before commit

                resolved_scenario_ids[scenario_key] = scenario.id

            # Replace the scenario name with the scenario's database ID
            document['scenario_id'] = resolved_scenario_ids[scenario_key]

        # Commit the scenarios
        session.commit()
        logger.info("Scenarios created successfully.")

    except IntegrityError as e:
        session.rollback()
        logger.error(f"Error occurred: {e}")
def create_documents(session: Session, document_list: List[dict]):
    """
    Inserts documents into the energy_assessment_documents table, linking them to scenarios and assessments.
    For usage in the energy assessment upload router

    Every row is built before the session is touched, so a malformed document (missing key or unknown
    document_type) raises without leaving half of the batch pending in the session. Documents that
    carry no 'energy_assessment_id' are skipped with a warning instead of raising KeyError.

    :param session: The SQLAlchemy session.
    :param document_list: A list of dictionaries containing document data. Each needs 'uprn',
        'document_type', 'document_location' and 'energy_assessment_id'; 'scenario_id' is optional.
    :raises ValueError: If a document_type is not a value of DocumentTypeEnum.
    :raises KeyError: If a document lacks 'uprn', 'document_type' or 'document_location'.
    """
    new_documents = []
    for document in document_list:
        energy_assessment_id = document.get('energy_assessment_id')
        if energy_assessment_id is None:
            logger.warning(
                f"No energy_assessment_id set for document with UPRN: {document.get('uprn')}. "
                "Skipping document."
            )
            continue

        new_documents.append(
            EnergyAssessmentDocuments(
                uprn=document['uprn'],
                # Validates the type against the enum; an unknown value raises ValueError here
                document_type=DocumentTypeEnum(document['document_type']).value,
                document_location=document['document_location'],
                energy_assessment_id=energy_assessment_id,
                scenario_id=document.get('scenario_id')  # Might be None if no scenario
            )
        )

    try:
        session.add_all(new_documents)

        # Commit all document insertions
        session.commit()
        logger.info("Documents created successfully.")

    except IntegrityError as e:
        session.rollback()
        logger.error(f"Error occurred: {e}")

View file

@ -1,5 +1,8 @@
from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import ENUM as PgEnum
import enum
from datetime import datetime
Base = declarative_base() Base = declarative_base()
@ -172,19 +175,33 @@ class EnergyAssessmentScenarios(Base):
energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False) energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
class DocumentTypeEnum(enum.Enum):
    """
    Allowed document types for rows in energy_assessment_documents.

    Passed to the PgEnum of the `document_type` column on EnergyAssessmentDocuments. Member values are
    the human-readable labels; `DocumentTypeEnum("<label>")` looks a member up by its value and raises
    ValueError for an unknown label.
    """
    EPR = "EPR"
    ConditionReport = "Condition Report"
    EvidenceReport = "Evidence Report"
    SummaryInformation = "Summary Information"
    FloorPlan = "Floor Plan"
    # NOTE(review): the two "Scenario ..." types presumably belong to documents linked to a
    # scenario via scenario_id - confirm against the upload payload.
    ScenarioDraftEPC = "Scenario Draft EPC"
    ScenarioSiteNotes = "Scenario Site Notes"
class EnergyAssessmentDocuments(Base): class EnergyAssessmentDocuments(Base):
__tablename__ = 'energy_assessment_documents' __tablename__ = 'energy_assessment_documents'
id = Column(BigInteger, primary_key=True, autoincrement=True) id = Column(BigInteger, primary_key=True, autoincrement=True)
uprn = Column(BigInteger, nullable=False) uprn = Column(BigInteger, nullable=False)
energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False) energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
document_type = Column(Text, nullable=False) # You can handle this using an enum if needed document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
document_location = Column(Text, nullable=False) document_location = Column(Text, nullable=False)
uploaded_at = Column(DateTime(timezone=True), nullable=False) uploaded_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True) scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)
@staticmethod @staticmethod
def empty_response(): def empty_response():
return { return {
"id": None, "uprn": None, "document_type": None, "document_location": None, "uploaded_at": None, "id": None,
"uprn": None,
"document_type": None,
"document_location": None,
"uploaded_at": None,
"scenario_id": None "scenario_id": None
} }

View file

@ -1,5 +1,6 @@
import os import os
from io import BytesIO from io import BytesIO
from typing import List
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from starlette.responses import Response from starlette.responses import Response
@ -11,7 +12,9 @@ from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.exc import IntegrityError, OperationalError
from backend.app.db.connection import db_engine from backend.app.db.connection import db_engine
from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments from backend.app.db.functions.energy_assessment_functions import (
bulk_insert_energy_assessments, create_scenarios_for_documents, create_documents
)
from etl.xml_survey_extraction.XmlParser import XmlParser from etl.xml_survey_extraction.XmlParser import XmlParser
@ -23,6 +26,29 @@ from utils.logger import setup_logger
logger = setup_logger() logger = setup_logger()
def insert_energy_assessment_documents(document_list: List[dict], uprn_to_assessment_id: dict):
    """
    Attaches the matching energy_assessment_id to each document (no database access happens here).

    Documents whose UPRN has no entry in uprn_to_assessment_id are removed from document_list, so the
    "Skipping document." log message is actually honoured by the steps that consume the list afterwards
    (they require 'energy_assessment_id' to be present on every document).

    :param document_list: A list of dictionaries containing document data. Mutated in place: matched
        documents gain an 'energy_assessment_id' key, unmatched documents are dropped.
    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
    """
    matched_documents = []
    for document in document_list:
        uprn = document['uprn']

        # Assign the energy_assessment_id based on uprn
        energy_assessment_id = uprn_to_assessment_id.get(uprn)
        if energy_assessment_id is None:
            logger.warning(f"No energy_assessment_id found for UPRN: {uprn}. Skipping document.")
            continue

        # Attach energy_assessment_id to each document
        document['energy_assessment_id'] = energy_assessment_id
        matched_documents.append(document)

    # Slice assignment updates the caller's list object rather than rebinding a local name
    document_list[:] = matched_documents

    logger.info("Energy Assessment IDs assigned to documents.")
router = APIRouter( router = APIRouter(
prefix="/energy-assessments", prefix="/energy-assessments",
tags=["energy-assessments"], tags=["energy-assessments"],
@ -214,9 +240,16 @@ async def upload(body: EnergyAssessmentUploadPayload):
xml_data_to_store.append(extracted_data) xml_data_to_store.append(extracted_data)
logger.info("Storing energy assessment xml data to database") logger.info("Storing energy assessment xml data to database")
bulk_insert_energy_assessments(session, xml_data_to_store) uprn_to_assessment_id = bulk_insert_energy_assessments(session, xml_data_to_store)
# TODO: Store energy_assessment_documents # Insert energy assessment id into the documents data
insert_energy_assessment_documents(energy_assessment_documents, uprn_to_assessment_id)
create_scenarios_for_documents(session, energy_assessment_documents, uprn_to_assessment_id)
create_documents(session, energy_assessment_documents)
session.close()
except IntegrityError: except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True) logger.error("Database integrity error occurred", exc_info=True)