migrating xml extraction to new router

This commit is contained in:
Khalim Conn-Kowlessar 2024-09-04 10:26:39 +01:00
parent 3eed20ff05
commit 92564be655
3 changed files with 36 additions and 5 deletions

View file

@ -10,6 +10,16 @@ from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
from utils.logger import setup_logger
from etl.xml_survey_extraction.XmlParser import XmlParser
import os
import pandas as pd
from io import BytesIO
from utils.logger import setup_logger from utils.logger import setup_logger
logger = setup_logger() logger = setup_logger()
@ -37,14 +47,32 @@ async def upload(body: EnergyAssessmentUploadPayload):
Eventually, we will this service to collect the key documents from the service where they're uploaded Eventually, we will this service to collect the key documents from the service where they're uploaded
(e.g. Onedrive) and store them to S3, but for the moment, this is sufficient (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient
# TODO - Holding up on implementing this
""" """
logger.info("Connecting to db") logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)() session = sessionmaker(bind=db_engine)()
try: try:
logger.info("Uploading energy assessment data") logger.info("Extracting energy assessment data")
energy_assessments = list_files_and_subfolders_in_s3_folder(
bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
folder_name=f"{body.surveyor}/{body.project_code}/"
)
logger.info(
f"Found {len(energy_assessments)} energy assessments for {body.surveyor} and {body.project_code}"
)
assessments_map = {}
for assessment in energy_assessments:
uploaded_xmls = list_xmls_in_s3_folder(
bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
folder_name=os.path.join(assessment, "docs & plans")
)
uprn = int(assessment.rstrip("/").split("/")[-1])
assessments_map[uprn] = uploaded_xmls
logger.info(f"Exatracted XMLS for the energy assessments")
except IntegrityError: except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True) logger.error("Database integrity error occurred", exc_info=True)
session.rollback() session.rollback()

View file

@ -3,5 +3,8 @@ from pydantic import BaseModel
class EnergyAssessmentUploadPayload(BaseModel): class EnergyAssessmentUploadPayload(BaseModel):
portfolio_id: int portfolio_id: int
# This is the s3 location, where the informaton collected during the energy assessment is stored # This is the energy assessment company/individual that conducted the energy assessment, where the data is uploaded
s3_filepath: str # against
surveyor: str
# is a code, like VEC001, which is used to identify the project and also where the data is uploaded against
project_code: str

View file

@ -15,7 +15,7 @@ USER_ID = 8
non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
SCENARIOS = { SCENARIOS = {
101: { 101: {
"project_code": "VDE001", "project_code": "VEC001",
"surveyor": "JAFFERSONS ENERGY CONSULTANTS", "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
"bodies": [ "bodies": [
# Scenario A: Cavity wall insulation # Scenario A: Cavity wall insulation