From 64b423ad2e8f810e75048e3c447b9e820352c5e4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 4 Sep 2024 11:02:56 +0100
Subject: [PATCH] pulling out other file types from s3 during energy assessment upload

---
Review notes (this section is ignored when the patch is applied):
 * The EPR filter compared the lower-cased file name against "EPR.pdf",
   which can never match; the needle is now lower-case ("epr.pdf").
 * The floor-plan filter now lower-cases the key before testing the ".jpg"
   suffix, in line with the other case-insensitive filters.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch - TODO confirm the indentation against router.py
   before applying.

 backend/app/energy_assessments/router.py | 43 ++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index c4e0308b..21c4e4c1 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -13,7 +13,10 @@ from sqlalchemy.orm import sessionmaker
 from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
+from utils.s3 import (
+    read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3,
+    list_files_in_s3_folder
+)
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
 import os
@@ -68,8 +71,44 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
                 folder_name=os.path.join(assessment, "docs & plans")
             )
+
+            energy_assessment_files = list_files_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=os.path.join(assessment, "docs & plans")
+            )
+            # Remove xmls from the list of files - they are already tracked in uploaded_xmls
+            energy_assessment_files = [file for file in energy_assessment_files if file not in uploaded_xmls]
+            # Split the rest by type, matching on the lower-cased, space-stripped file name
+            # EPR - the needle must be lower case because the file name is lower-cased first
+            eprs = [
+                file for file in energy_assessment_files if "epr.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Condition report
+            condition_reports = [
+                file for file in energy_assessment_files if "cr.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Evidence report
+            evidence_reports = [
+                file for file in energy_assessment_files
+                if "evidence.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Summary report
+            summary_reports = [
+                file for file in energy_assessment_files
+                if "sn.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Floor plans - these are just the jpgs (extension matched case-insensitively)
+            floor_plans = [file for file in energy_assessment_files if file.lower().endswith(".jpg")]
+
             uprn = int(assessment.rstrip("/").split("/")[-1])
-            assessments_map[uprn] = uploaded_xmls
+            assessments_map[uprn] = {
+                "xmls": uploaded_xmls,
+                "eprs": eprs,
+                "condition_reports": condition_reports,
+                "evidence_reports": evidence_reports,
+                "summary_reports": summary_reports,
+                "floor_plans": floor_plans
+            }
 
         logger.info(f"Exatracted XMLS for the energy assessments")
 