Summary report and EPR with data are now identified

This commit is contained in:
Jun-te Kim 2025-07-15 15:15:42 +00:00
parent 83ffa4234e
commit 0811339557
6 changed files with 49 additions and 21 deletions

View file

@ -2,6 +2,7 @@ from etl.surveyedData.surveryedData import surveyedDataProcessor
files = [
"/tmp/sharepoint/Sandwell/SANDWELL-001/26 Willow close B64 6EG/Content (13).pdf",
"/tmp/sharepoint/Livewest/Livewest-001/12 Birch End/Summary Information 12 Birch End.pdf"
]
from sqlalchemy.dialects.postgresql import UUID

View file

@ -1,7 +1,15 @@
from etl.utils.logger import Logger
import logging
import pymupdf
from etl.fileReader.sitenotes import QuidosSiteNotesExtractor, CSR, WarmHomesConditionReport, ECOConditionReport, RDSAPEnergyReport
from etl.fileReader.sitenotes import (
QuidosSiteNotesExtractor,
CSR,
WarmHomesConditionReport,
ECOConditionReport,
EnergyPerformanceReportWithData,
EnergyPerformanceReportSummaryInformation
)
from etl.fileReader.reportType import ReportType
from pprint import pprint
@ -46,8 +54,10 @@ class pdfReaderToText():
self.type = ReportType.WARM_HOMES_CONDITION_REPORT
elif "Domna NEW PAS 2035 ECO Condition Report".lower() in self.text_list[0].lower():
self.type = ReportType.ECO_CONDITION_REPORT
elif "ENERGY REPORT".lower() == self.text_list[0].lower():
self.type = ReportType.RDSAP_ENERGY_REPORT
elif "ENERGY REPORT".lower() == self.text_list[0].lower() and "Data inputs" in self.text_list:
self.type = ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA
elif "Summary Information".lower() == self.text_list[0].lower():
self.type = ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION
else:
pass
return self.type
@ -63,6 +73,7 @@ class pdfReaderToText():
return WarmHomesConditionReport(self.text_list)
elif self.type == ReportType.ECO_CONDITION_REPORT:
return ECOConditionReport(self.text_list)
elif self.type == ReportType.RDSAP_ENERGY_REPORT:
return RDSAPEnergyReport(self.text_list)
elif self.type == ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA:
return EnergyPerformanceReportWithData(self.text_list)
elif self.type == ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION:
return EnergyPerformanceReportSummaryInformation(self.text_list)

View file

@ -4,12 +4,12 @@ from enum import Enum
class ReportType(Enum):
QUIDOS_PRESITE_NOTE = "quidos_presite_note"
CHARTED_SURVEYOR_REPORT = "charted_surveyor_report"
ENERGY_PERFORMANCE_REPORT = "energy_performance_report"
U_VALUE_CALCULATOR_REPORT = "u_value_calculator_report"
OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form"
ECO_CONDITION_REPORT = "osmosis_condition_pas_2035_report"
WARM_HOMES_CONDITION_REPORT = "warm_homes_condition_pas_2035_report"
RDSAP_ENERGY_REPORT = "rdsap_energy_report"
ENERGY_PERFORMANCE_REPORT_WITH_DATA = "energy_performance_report_with_data"
ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION = "energy_performance_report_summary_information"
LIG_XML = "lodgement_xml_needed_for_lodgement_to_like_trademark"
RDSAP_XML = "reduce_xml_needed_to_generate_full_sap_xml"
FULLSAP_XML = "full_xml_needed_for_co_ordination"

View file

@ -88,14 +88,7 @@ class CSR(SiteNotesExtractor):
type=dict_.get('detailed_description_of_existing_cavity_wall_insulation_', "")
) if dict_ is not None else None
class RDSAPEnergyReport(SiteNotesExtractor):
    """Site-notes extractor tagged as ``ReportType.RDSAP_ENERGY_REPORT``.

    Extraction is not implemented: ``setup_energy_report`` is a stub, so
    ``master_obj`` is always ``None`` after construction.
    """

    def __init__(self, data_list):
        """Pass ``data_list`` to the base extractor, then tag and set up the report."""
        super().__init__(data_list)
        self.type = ReportType.RDSAP_ENERGY_REPORT
        master = self.setup_energy_report()
        self.master_obj = master

    def setup_energy_report(self):
        """Placeholder for building the master object; currently returns ``None``."""
        return None
class ECOConditionReport(SiteNotesExtractor):
def __init__(self, data_list):
@ -1597,4 +1590,20 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
main_gas_avalible=True if dict_.get("main_gas_available", "NO").upper() == "YES" else False,
)
class EnergyPerformanceReportWithData(SiteNotesExtractor):
    """Site-notes extractor tagged as ``ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA``.

    Extraction is not implemented: ``setup`` is a stub, so ``master_obj``
    is always ``None`` after construction.
    """

    def __init__(self, data_list):
        """Pass ``data_list`` to the base extractor, then tag and set up the report."""
        super().__init__(data_list)
        self.type = ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA
        master = self.setup()
        self.master_obj = master

    def setup(self):
        """Placeholder for building the master object; currently returns ``None``."""
        return None
class EnergyPerformanceReportSummaryInformation(SiteNotesExtractor):
    """Site-notes extractor tagged as
    ``ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION``.

    Extraction is not implemented: ``setup`` is a stub, so ``master_obj``
    is always ``None`` after construction.
    """

    def __init__(self, data_list):
        """Pass ``data_list`` to the base extractor, then tag and set up the report."""
        super().__init__(data_list)
        self.type = ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION
        master = self.setup()
        self.master_obj = master

    def setup(self):
        """Placeholder for building the master object; currently returns ``None``."""
        return None

View file

@ -80,8 +80,8 @@ class SubmissionInfoFromDeal(BaseModel):
if sdp.condition_report is None:
missing_items.append("Condition Report")
if sdp.energy_report is None:
missing_items.append("Energy Report PDF")
# The EPR "with data" report is stored on sdp.epr_with_data, so that is the
# attribute this message must be keyed on. The summary-information report has
# its own separate check and message.
if sdp.epr_with_data is None:
    missing_items.append("EPR Energy report with data is missing")
if sdp.rd_sap_xml is None:
missing_items.append("RDSAP XML")
@ -89,6 +89,9 @@ class SubmissionInfoFromDeal(BaseModel):
if sdp.lig_sap_xml is None:
missing_items.append("LIG SAP XML")
if sdp.epr_summary_information is None:
missing_items.append("EPR Summary information is missing")
if missing_items:
raise ValueError(f"Missing required items: {', '.join(missing_items)}")

View file

@ -41,7 +41,8 @@ class surveyedDataProcessor():
self.csr = None
self.condition_report = None
self.hubspot_deal_id = None
self.energy_report = None
self.epr_with_data = None
self.epr_summary_information = None
self.full_sap_xml = None
self.lig_sap_xml = None
self.rd_sap_xml = None
@ -64,8 +65,11 @@ class surveyedDataProcessor():
self.condition_report = pdf.get_reader()
elif pdf.type == ReportType.ECO_CONDITION_REPORT:
self.condition_report = pdf.get_reader()
elif pdf.type == ReportType.RDSAP_ENERGY_REPORT:
self.energy_report = pdf.get_reader()
elif pdf.type == ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA:
self.epr_with_data = pdf.get_reader()
elif pdf.type == ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION:
self.epr_summary_information = pdf.get_reader()
elif file.lower().endswith('.xml'):
xml = xmlReader(file)
if xml: