mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
summary report and epr with data are now identified
This commit is contained in:
parent
83ffa4234e
commit
0811339557
6 changed files with 49 additions and 21 deletions
|
|
@ -2,6 +2,7 @@ from etl.surveyedData.surveryedData import surveyedDataProcessor
|
|||
|
||||
files = [
|
||||
"/tmp/sharepoint/Sandwell/SANDWELL-001/26 Willow close B64 6EG/Content (13).pdf",
|
||||
"/tmp/sharepoint/Livewest/Livewest-001/12 Birch End/Summary Information 12 Birch End.pdf"
|
||||
]
|
||||
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
|
|
|||
|
|
@ -1,7 +1,15 @@
|
|||
from etl.utils.logger import Logger
|
||||
import logging
|
||||
import pymupdf
|
||||
from etl.fileReader.sitenotes import QuidosSiteNotesExtractor, CSR, WarmHomesConditionReport, ECOConditionReport, RDSAPEnergyReport
|
||||
from etl.fileReader.sitenotes import (
|
||||
QuidosSiteNotesExtractor,
|
||||
CSR,
|
||||
WarmHomesConditionReport,
|
||||
ECOConditionReport,
|
||||
EnergyPerformanceReportWithData,
|
||||
EnergyPerformanceReportSummaryInformation
|
||||
|
||||
)
|
||||
from etl.fileReader.reportType import ReportType
|
||||
from pprint import pprint
|
||||
|
||||
|
|
@ -46,8 +54,10 @@ class pdfReaderToText():
|
|||
self.type = ReportType.WARM_HOMES_CONDITION_REPORT
|
||||
elif "Domna NEW PAS 2035 ECO Condition Report".lower() in self.text_list[0].lower():
|
||||
self.type = ReportType.ECO_CONDITION_REPORT
|
||||
elif "ENERGY REPORT".lower() == self.text_list[0].lower():
|
||||
self.type = ReportType.RDSAP_ENERGY_REPORT
|
||||
elif "ENERGY REPORT".lower() == self.text_list[0].lower() and "Data inputs" in self.text_list:
|
||||
self.type = ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA
|
||||
elif "Summary Information".lower() == self.text_list[0].lower():
|
||||
self.type = ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION
|
||||
else:
|
||||
pass
|
||||
return self.type
|
||||
|
|
@ -63,6 +73,7 @@ class pdfReaderToText():
|
|||
return WarmHomesConditionReport(self.text_list)
|
||||
elif self.type == ReportType.ECO_CONDITION_REPORT:
|
||||
return ECOConditionReport(self.text_list)
|
||||
elif self.type == ReportType.RDSAP_ENERGY_REPORT:
|
||||
return RDSAPEnergyReport(self.text_list)
|
||||
|
||||
elif self.type == ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA:
|
||||
return EnergyPerformanceReportWithData(self.text_list)
|
||||
elif self.type == ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION:
|
||||
return EnergyPerformanceReportSummaryInformation(self.text_list)
|
||||
|
|
@ -4,12 +4,12 @@ from enum import Enum
|
|||
class ReportType(Enum):
|
||||
QUIDOS_PRESITE_NOTE = "quidos_presite_note"
|
||||
CHARTED_SURVEYOR_REPORT = "charted_surveyor_report"
|
||||
ENERGY_PERFORMANCE_REPORT = "energy_performance_report"
|
||||
U_VALUE_CALCULATOR_REPORT = "u_value_calculator_report"
|
||||
OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form"
|
||||
ECO_CONDITION_REPORT = "osmosis_condition_pas_2035_report"
|
||||
WARM_HOMES_CONDITION_REPORT = "warm_homes_condition_pas_2035_report"
|
||||
RDSAP_ENERGY_REPORT = "rdsap_energy_report"
|
||||
ENERGY_PERFORMANCE_REPORT_WITH_DATA = "energy_performance_report_with_data"
|
||||
ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION = "energy_performance_report_summary_information"
|
||||
LIG_XML = "lodgement_xml_needed_for_lodgement_to_like_trademark"
|
||||
RDSAP_XML = "reduce_xml_needed_to_generate_full_sap_xml"
|
||||
FULLSAP_XML = "full_xml_needed_for_co_ordination"
|
||||
|
|
|
|||
|
|
@ -88,14 +88,7 @@ class CSR(SiteNotesExtractor):
|
|||
type=dict_.get('detailed_description_of_existing_cavity_wall_insulation_', "")
|
||||
) if dict_ is not None else None
|
||||
|
||||
class RDSAPEnergyReport(SiteNotesExtractor):
    """Site-notes extractor tagged as ``ReportType.RDSAP_ENERGY_REPORT``.

    Construction stores the result of :meth:`setup_energy_report` on
    ``master_obj``; that method is currently a stub, so the stored value
    is ``None``.
    """

    def __init__(self, data_list):
        """Initialise the base extractor, then tag and parse the report.

        Args:
            data_list: Forwarded unchanged to ``SiteNotesExtractor.__init__``.
                Presumably the text extracted from the PDF -- confirm
                against the caller.
        """
        super().__init__(data_list)
        self.type = ReportType.RDSAP_ENERGY_REPORT
        parsed = self.setup_energy_report()
        self.master_obj = parsed

    def setup_energy_report(self):
        """Placeholder parser: no extraction is implemented yet.

        Returns:
            None.
        """
        return None
|
||||
|
||||
class ECOConditionReport(SiteNotesExtractor):
|
||||
def __init__(self, data_list):
|
||||
|
|
@ -1597,4 +1590,20 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor):
|
|||
main_gas_avalible=True if dict_.get("main_gas_available", "NO").upper() == "YES" else False,
|
||||
)
|
||||
|
||||
|
||||
class EnergyPerformanceReportWithData(SiteNotesExtractor):
    """Site-notes extractor for an energy performance report with data.

    Construction tags the instance with
    ``ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA`` and stores the result
    of :meth:`setup` on ``master_obj``. ``setup`` is currently a stub, so
    the stored value is ``None``.
    """

    def __init__(self, data_list):
        """Initialise the base extractor, then tag and parse the report.

        Args:
            data_list: Forwarded unchanged to ``SiteNotesExtractor.__init__``.
                Presumably the text extracted from the PDF -- confirm
                against the caller.
        """
        super().__init__(data_list)
        self.type = ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA
        parsed = self.setup()
        self.master_obj = parsed

    def setup(self):
        """Placeholder parser: no extraction is implemented yet.

        Returns:
            None.
        """
        return None
|
||||
|
||||
class EnergyPerformanceReportSummaryInformation(SiteNotesExtractor):
    """Site-notes extractor for an EPR "Summary Information" document.

    Construction tags the instance with
    ``ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION`` and stores
    the result of :meth:`setup` on ``master_obj``. ``setup`` is currently a
    stub, so the stored value is ``None``.
    """

    def __init__(self, data_list):
        """Initialise the base extractor, then tag and parse the report.

        Args:
            data_list: Forwarded unchanged to ``SiteNotesExtractor.__init__``.
                Presumably the text extracted from the PDF -- confirm
                against the caller.
        """
        super().__init__(data_list)
        self.type = ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION
        parsed = self.setup()
        self.master_obj = parsed

    def setup(self):
        """Placeholder parser: no extraction is implemented yet.

        Returns:
            None.
        """
        return None
|
||||
|
|
@ -80,8 +80,8 @@ class SubmissionInfoFromDeal(BaseModel):
|
|||
if sdp.condition_report is None:
|
||||
missing_items.append("Condition Report")
|
||||
|
||||
if sdp.energy_report is None:
|
||||
missing_items.append("Energy Report PDF")
|
||||
if sdp.epr_summary_information is None:
|
||||
missing_items.append("EPR Energy report with data is missing")
|
||||
|
||||
if sdp.rd_sap_xml is None:
|
||||
missing_items.append("RDSAP XML")
|
||||
|
|
@ -89,6 +89,9 @@ class SubmissionInfoFromDeal(BaseModel):
|
|||
if sdp.lig_sap_xml is None:
|
||||
missing_items.append("LIG SAP XML")
|
||||
|
||||
if sdp.epr_summary_information is None:
|
||||
missing_items.append("EPR Summary information is missing")
|
||||
|
||||
if missing_items:
|
||||
raise ValueError(f"Missing required items: {', '.join(missing_items)}")
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ class surveyedDataProcessor():
|
|||
self.csr = None
|
||||
self.condition_report = None
|
||||
self.hubspot_deal_id = None
|
||||
self.energy_report = None
|
||||
self.epr_with_data = None
|
||||
self.epr_summary_information = None
|
||||
self.full_sap_xml = None
|
||||
self.lig_sap_xml = None
|
||||
self.rd_sap_xml = None
|
||||
|
|
@ -64,8 +65,11 @@ class surveyedDataProcessor():
|
|||
self.condition_report = pdf.get_reader()
|
||||
elif pdf.type == ReportType.ECO_CONDITION_REPORT:
|
||||
self.condition_report = pdf.get_reader()
|
||||
elif pdf.type == ReportType.RDSAP_ENERGY_REPORT:
|
||||
self.energy_report = pdf.get_reader()
|
||||
elif pdf.type == ReportType.ENERGY_PERFORMANCE_REPORT_WITH_DATA:
|
||||
self.epr_with_data = pdf.get_reader()
|
||||
elif pdf.type == ReportType.ENERGY_PERFORMANCE_REPORT_SUMMARY_INFORMATION:
|
||||
self.epr_summary_information = pdf.get_reader()
|
||||
|
||||
elif file.lower().endswith('.xml'):
|
||||
xml = xmlReader(file)
|
||||
if xml:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue