diff --git a/etl/condition_report_etl.py b/etl/condition_report_etl.py index e75154b..47c7af4 100644 --- a/etl/condition_report_etl.py +++ b/etl/condition_report_etl.py @@ -1 +1,4 @@ -print("hello world") \ No newline at end of file +from etl.surveyedData.surveryedData import surveyedDataProcessor + +condition_report_file_path = "/workspaces/survey-extractor/etl/files/osmosis_condition_report.pdf" +sdp = surveyedDataProcessor("123 Fake Street", [condition_report_file_path]) diff --git a/etl/pdfReader/pdfReaderToText.py b/etl/pdfReader/pdfReaderToText.py index 5ea7b42..e8a1bef 100644 --- a/etl/pdfReader/pdfReaderToText.py +++ b/etl/pdfReader/pdfReaderToText.py @@ -1,7 +1,7 @@ from etl.utils.logger import Logger import logging import pymupdf -from etl.pdfReader.sitenotes import QuidosSiteNotesExtractor, CSR +from etl.pdfReader.sitenotes import QuidosSiteNotesExtractor, CSR, ConditionReport from etl.pdfReader.reportType import ReportType class pdfReaderToText(): @@ -40,6 +40,8 @@ class pdfReaderToText(): self.type = ReportType.ENERGY_PERFORMANCE_REPORT elif "Chartered Surveyor Report: Recommending Extraction of Defective Cavity Wall Insulation " in self.text_list: self.type = ReportType.CHARTED_SURVEYOR_REPORT + elif "Osmosis ACD PAS 2035 Condition Report".lower() in self.text_list[0].lower(): + self.type = ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT else: pass return self.type @@ -51,4 +53,6 @@ class pdfReaderToText(): return QuidosSiteNotesExtractor(self.text_list) elif self.type == ReportType.CHARTED_SURVEYOR_REPORT: return CSR(self.text_list) + elif self.type == ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT: + return ConditionReport(self.text_list) \ No newline at end of file diff --git a/etl/pdfReader/reportType.py b/etl/pdfReader/reportType.py index c77ad9f..61bc908 100644 --- a/etl/pdfReader/reportType.py +++ b/etl/pdfReader/reportType.py @@ -6,4 +6,5 @@ class ReportType(Enum): CHARTED_SURVEYOR_REPORT = "charted_surveyor_report" ENERGY_PERFORMANCE_REPORT = "energy_performance_report" U_VALUE_CALCULATOR_REPORT = "u_value_calculator_report" - OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form" \ No newline at end of file + OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form" + OSMOSIS_CONDITION_PAS_2035_REPORT = "osmosis_condition_pas_2035_report" \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 7f36384..c4c06ba 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -8,7 +8,11 @@ from etl.transform.preSiteNoteTypes import ( WindTurbine, OtherDetails, Windows, Heating, HeatingSystemControls, HeatingType, Insulation ) +from etl.transform.conditionReportTypes import ( + ConditionReport +) from datetime import datetime +from pprint import pprint class SiteNotesExtractor(): def __init__(self, data_list): @@ -58,8 +62,17 @@ class CSR(SiteNotesExtractor): self.insulation_info = Insulation( type=dict_.get('detailed_description_of_existing_cavity_wall_insulation_', "") ) if dict_ is not None else None - - + + +class ConditionReport(SiteNotesExtractor): + def __init__(self, data_list): + super().__init__(data_list) + self.type = ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT + self.setup() + + def setup(self): + self.property_reference_code() + class QuidosSiteNotesExtractor(SiteNotesExtractor): diff --git a/etl/surveyedData/surveryedData.py b/etl/surveyedData/surveryedData.py index 427a9a4..8525278 100644 --- a/etl/surveyedData/surveryedData.py +++ b/etl/surveyedData/surveryedData.py @@ -21,6 +21,7 @@ class surveyedDataProcessor(): self.files = files self.pre_site_note = None self.csr = None + self.condition_report = None self.identify_files() self.hubspot_deal_id = None @@ -34,6 +35,8 @@ class surveyedDataProcessor(): self.address = self.pre_site_note.survey_information.address elif pdf.type == ReportType.CHARTED_SURVEYOR_REPORT: self.csr = pdf.get_reader() + elif pdf.type == ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT: + self.condition_report = pdf.get_reader() def load_pre_site_notes_summary_table(self, db_session): summary_data = self.pre_site_note.survey_information.model_dump()