Save condition report data: detect Osmosis ACD PAS 2035 condition reports and store their reader

This commit is contained in:
Jun-te Kim 2025-05-30 14:32:37 +00:00
parent a4f239a0fd
commit 1d756b95b2
5 changed files with 29 additions and 5 deletions

View file

@ -1 +1,4 @@
# Scratch script: feeds one condition-report PDF to surveyedDataProcessor.
# The greeting is printed before the project import is attempted, so it
# appears even if the import below fails.
print("hello world")
from etl.surveyedData.surveryedData import surveyedDataProcessor

# Hard-coded absolute location of the sample PDF.
# NOTE(review): looks like a dev-container workspace path; it will not
# resolve on other machines -- confirm before reusing this script.
condition_report_file_path = (
    "/workspaces/survey-extractor/etl/files/osmosis_condition_report.pdf"
)

# First argument is presumably the property address (placeholder value here);
# the second is the list of files handed to the processor.
sdp = surveyedDataProcessor(
    "123 Fake Street",
    [condition_report_file_path],
)

View file

@ -1,7 +1,7 @@
from etl.utils.logger import Logger
import logging
import pymupdf
from etl.pdfReader.sitenotes import QuidosSiteNotesExtractor, CSR
from etl.pdfReader.sitenotes import QuidosSiteNotesExtractor, CSR, ConditionReport
from etl.pdfReader.reportType import ReportType
class pdfReaderToText():
@ -40,6 +40,8 @@ class pdfReaderToText():
self.type = ReportType.ENERGY_PERFORMANCE_REPORT
elif "Chartered Surveyor Report: Recommending Extraction of Defective Cavity Wall Insulation " in self.text_list:
self.type = ReportType.CHARTED_SURVEYOR_REPORT
elif "Osmosis ACD PAS 2035 Condition Report".lower() in self.text_list[0].lower():
self.type = ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT
else:
pass
return self.type
@ -51,4 +53,6 @@ class pdfReaderToText():
return QuidosSiteNotesExtractor(self.text_list)
elif self.type == ReportType.CHARTED_SURVEYOR_REPORT:
return CSR(self.text_list)
elif self.type == ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT:
return ConditionReport(self.text_list)

View file

@ -6,4 +6,5 @@ class ReportType(Enum):
CHARTED_SURVEYOR_REPORT = "charted_surveyor_report"
ENERGY_PERFORMANCE_REPORT = "energy_performance_report"
U_VALUE_CALCULATOR_REPORT = "u_value_calculator_report"
OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form"
OVERWRITING_U_VALUE_DECLARATION_FORM = "overwriting_u_value_declaration_form"
OSMOSIS_CONDITION_PAS_2035_REPORT = "osmosis_condition_pas_2035_report"

View file

@ -8,7 +8,11 @@ from etl.transform.preSiteNoteTypes import (
WindTurbine, OtherDetails, Windows, Heating, HeatingSystemControls,
HeatingType, Insulation
)
from etl.transform.conditionReportTypes import (
ConditionReport
)
from datetime import datetime
from pprint import pprint
class SiteNotesExtractor():
def __init__(self, data_list):
@ -58,8 +62,17 @@ class CSR(SiteNotesExtractor):
self.insulation_info = Insulation(
type=dict_.get('detailed_description_of_existing_cavity_wall_insulation_', "")
) if dict_ is not None else None
class ConditionReport(SiteNotesExtractor):
    """Site-notes extractor tagged as an Osmosis PAS 2035 condition report.

    NOTE(review): this module also imports a ``ConditionReport`` name from
    ``etl.transform.conditionReportTypes``; defining this class rebinds that
    name for the rest of the module. Confirm which of the two is meant to be
    reachable here.
    """

    def __init__(self, data_list):
        """Initialise the base extractor, tag the report type, run setup.

        Args:
            data_list: Passed unchanged to ``SiteNotesExtractor.__init__``.
                Presumably the text extracted from the PDF -- verify against
                the caller.
        """
        super().__init__(data_list)
        # Tag the instance after the base initialiser has run, so this value
        # wins over anything the base class may have assigned to ``type``.
        self.type = ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT
        self.setup()

    def setup(self):
        """Run the extraction steps for this report type.

        Currently the only step is ``property_reference_code()``, which is
        not defined in this class (presumably inherited -- TODO confirm).
        """
        self.property_reference_code()
class QuidosSiteNotesExtractor(SiteNotesExtractor):

View file

@ -21,6 +21,7 @@ class surveyedDataProcessor():
self.files = files
self.pre_site_note = None
self.csr = None
self.condition_report = None
self.identify_files()
self.hubspot_deal_id = None
@ -34,6 +35,8 @@ class surveyedDataProcessor():
self.address = self.pre_site_note.survey_information.address
elif pdf.type == ReportType.CHARTED_SURVEYOR_REPORT:
self.csr = pdf.get_reader()
elif pdf.type == ReportType.OSMOSIS_CONDITION_PAS_2035_REPORT:
self.condition_report = pdf.get_reader()
def load_pre_site_notes_summary_table(self, db_session):
summary_data = self.pre_site_note.survey_information.model_dump()