diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index a2b19cb..e6772ed 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -91,7 +91,7 @@ class HubspotTodb(): def load_pre_site_note(self, db_session): for surveyedData in self.data_in_sharepoint: - surveyedData.load_assessor_table(db_session) - - - + # Loads Assessor information and Company information to db + assessor = surveyedData.load_assessor_table(db_session) + # Loads the pre site summary information + summary_info = surveyedData.load_pre_site_notes_summary_table(db_session) \ No newline at end of file diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index 8f33bc9..43b17e8 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -1,6 +1,6 @@ from etl.pdfReader.reportType import ReportType from etl.transform.preSiteNoteTypes import ( - CompanyInfo, SurverySummaryInfo, AssessorInfo, + CompanyInfo, PreSiteNotesSummaryInfo, AssessorInfo, PropertyDescription, PropertyDetail, Dimension, Walls, Roofs, Floors, Door, VentilationAndCooling, Lighting, WaterHeating, HotWaterCylinder, SolarWaterHeating, @@ -122,7 +122,7 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): else: including_9_92_emission_factor = None - self.survey_information = SurverySummaryInfo( + self.survey_information = PreSiteNotesSummaryInfo( reference_number = get_value('Reference Number'), epc_language = get_value('EPC Language'), uprn = get_value('UPRN'), diff --git a/etl/surveyedData/surveryedData.py b/etl/surveyedData/surveryedData.py index 3bfb418..3837773 100644 --- a/etl/surveyedData/surveryedData.py +++ b/etl/surveyedData/surveryedData.py @@ -1,7 +1,7 @@ from etl.pdfReader.pdfReaderToText import pdfReaderToText from etl.pdfReader.reportType import ReportType import math -from etl.transform.preSiteNoteTypes import AssessorInfo, CompanyInfo +from etl.transform.preSiteNoteTypes import AssessorInfo, CompanyInfo, PreSiteNotesSummaryInfo class surveyedDataProcessor(): def __init__(self, address, files): @@ -23,43 +23,82 @@ class surveyedDataProcessor(): elif pdf.type == ReportType.CHARTED_SURVEYOR_REPORT: self.csr = pdf.get_reader() + def load_pre_site_notes_summary_table(self, db_session): + summary_data = self.pre_site_note.survey_information.__dict__ + return self.upsert_record( + db_session=db_session, + model_class=PreSiteNotesSummaryInfo, + data_dict=summary_data, + lookup_field="reference_number" + ) + def load_company_table(self, db_session): company_data = self.pre_site_note.company_information.__dict__ + return self.upsert_record( + db_session=db_session, + model_class=CompanyInfo, + data_dict=company_data, + lookup_field="trading_name" + ) + + def upsert_record( + self, + db_session, + model_class, + data_dict, + lookup_field: str, + update_if_exists: bool = False, + additional_fields: dict = None + ): + """ + Upserts a SQLAlchemy model instance based on a lookup field. - company_name = company_data.get('trading_name') + Args: + db_session: SQLAlchemy session. + model_class: SQLAlchemy model/table class. + data_dict: Dictionary of data for creating the model. + lookup_field: Unique identifier field name (str). + update_if_exists: Whether to update existing record or not. + additional_fields: Extra fields to inject (e.g., foreign keys). - existing_company = db_session.query(CompanyInfo).filter_by( - trading_name=company_name + Returns: + SQLAlchemy model instance. + """ + lookup_value = data_dict.get(lookup_field) + if not lookup_value: + raise ValueError(f"Missing lookup field '{lookup_field}' in data.") + + # Merge in additional fields if provided + if additional_fields: + data_dict.update(additional_fields) + + existing_record = db_session.query(model_class).filter( + getattr(model_class, lookup_field) == lookup_value ).first() - if existing_company: - return existing_company + if existing_record: + if update_if_exists: + for key, value in data_dict.items(): + setattr(existing_record, key, value) + db_session.commit() + return existing_record else: - new_company = CompanyInfo(**company_data) - db_session.add(new_company) + new_record = model_class(**data_dict) + db_session.add(new_record) db_session.commit() - return new_company - + return new_record + def load_assessor_table(self, db_session): company = self.load_company_table(db_session) assessor_data = self.pre_site_note.assessor_information.__dict__ - assessor_data['company_id'] = company.id - - accreditation_number = assessor_data.get('accreditation_number') - - existing_assessor = db_session.query(AssessorInfo).filter_by( - accreditation_number=accreditation_number - ).first() - - if existing_assessor: - # Update existing record - for key, value in assessor_data.items(): - setattr(existing_assessor, key, value) - else: - # Insert new record - db_session.add(AssessorInfo(**assessor_data)) - - db_session.commit() + return self.upsert_record( + db_session=db_session, + model_class=AssessorInfo, + data_dict=assessor_data, + lookup_field="accreditation_number", + update_if_exists=True, + additional_fields={"company_id": company.id} + ) def get_insulation_info(self): if self.csr: diff --git a/etl/transform/preSiteNoteTypes.py b/etl/transform/preSiteNoteTypes.py index 278e153..2ba96ca 100644 --- a/etl/transform/preSiteNoteTypes.py +++ b/etl/transform/preSiteNoteTypes.py @@ -1,4 +1,4 @@ -from sqlmodel import Field, SQLModel +from sqlmodel import Field, SQLModel, Relationship import uuid from datetime import datetime from pydantic import field_validator, EmailStr @@ -32,7 +32,7 @@ class CompanyInfo(BaseModel, table=True): return None return v -class SurverySummaryInfo(BaseModel): +class PreSiteNotesSummaryInfo(BaseModel, table=True): reference_number: str epc_language: str uprn: Optional[str] = "" @@ -51,7 +51,7 @@ class SurverySummaryInfo(BaseModel): current_annual_emissions: str current_annual_emission_including_0925_multiplayer: str current_annual_energy_costs: str - + class Walls(BaseModel): construction: str insulation: str @@ -209,11 +209,14 @@ class PropertyDescription(BaseModel): mainHeating2: Optional[Heating] secondaryHeatingType: Optional[HeatingType] -# class PropertyReport(): - # TODO: Property description - # TODO: Due consideration foregin key - # TODO: Which company did it (Osmosis, Warmfront etc) - # TODO: Links to more foreign keys per report etc - class Insulation(BaseModel): - type: str \ No newline at end of file + type: str + +# One class to rule them all +# class PreSiteNote(BaseModel, table=True): +# summary_info_id: uuid.UUID = Field( +# foreign_key="presitenotessummaryinfo.id", +# nullable=False +# ) +# # Relationship to summary info +# summary_info: Optional["PreSiteNotesSummaryInfo"] = Relationship(back_populates="pre_site_notes") \ No newline at end of file