From bf54d759f237e2f2fc1f1bb1d82e3b5fd16bf3e4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 6 May 2025 16:09:47 +0000 Subject: [PATCH] pre site table is now made --- etl/db/hubSpotLoad.py | 4 ++- etl/surveyedData/surveryedData.py | 46 ++++++++++++++++++++++++------- etl/transform/preSiteNoteTypes.py | 33 +++++++++++++++++----- 3 files changed, 65 insertions(+), 18 deletions(-) diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index e6772ed..0556fec 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -94,4 +94,6 @@ class HubspotTodb(): # Loads Assessor information and Company information to db assessor = surveyedData.load_assessor_table(db_session) # Loads the pre site summary information - summary_info = surveyedData.load_pre_site_notes_summary_table(db_session) \ No newline at end of file + summary_info = surveyedData.load_pre_site_notes_summary_table(db_session) + # Creates the final pre site note table that links all information + surveyedData.create_pre_site_note_table(db_session, assessor, summary_info) diff --git a/etl/surveyedData/surveryedData.py b/etl/surveyedData/surveryedData.py index 3837773..f0e5995 100644 --- a/etl/surveyedData/surveryedData.py +++ b/etl/surveyedData/surveryedData.py @@ -1,7 +1,10 @@ from etl.pdfReader.pdfReaderToText import pdfReaderToText from etl.pdfReader.reportType import ReportType import math -from etl.transform.preSiteNoteTypes import AssessorInfo, CompanyInfo, PreSiteNotesSummaryInfo +from etl.transform.preSiteNoteTypes import ( + AssessorInfo, CompanyInfo, + PreSiteNotesSummaryInfo, PreSiteNote, +) class surveyedDataProcessor(): def __init__(self, address, files): @@ -15,7 +18,6 @@ class surveyedDataProcessor(): def identify_files(self): for file in self.files: pdf = pdfReaderToText(file) - print(file) if pdf: if pdf.type == ReportType.QUIDOS_PRESITE_NOTE: self.pre_site_note = pdf.get_reader() @@ -40,6 +42,24 @@ class surveyedDataProcessor(): data_dict=company_data, lookup_field="trading_name" 
) + + + def create_pre_site_note_table( + self, + db_session, + assessor, + summary_info + ): + preSiteNote = PreSiteNote( + summary_info_id=summary_info.id, + assessor_id=assessor.id, + ) + db_session.add(preSiteNote) + db_session.commit() + + return preSiteNote + + def upsert_record( self, @@ -64,39 +84,45 @@ class surveyedDataProcessor(): Returns: SQLAlchemy model instance. """ - lookup_value = data_dict.get(lookup_field) + # Filter out private/internal fields like _sa_instance_state + clean_data = { + k: v for k, v in data_dict.items() + if not k.startswith('_') and hasattr(model_class, k) + } + + # Merge additional fields if provided + if additional_fields: + clean_data.update(additional_fields) + + lookup_value = clean_data.get(lookup_field) if not lookup_value: raise ValueError(f"Missing lookup field '{lookup_field}' in data.") - # Merge in additional fields if provided - if additional_fields: - data_dict.update(additional_fields) - existing_record = db_session.query(model_class).filter( getattr(model_class, lookup_field) == lookup_value ).first() if existing_record: if update_if_exists: - for key, value in data_dict.items(): + for key, value in clean_data.items(): setattr(existing_record, key, value) db_session.commit() return existing_record else: - new_record = model_class(**data_dict) + new_record = model_class(**clean_data) db_session.add(new_record) db_session.commit() return new_record def load_assessor_table(self, db_session): company = self.load_company_table(db_session) + print(f"Company ID: {company.id}") assessor_data = self.pre_site_note.assessor_information.__dict__ return self.upsert_record( db_session=db_session, model_class=AssessorInfo, data_dict=assessor_data, lookup_field="accreditation_number", - update_if_exists=True, additional_fields={"company_id": company.id} ) diff --git a/etl/transform/preSiteNoteTypes.py b/etl/transform/preSiteNoteTypes.py index 2ba96ca..f90ac57 100644 --- a/etl/transform/preSiteNoteTypes.py +++ 
b/etl/transform/preSiteNoteTypes.py @@ -4,6 +4,7 @@ from datetime import datetime from pydantic import field_validator, EmailStr from typing import Optional, List from sqlalchemy import Column +from sqlalchemy.orm import relationship from sqlalchemy.dialects.postgresql import UUID class BaseModel(SQLModel): @@ -52,6 +53,12 @@ class PreSiteNotesSummaryInfo(BaseModel, table=True): current_annual_emission_including_0925_multiplayer: str current_annual_energy_costs: str + pre_site_note: Optional["PreSiteNote"] = Relationship( + back_populates="summary_info", + sa_relationship=relationship("PreSiteNote", back_populates="summary_info", uselist=False) + ) + + class Walls(BaseModel): construction: str insulation: str @@ -90,6 +97,8 @@ class AssessorInfo(BaseModel, table=True): foreign_key="companyinfo.id", # Referencing CompanyInfo nullable=False ) + + pre_site_notes: List["PreSiteNote"] = Relationship(back_populates="assessor") class VentilationAndCooling(BaseModel): no_of_open_fireplaces: int @@ -213,10 +222,20 @@ class Insulation(BaseModel): type: str # One class to rule them all -# class PreSiteNote(BaseModel, table=True): -# summary_info_id: uuid.UUID = Field( -# foreign_key="presitenotessummaryinfo.id", -# nullable=False -# ) -# # Relationship to summary info -# summary_info: Optional["PreSiteNotesSummaryInfo"] = Relationship(back_populates="pre_site_notes") \ No newline at end of file +class PreSiteNote(BaseModel, table=True): + # Summary Info + summary_info_id: uuid.UUID = Field( + foreign_key="presitenotessummaryinfo.id", + nullable=False + ) + + summary_info: Optional["PreSiteNotesSummaryInfo"] = Relationship(back_populates="pre_site_note") + + + # Assessor Info + assessor_id: uuid.UUID = Field( + foreign_key="assessorinfo.id", + nullable=False + ) + + assessor: Optional["AssessorInfo"] = Relationship(back_populates="pre_site_notes")