From 66343cd2205fc6a2b821c2bc6948f025fa696b1c Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte@domna.homes>
Date: Wed, 21 May 2025 13:27:12 +0000
Subject: [PATCH] hubspot is done by each deal

---
 etl/db/hubSpotLoad.py             | 76 ++++++++++++++++++-------
 etl/hubspot_to_invoice_rewrite.py | 16 +++---
 etl/surveyPrice/surveyPrice.py    | 94 ++++++++++++++++++-------------
 3 files changed, 119 insertions(+), 67 deletions(-)

diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py
index 4adaa71..db82946 100644
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@@ -3,6 +3,7 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.db.db import get_db_session, init_db
+import pandas as pd
 
 from urllib.parse import unquote
 
@@ -72,6 +73,37 @@ class HubspotTodb():
             each_file.append(file_path)
         address_paths.update({address: each_file})
         return address_paths
+    
+    def string_to_installer(self, installer):
+        """Map a HubSpot installer name to its SharePointInstaller, or None if unrecognised."""
+        # A blank HubSpot field may not be a str (e.g. None/NaN); treat it as unrecognised.
+        name = installer.strip().upper() if isinstance(installer, str) else ""
+        if name == "J & J CRUMP":
+            return SharePointInstaller.JJC
+        return SharePointInstaller.SOUTH_COAST_INSULATION if name == "SCIS" else None
+
+    def work_out_invoice(self, row):
+        """Price a single HubSpot deal.
+
+        Wraps the deal's survey and row in one-row DataFrames, merges them and prices the result.
+        """
+        survey = self.gather_data_from_sharepoint_url(row)
+        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
+        survey_pd = pd.DataFrame([self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)])
+        hubspot_data = pd.DataFrame([row])
+        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_data, survey_pd)
+        return self.sp.calculate_all_price(merged_df)
+
+    def gather_data_from_sharepoint_url(self, row):
+        """Build a surveyedDataProcessor for the deal in row; only the first data_loc entry is used (None if empty)."""
+        sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
+        path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+        data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
+        for add, file_loc in data_loc.items():
+            sdp = surveyedDataProcessor(add, file_loc)
+            sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
+            return sdp
+
 
     def gather_data_from_each_sharepoint(self):
         self.get_all_deals()
@@ -90,29 +122,31 @@ class HubspotTodb():
         if fast is False:
             self.gather_data_from_each_sharepoint()
         with get_db_session() as session:
-            self.load_pre_site_note(session)
+            self.load_all_pre_site_note(session)
             session.commit()
 
-    def load_pre_site_note(self, db_session):
+    def load_one_pre_site_note(self, db_session, surveyedData):
+        """Load the pre site note records of one surveyed deal through db_session."""
+        # Loads Assessor information and Company information to db
+        assessor = surveyedData.load_assessor_table(db_session)
+        # Loads the pre site summary information
+        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
+        property_description = surveyedData.load_property_description(db_session)
+        # Creates the final pre site note table that links all information
+        presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
+        # Select the HubSpot deal row for this survey; deal ids are compared as strings
+        df = self.deals_in_hubspot
+        df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
+        building_table = surveyedData.create_buildings_table(
+            db_session,
+            df["HUBSPOT_LANDLORD_ID"].values[0],
+            df["HUBSPOT_DOMNA_ID"].values[0],
+        )
+        documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
+
+    def load_all_pre_site_note(self, db_session):
+        # Loads every pre site note held in self.data_in_sharepoint
         for surveyedData in self.data_in_sharepoint:
-            # Loads Assessor information and Company information to db
-            assessor = surveyedData.load_assessor_table(db_session)
-            # Loads the pre site summary information
-            summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
-
-            property_description = surveyedData.load_property_description(db_session)
-
-            # Creates the a final pre site note table that links all information
-            presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
-
-            df = self.deals_in_hubspot
-            df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
-            building_table = surveyedData.create_buildings_table(
-                db_session,
-                df["HUBSPOT_LANDLORD_ID"].values[0],
-                df["HUBSPOT_DOMNA_ID"].values[0],
-            )
-            documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
-            # Create building table or find building table to add new pre_site_note
+            self.load_one_pre_site_note(surveyedData=surveyedData, db_session=db_session)
 
 
diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py
index 7ca77dc..ffe7edc 100644
--- a/etl/hubspot_to_invoice_rewrite.py
+++ b/etl/hubspot_to_invoice_rewrite.py
@@ -1,27 +1,29 @@
 import os
-
-
+import pandas as pd
 os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
 os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
 os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
 os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
 os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
+
 # Local development
 os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres"
 
 from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.db.hubSpotLoad import HubspotTodb
 
-
-
 # Load to db
 dbLoader = HubspotTodb()
 
 df = dbLoader.get_all_deals()
 
+panda_final = []
+for _, row in df.iterrows():
+    invoice_row = dbLoader.work_out_invoice(row)
+    panda_final.append(invoice_row)
 
-
-
+panda_final = pd.concat(panda_final, ignore_index=True) if panda_final else pd.DataFrame()
+panda_final
 # For each deal
 # if not Validate:
 #     move to a different stage, with option to add a note to this to state what the error was
@@ -29,4 +31,4 @@ df = dbLoader.get_all_deals()
 # load to db and move deal correctly to correct position
 # 
 # Once grand list is finsihed for price
-# uploaad to sharepoint
+# upload to sharepoint
\ No newline at end of file
diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py
index 408b42e..ae51422 100644
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@@ -14,6 +14,7 @@ class SurveyPrice():
         self.master_rate_card_path = None
         self.all_hubspot_submissions = None
         self.all_survey_info_from_sharepoint = None
+        self.downloaded_price_card = False
 
 
         self.required_sheets = [
@@ -66,16 +67,17 @@ class SurveyPrice():
         }
 
     def download_price_card(self):
-        url = None
-        # TODO: Some sanity checks to ensure rate cards title stays consistent
-        for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
-            if files['name'] == "MASTER RATE CARD.xlsx":
-                url = files['@microsoft.graph.downloadUrl']
-                break
+        if self.master_rate_card_path is None:
+            url = None
+            # TODO: Some sanity checks to ensure rate cards title stays consistent
+            for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
+                if files['name'] == "MASTER RATE CARD.xlsx":
+                    url = files['@microsoft.graph.downloadUrl']
+                    break
 
-        if url:
-            content = self.sharepoint_client.get_file_content(url)
-            self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
+            if url:
+                content = self.sharepoint_client.get_file_content(url)
+                self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
 
         return self.master_rate_card_path
     
@@ -169,19 +171,7 @@ class SurveyPrice():
         self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
         return self.all_survey_info_from_sharepoint
     
-    
-    def sharepoint_data_for_installer(self, installer):
-
-        sp = SharePointScraper(installer)
-        file_paths = sp.download_file_for_each_address()
-        surveys = []
-
-        for eachAddress in tqdm(file_paths):
-            for address, files in eachAddress.items():
-                surveys.append(surveyedDataProcessor(address, files))
-
-        all_survey_info = []
-        for surveyInfo in surveys:
+    def survey_to_pandas_format(self, surveyInfo, installer):
             cavity_wall_as_built = False
             csr = False
             foam_insulation = False
@@ -240,16 +230,28 @@ class SurveyPrice():
                     "DOMNA JOB TYPE": "ECO4 PV"
                 })
 
+            return info
 
+    
+    
+    def sharepoint_data_for_installer(self, installer):
+        """Return a DataFrame with one row of survey info per address downloaded for installer."""
+        sp = SharePointScraper(installer)
+        file_paths = sp.download_file_for_each_address()
+        surveys = []
+
+        for eachAddress in tqdm(file_paths):
+            for address, files in eachAddress.items():
+                surveys.append(surveyedDataProcessor(address, files))
+
+        all_survey_info = []
+        for surveyInfo in surveys:
+            info = self.survey_to_pandas_format(surveyInfo, installer)
             all_survey_info.append(info)
 
         return pd.DataFrame(all_survey_info)
     
-    def merge_hub_spot_and_survey_information(self):
-        if self.all_survey_info_from_sharepoint is None:
-            raise RuntimeError("No survey information found from Sharepoint")
-        if self.all_hubspot_submissions is None:
-            raise RuntimeError("No information found from Hubspot")
+    def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
 
         # Standardise address
         def extract_start_and_postcode(addr):
@@ -261,23 +263,23 @@ class SurveyPrice():
             return start, postcode
 
         # Extract start + postcode from both datasets
-        self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
+        survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
             lambda x: pd.Series(extract_start_and_postcode(x))
         )
 
-        self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
+        hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
             lambda x: pd.Series(extract_start_and_postcode(x))
         )
 
 
         # re-name to installer
-        self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
+        survey_data = survey_data.rename(
             columns={
                 'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
             }
         )
 
-        self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
+        hubspot_data = hubspot_data.rename(
             columns={
                 'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
                 'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
@@ -285,8 +287,8 @@ class SurveyPrice():
         )
 
         merged_df = pd.merge(
-            self.all_survey_info_from_sharepoint,
-            self.all_hubspot_submissions,
+            survey_data,
+            hubspot_data,
             on=['address_start', 'postcode'],
             how='inner'
         )
@@ -294,8 +296,6 @@ class SurveyPrice():
         # if hubspot detects 
 
         merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
-
-
         def compute_energy_grant(row):
             pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
             post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
@@ -320,12 +320,28 @@ class SurveyPrice():
         merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)
 
         return merged_df
+
     
-    def calculate_all_price(self):
+    
+    def merge_hub_spot_and_survey_information(self):
+        """Merge the stored HubSpot and SharePoint frames; raises RuntimeError if either is unset."""
+        if self.all_survey_info_from_sharepoint is None:
+            raise RuntimeError("No survey information found from Sharepoint")
+        if self.all_hubspot_submissions is None:
+            raise RuntimeError("No information found from Hubspot")
+        merged_df = self.merge_hub_spot_and_survey_information_from_sharepoint_url(self.all_hubspot_submissions, self.all_survey_info_from_sharepoint)
+        return merged_df
+    
+
+    def calculate_all_price(self, merged_data=None):
         self.download_price_card()
-        self.get_all_surveys_from_hubspot()
-        self.get_all_surveyed_data_from_sharepoint()
-        submission_data = self.merge_hub_spot_and_survey_information()
+        if merged_data is None:
+            self.get_all_surveys_from_hubspot()
+            self.get_all_surveyed_data_from_sharepoint()
+            submission_data = self.merge_hub_spot_and_survey_information()
+        else:
+            submission_data = merged_data
+        
         final_list = []
         for _, row in submission_data.iterrows():
             if "PV" in row["HUBSPOT_WORK_TYPE"].upper():