HubSpot processing is now done deal by deal

2026-06-30 13:10:56 +00:00 · 2025-05-21 13:27:12 +00:00 · 2025-05-21 13:27:12 +00:00 · 66343cd220
commit 66343cd220
parent 0ecb5dce9d
3 changed files with 119 additions and 67 deletions
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@ -3,6 +3,7 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.db.db import get_db_session, init_db
+import pandas as pd

 from urllib.parse import unquote

@ -72,6 +73,37 @@ class HubspotTodb():
            each_file.append(file_path)
        address_paths.update({address: each_file})
        return address_paths
+    
+    def string_to_installer(self, installer):
+        """Translate a HubSpot installer name into a SharePointInstaller member.
+
+        Matching ignores case and surrounding whitespace. Returns None for an
+        unrecognised name or for a value that is not a string.
+        """
+        # A missing HubSpot field may arrive as None/NaN, which has no
+        # .upper(); treat it as an unknown installer instead of crashing.
+        if not isinstance(installer, str):
+            return None
+        name = installer.strip().upper()
+        if name == "J & J CRUMP":
+            return SharePointInstaller.JJC
+        if name == "SCIS":
+            return SharePointInstaller.SOUTH_COAST_INSULATION
+        return None
+
+    def work_out_invoice(self, row):
+        """Price a single HubSpot deal.
+
+        Gathers the deal's survey data from SharePoint, turns the survey and
+        the HubSpot row into one-row DataFrames, merges them through
+        ``self.sp`` and returns the result of ``self.sp.calculate_all_price``
+        for the merged frame.
+
+        ``row`` is read by HUBSPOT_* keys (HUBSPOT_INSTALLER here, plus the
+        keys used by gather_data_from_sharepoint_url).
+        """
+        # NOTE(review): survey is None when no files were created for the
+        # deal, and installer is None for an unrecognised installer name;
+        # confirm survey_to_pandas_format tolerates both.
+        survey = self.gather_data_from_sharepoint_url(row)
+        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
+        survey_pd = pd.DataFrame([self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)])
+        hubspot_data = pd.DataFrame([row])
+        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_data, survey_pd)
+        return self.sp.calculate_all_price(merged_df)
+
+    def gather_data_from_sharepoint_url(self, row):
+        """Fetch the SharePoint files for one HubSpot deal and wrap them in a processor.
+
+        Returns a surveyedDataProcessor built from the first (address, files)
+        entry produced by create_files_locally, tagged with the deal's
+        HUBSPOT_DEAL_ID, or None when no entry was produced.
+        """
+        scraper = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
+        folder = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+        files_by_address = self.create_files_locally(scraper, folder, row["HUBSPOT_DEAL_ADDRESS"])
+
+        # Only the first entry is ever used; any further entries are ignored.
+        first_entry = next(iter(files_by_address.items()), None)
+        if first_entry is None:
+            return None
+
+        address, files = first_entry
+        processor = surveyedDataProcessor(address, files)
+        processor.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
+        return processor
+

    def gather_data_from_each_sharepoint(self):
        self.get_all_deals()
@ -90,29 +122,31 @@ class HubspotTodb():
        if fast is False:
            self.gather_data_from_each_sharepoint()
        with get_db_session() as session:
-            self.load_pre_site_note(session)
+            self.load_all_pre_site_note(session)
            session.commit()

-    def load_pre_site_note(self, db_session):
+    def load_one_pre_site_note(self, db_session, surveyedData):
+        """Load one surveyed property's pre-site-note records through ``db_session``.
+
+        Calls the surveyedData loaders in order: assessor, pre-site-notes
+        summary, property description, the pre-site note linking those three,
+        the building, and finally the documents. The landlord and Domna ids
+        for the building come from the row of ``self.deals_in_hubspot`` whose
+        HUBSPOT_DEAL_ID equals ``str(surveyedData.hubspot_deal_id)``.
+
+        The session is not committed here.
+
+        Raises:
+            IndexError: no HubSpot deal matches the survey's deal id.
+        """
+        assessor = surveyedData.load_assessor_table(db_session)
+        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
+        property_description = surveyedData.load_property_description(db_session)
+
+        # The pre-site note ties the three records above together.
+        presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
+
+        deals = self.deals_in_hubspot
+        deal = deals[deals["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
+        if deal.empty:
+            # Same exception type that .values[0] raised on an empty match,
+            # but naming the deal id that could not be found.
+            raise IndexError(
+                f"No HubSpot deal found with HUBSPOT_DEAL_ID {surveyedData.hubspot_deal_id!r}"
+            )
+        building_table = surveyedData.create_buildings_table(
+            db_session,
+            deal["HUBSPOT_LANDLORD_ID"].values[0],
+            deal["HUBSPOT_DOMNA_ID"].values[0],
+        )
+        surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
+    def load_all_pre_site_note(self, db_session):
+        """Run load_one_pre_site_note for every entry in self.data_in_sharepoint."""
        for surveyedData in self.data_in_sharepoint:
-            # Loads Assessor information and Company information to db
-            assessor = surveyedData.load_assessor_table(db_session)
-            # Loads the pre site summary information
-            summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
-
-            property_description = surveyedData.load_property_description(db_session)
-
-            # Creates the a final pre site note table that links all information
-            presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
-
-            df = self.deals_in_hubspot
-            df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
-            building_table = surveyedData.create_buildings_table(
-                db_session,
-                df["HUBSPOT_LANDLORD_ID"].values[0],
-                df["HUBSPOT_DOMNA_ID"].values[0],
-            )
-            documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
-            # Create building table or find building table to add new pre_site_note
+            self.load_one_pre_site_note(surveyedData=surveyedData, db_session=db_session)


--- a/etl/hubspot_to_invoice_rewrite.py
+++ b/etl/hubspot_to_invoice_rewrite.py
@ -1,27 +1,29 @@
 import os
-
-
+import pandas as pd
 os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
 os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
 os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
 os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
 os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
+
 # Local development
 os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres"

 from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.db.hubSpotLoad import HubspotTodb

-
-
 # Load to db
 dbLoader = HubspotTodb()

 df = dbLoader.get_all_deals()

+panda_final = []
+for index, rows in df.iterrows():
+    invoice_row = dbLoader.work_out_invoice(rows)
+    panda_final.append(invoice_row)

-
-
+panda_final = pd.concat(panda_final, ignore_index=True)
+panda_final
 # For each deal
 # if not Validate:
 #     move to a different stage, with option to add a note to this to state what the error was
@ -29,4 +31,4 @@ df = dbLoader.get_all_deals()
 # load to db and move deal correctly to correct position
 # 
 # Once grand list is finsihed for price
-# uploaad to sharepoint
+# uploaad to sharepoint
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@ -14,6 +14,7 @@ class SurveyPrice():
        self.master_rate_card_path = None
        self.all_hubspot_submissions = None
        self.all_survey_info_from_sharepoint = None
+        self.downloaded_price_card = False


        self.required_sheets = [
@ -66,16 +67,17 @@ class SurveyPrice():
        }

    def download_price_card(self):
-        url = None
-        # TODO: Some sanity checks to ensure rate cards title stays consistent
-        for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
-            if files['name'] == "MASTER RATE CARD.xlsx":
-                url = files['@microsoft.graph.downloadUrl']
-                break
+        """Download "MASTER RATE CARD.xlsx" once and return its local path.
+
+        Looks the file up under "/Commercials/Rate Cards", writes its content
+        through ``create_temp_file`` and remembers the resulting path in
+        ``self.master_rate_card_path``. Later calls return that path without
+        contacting SharePoint again. If the file is not found, the path is
+        left unchanged and the next call retries.
+        """
+        if not self.downloaded_price_card:
+            url = None
+            # TODO: Some sanity checks to ensure rate cards title stays consistent
+            for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
+                if files['name'] == "MASTER RATE CARD.xlsx":
+                    url = files['@microsoft.graph.downloadUrl']
+                    break

-        if url:
-            content = self.sharepoint_client.get_file_content(url)
-            self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
+            if url:
+                content = self.sharepoint_client.get_file_content(url)
+                self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
+                # Record the successful download; without this the flag stays
+                # False and every call downloads the rate card again.
+                self.downloaded_price_card = True

        return self.master_rate_card_path
    
@ -169,19 +171,7 @@ class SurveyPrice():
        self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
        return self.all_survey_info_from_sharepoint
    
-    
-    def sharepoint_data_for_installer(self, installer):
-
-        sp = SharePointScraper(installer)
-        file_paths = sp.download_file_for_each_address()
-        surveys = []
-
-        for eachAddress in tqdm(file_paths):
-            for address, files in eachAddress.items():
-                surveys.append(surveyedDataProcessor(address, files))
-
-        all_survey_info = []
-        for surveyInfo in surveys:
+    def survey_to_pandas_format(self, surveyInfo, installer):
            cavity_wall_as_built = False
            csr = False
            foam_insulation = False
@ -240,16 +230,28 @@ class SurveyPrice():
                    "DOMNA JOB TYPE": "ECO4 PV"
                })

+            return info

+    
+    
+    def sharepoint_data_for_installer(self, installer):
+        """Build a DataFrame with one row per surveyed address of ``installer``.
+
+        Downloads the files for each address through a SharePointScraper,
+        wraps every (address, files) pair in a surveyedDataProcessor, and
+        converts each one with survey_to_pandas_format.
+        """
+        sp = SharePointScraper(installer)
+        file_paths = sp.download_file_for_each_address()
+        surveys = []
+
+        for eachAddress in tqdm(file_paths):
+            for address, files in eachAddress.items():
+                surveys.append(surveyedDataProcessor(address, files))
+
+        all_survey_info = []
+        for surveyInfo in surveys:
+            # survey_to_pandas_format has a mandatory installer parameter;
+            # calling it with the survey alone raises TypeError.
+            info = self.survey_to_pandas_format(surveyInfo, installer=installer)
            all_survey_info.append(info)

        return pd.DataFrame(all_survey_info)
    
-    def merge_hub_spot_and_survey_information(self):
-        if self.all_survey_info_from_sharepoint is None:
-            raise RuntimeError("No survey information found from Sharepoint")
-        if self.all_hubspot_submissions is None:
-            raise RuntimeError("No information found from Hubspot")
+    def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):

        # Standardise address
        def extract_start_and_postcode(addr):
@ -261,23 +263,23 @@ class SurveyPrice():
            return start, postcode

        # Extract start + postcode from both datasets
-        self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
+        survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
            lambda x: pd.Series(extract_start_and_postcode(x))
        )

-        self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
+        hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
            lambda x: pd.Series(extract_start_and_postcode(x))
        )


        # re-name to installer
-        self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
+        survey_data = survey_data.rename(
            columns={
                'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
            }
        )

-        self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
+        hubspot_data = hubspot_data.rename(
            columns={
                'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
                'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
@ -285,8 +287,8 @@ class SurveyPrice():
        )

        merged_df = pd.merge(
-            self.all_survey_info_from_sharepoint,
-            self.all_hubspot_submissions,
+            survey_data,
+            hubspot_data,
            on=['address_start', 'postcode'],
            how='inner'
        )
@ -294,8 +296,6 @@ class SurveyPrice():
        # if hubspot detects 

        merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
-
-
        def compute_energy_grant(row):
            pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
            post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
@ -320,12 +320,28 @@ class SurveyPrice():
        merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)

        return merged_df
+
    
-    def calculate_all_price(self):
+    
+    def merge_hub_spot_and_survey_information(self):
+        """Merge the HubSpot submissions and SharePoint survey data held on the instance.
+
+        Delegates to merge_hub_spot_and_survey_information_from_sharepoint_url
+        and returns its result.
+
+        Raises:
+            RuntimeError: either dataset is still None.
+        """
+        survey_frame = self.all_survey_info_from_sharepoint
+        if survey_frame is None:
+            raise RuntimeError("No survey information found from Sharepoint")
+
+        hubspot_frame = self.all_hubspot_submissions
+        if hubspot_frame is None:
+            raise RuntimeError("No information found from Hubspot")
+
+        return self.merge_hub_spot_and_survey_information_from_sharepoint_url(
+            hubspot_data=hubspot_frame,
+            survey_data=survey_frame,
+        )
+    
+
+    def calculate_all_price(self, merged_data=None):
        self.download_price_card()
-        self.get_all_surveys_from_hubspot()
-        self.get_all_surveyed_data_from_sharepoint()
-        submission_data = self.merge_hub_spot_and_survey_information()
+        if merged_data is None:
+            self.get_all_surveys_from_hubspot()
+            self.get_all_surveyed_data_from_sharepoint()
+            submission_data = self.merge_hub_spot_and_survey_information()
+        else:
+            submission_data = merged_data
+        
        final_list = []
        for _, row in submission_data.iterrows():
            if "PV" in row["HUBSPOT_WORK_TYPE"].upper():