From 66343cd2205fc6a2b821c2bc6948f025fa696b1c Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Wed, 21 May 2025 13:27:12 +0000
Subject: [PATCH] hubspot is done by each deal

Price each HubSpot deal on its own instead of only in bulk:

- HubspotTodb gains string_to_installer(), work_out_invoice() and
  gather_data_from_sharepoint_url() to build the invoice rows for one deal.
- load_pre_site_note() is split into load_one_pre_site_note() (one survey)
  and load_all_pre_site_note() (loops over data_in_sharepoint).
- SurveyPrice.survey_to_pandas_format() is extracted from
  sharepoint_data_for_installer(); the merge logic moves to
  merge_hub_spot_and_survey_information_from_sharepoint_url(), and
  calculate_all_price() accepts an optional pre-merged DataFrame.
---
Review notes (everything between the "---" line and the diff is ignored by
`git am`):

* This patch was reconstructed from a copy whose newlines and indentation
  had been collapsed. Blank context lines and indent widths (including the
  position of `return sdp` inside the loop in
  gather_data_from_sharepoint_url) are best guesses; if it does not apply
  cleanly, try `git apply --recount --ignore-whitespace`.
* Fixed: sharepoint_data_for_installer() called
  survey_to_pandas_format(surveyInfo) without the required `installer`
  argument (TypeError at runtime).
* Fixed: download_price_card() tested downloaded_price_card but never set
  it, so the rate card was downloaded again for every deal. The flag is now
  set after a successful download.
* Hardened: string_to_installer() no longer calls .upper() directly on the
  HubSpot value, which fails for missing / NaN installers.
* Tidied: comment typos in added lines; the trailing newline of
  etl/hubspot_to_invoice_rewrite.py is no longer removed.
* The `index` blob ids below predate these fixes.
* NOT changed here (pre-existing context lines): SharePoint and database
  credentials are hard-coded in etl/hubspot_to_invoice_rewrite.py. They
  should be rotated and read from the environment or a secret store.

 etl/db/hubSpotLoad.py             | 76 ++++++++++++++++++-------
 etl/hubspot_to_invoice_rewrite.py | 16 +++---
 etl/surveyPrice/surveyPrice.py    | 95 ++++++++++++++++++-------------
 3 files changed, 120 insertions(+), 67 deletions(-)

diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py
index 4adaa71..db82946 100644
--- a/etl/db/hubSpotLoad.py
+++ b/etl/db/hubSpotLoad.py
@@ -3,6 +3,7 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.surveyedData.surveryedData import surveyedDataProcessor
 from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.db.db import get_db_session, init_db
+import pandas as pd
 
 from urllib.parse import unquote
 
@@ -72,6 +73,37 @@ class HubspotTodb():
                 each_file.append(file_path)
             address_paths.update({address: each_file})
         return address_paths
+
+    def string_to_installer(self, installer):
+        name = str(installer).strip().upper()
+        if name == "J & J CRUMP":
+            return SharePointInstaller.JJC
+        if name == "SCIS":
+            return SharePointInstaller.SOUTH_COAST_INSULATION
+        return None
+
+    def work_out_invoice(self, row):
+        survey = self.gather_data_from_sharepoint_url(row)
+        installer = self.string_to_installer(row["HUBSPOT_INSTALLER"])
+        survey_pd = pd.DataFrame([self.sp.survey_to_pandas_format(surveyInfo=survey, installer=installer)])
+        hubspot_data = pd.DataFrame([row])
+        merged_df = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_data, survey_pd)
+        return self.sp.calculate_all_price(merged_df)
+
+
+
+        # self.sp.calculate_one_price_with_sharepoint_url(row, )
+
+    def gather_data_from_sharepoint_url(self, row):
+        sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
+        path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
+        data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
+
+        for add, file_loc in data_loc.items():
+            sdp = surveyedDataProcessor(add, file_loc)
+            sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
+            return sdp
+
 
     def gather_data_from_each_sharepoint(self):
         self.get_all_deals()
@@ -90,29 +122,31 @@ class HubspotTodb():
         if fast is False:
             self.gather_data_from_each_sharepoint()
         with get_db_session() as session:
-            self.load_pre_site_note(session)
+            self.load_all_pre_site_note(session)
             session.commit()
 
-    def load_pre_site_note(self, db_session):
+    def load_one_pre_site_note(self, db_session, surveyedData):
+        assessor = surveyedData.load_assessor_table(db_session)
+        # Loads the pre site summary information
+        summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
+
+        property_description = surveyedData.load_property_description(db_session)
+
+        # Creates the final pre site note table that links all information
+        presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
+
+        df = self.deals_in_hubspot
+        df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
+        building_table = surveyedData.create_buildings_table(
+            db_session,
+            df["HUBSPOT_LANDLORD_ID"].values[0],
+            df["HUBSPOT_DOMNA_ID"].values[0],
+        )
+        documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
+
+    def load_all_pre_site_note(self, db_session):
+        # Loads the pre site note for every survey in data_in_sharepoint
         for surveyedData in self.data_in_sharepoint:
-            # Loads Assessor information and Company information to db
-            assessor = surveyedData.load_assessor_table(db_session)
-            # Loads the pre site summary information
-            summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
-
-            property_description = surveyedData.load_property_description(db_session)
-
-            # Creates the a final pre site note table that links all information
-            presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description)
-
-            df = self.deals_in_hubspot
-            df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)]
-            building_table = surveyedData.create_buildings_table(
-                db_session,
-                df["HUBSPOT_LANDLORD_ID"].values[0],
-                df["HUBSPOT_DOMNA_ID"].values[0],
-            )
-            documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table)
-            # Create building table or find building table to add new pre_site_note
+            self.load_one_pre_site_note(surveyedData=surveyedData, db_session=db_session)
 
 
diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py
index 7ca77dc..ffe7edc 100644
--- a/etl/hubspot_to_invoice_rewrite.py
+++ b/etl/hubspot_to_invoice_rewrite.py
@@ -1,27 +1,29 @@
 import os
-
-
+import pandas as pd
 os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf"
 os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E"
 os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f"
 os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3"
 os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284"
+
 
 # Local development
 os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres"
 
 
 from etl.surveyPrice.surveyPrice import SurveyPrice
 from etl.db.hubSpotLoad import HubspotTodb
-
-
 
 # Load to db
 dbLoader = HubspotTodb()
 df = dbLoader.get_all_deals()
+panda_final = []
+for index, rows in df.iterrows():
+    invoice_row = dbLoader.work_out_invoice(rows)
+    panda_final.append(invoice_row)
 
-
-
+panda_final = pd.concat(panda_final, ignore_index=True)
+panda_final
 # For each deal
 # if not Validate:
 # move to a different stage, with option to add a note to this to state what the error was
@@ -29,4 +31,4 @@ df = dbLoader.get_all_deals()
 # load to db and move deal correctly to correct position
 #
 # Once grand list is finsihed for price
-# uploaad to sharepoint
+# upload to sharepoint
diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py
index 408b42e..ae51422 100644
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@@ -14,6 +14,7 @@ class SurveyPrice():
         self.master_rate_card_path = None
         self.all_hubspot_submissions = None
         self.all_survey_info_from_sharepoint = None
+        self.downloaded_price_card = False
 
 
         self.required_sheets = [
@@ -66,16 +67,18 @@ class SurveyPrice():
         }
 
     def download_price_card(self):
-        url = None
-        # TODO: Some sanity checks to ensure rate cards title stays consistent
-        for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
-            if files['name'] == "MASTER RATE CARD.xlsx":
-                url = files['@microsoft.graph.downloadUrl']
-                break
+        if not self.downloaded_price_card:
+            url = None
+            # TODO: Some sanity checks to ensure rate cards title stays consistent
+            for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
+                if files['name'] == "MASTER RATE CARD.xlsx":
+                    url = files['@microsoft.graph.downloadUrl']
+                    break
 
-        if url:
-            content = self.sharepoint_client.get_file_content(url)
-            self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
+            if url:
+                content = self.sharepoint_client.get_file_content(url)
+                self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
+                self.downloaded_price_card = True
 
         return self.master_rate_card_path
 
@@ -169,19 +172,7 @@ class SurveyPrice():
         self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
         return self.all_survey_info_from_sharepoint
 
-
-    def sharepoint_data_for_installer(self, installer):
-
-        sp = SharePointScraper(installer)
-        file_paths = sp.download_file_for_each_address()
-        surveys = []
-
-        for eachAddress in tqdm(file_paths):
-            for address, files in eachAddress.items():
-                surveys.append(surveyedDataProcessor(address, files))
-
-        all_survey_info = []
-        for surveyInfo in surveys:
+    def survey_to_pandas_format(self, surveyInfo, installer):
             cavity_wall_as_built = False
             csr = False
             foam_insulation = False
@@ -240,16 +231,28 @@ class SurveyPrice():
                     "DOMNA JOB TYPE": "ECO4 PV"
                 })
 
+            return info
+
+
+    def sharepoint_data_for_installer(self, installer):
+
+        sp = SharePointScraper(installer)
+        file_paths = sp.download_file_for_each_address()
+        surveys = []
+
+        for eachAddress in tqdm(file_paths):
+            for address, files in eachAddress.items():
+                surveys.append(surveyedDataProcessor(address, files))
+
+        all_survey_info = []
+        for surveyInfo in surveys:
+            info = self.survey_to_pandas_format(surveyInfo, installer)
             all_survey_info.append(info)
 
         return pd.DataFrame(all_survey_info)
 
 
-    def merge_hub_spot_and_survey_information(self):
-        if self.all_survey_info_from_sharepoint is None:
-            raise RuntimeError("No survey information found from Sharepoint")
-        if self.all_hubspot_submissions is None:
-            raise RuntimeError("No information found from Hubspot")
+    def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
 
         # Standardise address
         def extract_start_and_postcode(addr):
@@ -261,23 +264,23 @@ class SurveyPrice():
             return start, postcode
 
         # Extract start + postcode from both datasets
-        self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
+        survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
             lambda x: pd.Series(extract_start_and_postcode(x))
         )
 
-        self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
+        hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
             lambda x: pd.Series(extract_start_and_postcode(x))
         )
 
 
         # re-name to installer
-        self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
+        survey_data = survey_data.rename(
             columns={
                 'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
             }
         )
 
-        self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
+        hubspot_data = hubspot_data.rename(
             columns={
                 'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
                 'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
@@ -285,8 +288,8 @@ class SurveyPrice():
         )
 
         merged_df = pd.merge(
-            self.all_survey_info_from_sharepoint,
-            self.all_hubspot_submissions,
+            survey_data,
+            hubspot_data,
             on=['address_start', 'postcode'],
             how='inner'
         )
@@ -294,8 +297,6 @@ class SurveyPrice():
         # if hubspot detects
         merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
 
-
-
         def compute_energy_grant(row):
             pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
             post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
@@ -320,12 +321,28 @@ class SurveyPrice():
         merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)
         return merged_df
 
+
 
-    def calculate_all_price(self):
+
+    def merge_hub_spot_and_survey_information(self):
+        if self.all_survey_info_from_sharepoint is None:
+            raise RuntimeError("No survey information found from Sharepoint")
+        if self.all_hubspot_submissions is None:
+            raise RuntimeError("No information found from Hubspot")
+
+        merged_df = self.merge_hub_spot_and_survey_information_from_sharepoint_url(self.all_hubspot_submissions, self.all_survey_info_from_sharepoint)
+        return merged_df
+
+
+    def calculate_all_price(self, merged_data=None):
         self.download_price_card()
-        self.get_all_surveys_from_hubspot()
-        self.get_all_surveyed_data_from_sharepoint()
-        submission_data = self.merge_hub_spot_and_survey_information()
+        if merged_data is None:
+            self.get_all_surveys_from_hubspot()
+            self.get_all_surveyed_data_from_sharepoint()
+            submission_data = self.merge_hub_spot_and_survey_information()
+        else:
+            submission_data = merged_data
+
         final_list = []
         for _, row in submission_data.iterrows():
             if "PV" in row["HUBSPOT_WORK_TYPE"].upper():