hubspot is done by each deal

This commit is contained in:
Jun-te Kim 2025-05-21 13:27:12 +00:00
parent 0ecb5dce9d
commit 66343cd220
3 changed files with 119 additions and 67 deletions

View file

@ -3,6 +3,7 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from etl.db.db import get_db_session, init_db
import pandas as pd
from urllib.parse import unquote
@ -72,6 +73,37 @@ class HubspotTodb():
each_file.append(file_path)
address_paths.update({address: each_file})
return address_paths
def string_to_installer(self, installer):
    """Map a HubSpot installer name to its ``SharePointInstaller`` member.

    Matching ignores case and surrounding whitespace.

    Args:
        installer: Installer name as stored on the deal. Non-string values
            (for example ``None`` or a float NaN) are treated as unknown.

    Returns:
        ``SharePointInstaller.JJC`` for "J & J CRUMP",
        ``SharePointInstaller.SOUTH_COAST_INSULATION`` for "SCIS",
        otherwise ``None``.
    """
    if not isinstance(installer, str):
        # A missing value has no .upper(); report "unknown" instead of raising.
        return None

    normalised = installer.strip().upper()
    if normalised == "J & J CRUMP":
        return SharePointInstaller.JJC
    if normalised == "SCIS":
        return SharePointInstaller.SOUTH_COAST_INSULATION
    return None
def work_out_invoice(self, row):
    """Price a single HubSpot deal from its SharePoint survey.

    The survey for ``row`` is gathered with ``gather_data_from_sharepoint_url``,
    flattened by ``self.sp.survey_to_pandas_format``, merged with the deal row
    and handed to ``self.sp.calculate_all_price``.

    Args:
        row: Deal record; ``row["HUBSPOT_INSTALLER"]`` is read here and the
            whole row becomes a one-row DataFrame.

    Returns:
        Whatever ``self.sp.calculate_all_price`` returns for the merged frame.
    """
    surveyed = self.gather_data_from_sharepoint_url(row)
    installer_member = self.string_to_installer(row["HUBSPOT_INSTALLER"])

    survey_record = self.sp.survey_to_pandas_format(
        surveyInfo=surveyed,
        installer=installer_member,
    )
    survey_frame = pd.DataFrame([survey_record])
    deal_frame = pd.DataFrame([row])

    merged = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(
        deal_frame,
        survey_frame,
    )
    return self.sp.calculate_all_price(merged)
def gather_data_from_sharepoint_url(self, row):
    """Download one deal's SharePoint files and wrap them in a processor.

    Args:
        row: Deal record providing ``HUBSPOT_INSTALLER``,
            ``HUBSPOT_SHAREPOINT_PATH``, ``HUBSPOT_DEAL_ADDRESS`` and
            ``HUBSPOT_DEAL_ID``.

    Returns:
        A ``surveyedDataProcessor`` tagged with the deal's ``hubspot_deal_id``.
        If ``create_files_locally`` yields several addresses, the last one wins.

    Raises:
        RuntimeError: If no files were found for the deal.
    """
    sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
    path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
    data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])

    sdp = None
    for add, file_loc in data_loc.items():
        sdp = surveyedDataProcessor(add, file_loc)
        sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]

    if sdp is None:
        # Without this guard an empty result left `sdp` unbound.
        raise RuntimeError(
            f"No SharePoint files found for deal {row['HUBSPOT_DEAL_ID']}"
        )
    return sdp
def gather_data_from_each_sharepoint(self):
self.get_all_deals()
@ -90,29 +122,31 @@ class HubspotTodb():
if fast is False:
self.gather_data_from_each_sharepoint()
with get_db_session() as session:
self.load_pre_site_note(session)
self.load_all_pre_site_note(session)
session.commit()
def load_pre_site_note(self, db_session):
def load_one_pre_site_note(self, db_session, surveyedData):
    """Load one surveyed record and its linked tables through ``db_session``.

    Args:
        db_session: Session passed to every ``surveyedData`` loader call.
        surveyedData: Processor exposing the loader methods used below and a
            ``hubspot_deal_id`` that identifies its deal in
            ``self.deals_in_hubspot``.
    """
    # Assessor, summary and property description feed the pre-site note.
    assessor = surveyedData.load_assessor_table(db_session)
    summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
    property_description = surveyedData.load_property_description(db_session)

    # The pre-site note row links the three records created above.
    presitenote = surveyedData.create_pre_site_note_table(
        db_session, assessor, summary_info, property_description
    )

    # Find the HubSpot deal for this survey; deal ids are compared as strings.
    deals = self.deals_in_hubspot
    wanted_id = str(surveyedData.hubspot_deal_id)
    matching = deals[deals["HUBSPOT_DEAL_ID"] == wanted_id]
    landlord_id = matching["HUBSPOT_LANDLORD_ID"].values[0]
    domna_id = matching["HUBSPOT_DOMNA_ID"].values[0]

    building_table = surveyedData.create_buildings_table(
        db_session,
        landlord_id,
        domna_id,
    )
    surveyedData.create_document_table_via_pre_site_note(
        db_session, presitenote, assessor, building_table
    )
def load_all_pre_site_note(self, db_session):
    """Load every surveyed record in ``self.data_in_sharepoint``.

    Each record is handed to ``load_one_pre_site_note`` exactly once; the
    per-record steps live there and are not repeated inline here.

    Args:
        db_session: Session forwarded to ``load_one_pre_site_note``.
    """
    for surveyedData in self.data_in_sharepoint:
        self.load_one_pre_site_note(db_session=db_session, surveyedData=surveyedData)

View file

@ -1,27 +1,29 @@
import os
import pandas as pd
# Development defaults. setdefault keeps any value already supplied by the
# real environment instead of overwriting it.
os.environ.setdefault("SHAREPOINT_CLIENT_ID", "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf")
os.environ.setdefault("SHAREPOINT_TENANT_ID", "c3f7519c-2719-4547-af04-6da6cbfd8f8f")
os.environ.setdefault("SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID", "b5a51507-9427-4ee0-b03e-90ec7681e2d3")
os.environ.setdefault("JJC_SERVICE_SHAREPOINT_ID", "7fdd0485-bbf3-4b29-b30f-98c81c2a6284")
# Local development
os.environ.setdefault("DATABASE_URL", "postgresql://postgres:makingwarmhomes@db:5432/postgres")

# SECURITY: the SharePoint client secret used to be hard-coded here. It must
# come from the environment, and the previously committed value should be
# rotated because it is in version-control history.
if not os.environ.get("SHAREPOINT_CLIENT_SECRET"):
    raise RuntimeError("SHAREPOINT_CLIENT_SECRET must be set in the environment")

# Imported after the environment is prepared, as in the original ordering
# (presumably these modules read the variables above - confirm).
from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.db.hubSpotLoad import HubspotTodb

# Load to db
dbLoader = HubspotTodb()
df = dbLoader.get_all_deals()

# One invoice frame per deal.
invoice_frames = [dbLoader.work_out_invoice(rows) for _, rows in df.iterrows()]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
panda_final = (
    pd.concat(invoice_frames, ignore_index=True)
    if invoice_frames
    else pd.DataFrame()
)
panda_final
# For each deal
# if not Validate:
# move to a different stage, with option to add a note to this to state what the error was
@ -29,4 +31,4 @@ df = dbLoader.get_all_deals()
# load to db and move deal correctly to correct position
#
# Once grand list is finished for price
# upload to sharepoint
# upload to sharepoint

View file

@ -14,6 +14,7 @@ class SurveyPrice():
self.master_rate_card_path = None
self.all_hubspot_submissions = None
self.all_survey_info_from_sharepoint = None
self.downloaded_price_card = False
self.required_sheets = [
@ -66,16 +67,17 @@ class SurveyPrice():
}
def download_price_card(self):
    """Download the master rate card once and return its local path.

    The SharePoint folder "/Commercials/Rate Cards" is searched for
    "MASTER RATE CARD.xlsx". After a successful download
    ``self.downloaded_price_card`` is set, so later calls return the stored
    path without contacting SharePoint again.

    Returns:
        ``self.master_rate_card_path``; this is whatever was stored before the
        call if no matching file was found.
    """
    if not self.downloaded_price_card:
        url = None
        # TODO: Some sanity checks to ensure rate cards title stays consistent
        for files in self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']:
            if files['name'] == "MASTER RATE CARD.xlsx":
                url = files['@microsoft.graph.downloadUrl']
                break

        if url:
            content = self.sharepoint_client.get_file_content(url)
            self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
            # Record success; without this the guard above never takes effect.
            self.downloaded_price_card = True

    return self.master_rate_card_path
@ -169,19 +171,7 @@ class SurveyPrice():
self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
return self.all_survey_info_from_sharepoint
def sharepoint_data_for_installer(self, installer):
sp = SharePointScraper(installer)
file_paths = sp.download_file_for_each_address()
surveys = []
for eachAddress in tqdm(file_paths):
for address, files in eachAddress.items():
surveys.append(surveyedDataProcessor(address, files))
all_survey_info = []
for surveyInfo in surveys:
def survey_to_pandas_format(self, surveyInfo, installer):
cavity_wall_as_built = False
csr = False
foam_insulation = False
@ -240,16 +230,28 @@ class SurveyPrice():
"DOMNA JOB TYPE": "ECO4 PV"
})
return info
def sharepoint_data_for_installer(self, installer):
    """Collect survey information for every address of one installer.

    Args:
        installer: Passed to ``SharePointScraper`` and forwarded to
            ``survey_to_pandas_format`` for each survey.

    Returns:
        A DataFrame with one row per surveyed address.
    """
    sp = SharePointScraper(installer)
    file_paths = sp.download_file_for_each_address()

    surveys = []
    for eachAddress in tqdm(file_paths):
        for address, files in eachAddress.items():
            surveys.append(surveyedDataProcessor(address, files))

    # survey_to_pandas_format requires the installer; omitting it is a TypeError.
    all_survey_info = [
        self.survey_to_pandas_format(surveyInfo, installer)
        for surveyInfo in surveys
    ]
    return pd.DataFrame(all_survey_info)
def merge_hub_spot_and_survey_information(self):
if self.all_survey_info_from_sharepoint is None:
raise RuntimeError("No survey information found from Sharepoint")
if self.all_hubspot_submissions is None:
raise RuntimeError("No information found from Hubspot")
def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
# Standardise address
def extract_start_and_postcode(addr):
@ -261,23 +263,23 @@ class SurveyPrice():
return start, postcode
# Extract start + postcode from both datasets
self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
lambda x: pd.Series(extract_start_and_postcode(x))
)
# re-name to installer
self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
survey_data = survey_data.rename(
columns={
'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
}
)
self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
hubspot_data = hubspot_data.rename(
columns={
'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
@ -285,8 +287,8 @@ class SurveyPrice():
)
merged_df = pd.merge(
self.all_survey_info_from_sharepoint,
self.all_hubspot_submissions,
survey_data,
hubspot_data,
on=['address_start', 'postcode'],
how='inner'
)
@ -294,8 +296,6 @@ class SurveyPrice():
# if hubspot detects
merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
def compute_energy_grant(row):
pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
@ -320,12 +320,28 @@ class SurveyPrice():
merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)
return merged_df
def calculate_all_price(self):
def merge_hub_spot_and_survey_information(self):
    """Merge the cached HubSpot submissions with the cached SharePoint surveys.

    Returns:
        The result of ``merge_hub_spot_and_survey_information_from_sharepoint_url``
        called with the HubSpot data first and the survey data second.

    Raises:
        RuntimeError: If either cached data set is still ``None``.
    """
    survey_info = self.all_survey_info_from_sharepoint
    if survey_info is None:
        raise RuntimeError("No survey information found from Sharepoint")

    hubspot_info = self.all_hubspot_submissions
    if hubspot_info is None:
        raise RuntimeError("No information found from Hubspot")

    return self.merge_hub_spot_and_survey_information_from_sharepoint_url(
        hubspot_info,
        survey_info,
    )
def calculate_all_price(self, merged_data=None):
self.download_price_card()
self.get_all_surveys_from_hubspot()
self.get_all_surveyed_data_from_sharepoint()
submission_data = self.merge_hub_spot_and_survey_information()
if merged_data is None:
self.get_all_surveys_from_hubspot()
self.get_all_surveyed_data_from_sharepoint()
submission_data = self.merge_hub_spot_and_survey_information()
else:
submission_data = merged_data
final_list = []
for _, row in submission_data.iterrows():
if "PV" in row["HUBSPOT_WORK_TYPE"].upper():