mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
hubspot is done by each deal
This commit is contained in:
parent
0ecb5dce9d
commit
66343cd220
3 changed files with 119 additions and 67 deletions
|
|
@ -3,6 +3,7 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
|
|||
from etl.surveyedData.surveryedData import surveyedDataProcessor
|
||||
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
|
||||
from etl.db.db import get_db_session, init_db
|
||||
import pandas as pd
|
||||
|
||||
from urllib.parse import unquote
|
||||
|
||||
|
|
@ -72,6 +73,37 @@ class HubspotTodb():
|
|||
each_file.append(file_path)
|
||||
address_paths.update({address: each_file})
|
||||
return address_paths
|
||||
|
||||
def string_to_installer(self, installer):
    """Map a HubSpot installer name onto its ``SharePointInstaller`` member.

    The comparison ignores letter case and surrounding whitespace.

    Args:
        installer: Installer name as stored on the HubSpot deal. Anything that
            is not a string (``None``, or the float NaN pandas uses for an
            empty cell) is treated as an unknown installer.

    Returns:
        ``SharePointInstaller.JJC`` for "J & J CRUMP",
        ``SharePointInstaller.SOUTH_COAST_INSULATION`` for "SCIS",
        otherwise ``None``.
    """
    # .upper() on None/NaN would raise AttributeError, so reject non-strings first.
    if not isinstance(installer, str):
        return None

    normalised = installer.strip().upper()
    if normalised == "J & J CRUMP":
        return SharePointInstaller.JJC
    if normalised == "SCIS":
        return SharePointInstaller.SOUTH_COAST_INSULATION
    return None
|
||||
|
||||
def work_out_invoice(self, row):
    """Price a single HubSpot deal from the survey found at its SharePoint path.

    Args:
        row: One deal record. Must provide "HUBSPOT_INSTALLER" plus whatever
            ``gather_data_from_sharepoint_url`` reads from it.

    Returns:
        The result of ``self.sp.calculate_all_price`` for the merged
        one-deal frame.
    """
    survey_info = self.gather_data_from_sharepoint_url(row)
    installer_member = self.string_to_installer(row["HUBSPOT_INSTALLER"])

    # Both sides are wrapped as one-row frames so the shared merge routine can be reused.
    survey_record = self.sp.survey_to_pandas_format(surveyInfo=survey_info, installer=installer_member)
    survey_frame = pd.DataFrame([survey_record])
    deal_frame = pd.DataFrame([row])

    merged = self.sp.merge_hub_spot_and_survey_information_from_sharepoint_url(deal_frame, survey_frame)
    return self.sp.calculate_all_price(merged)
|
||||
|
||||
def gather_data_from_sharepoint_url(self, row):
    """Fetch one deal's survey files and wrap them in a ``surveyedDataProcessor``.

    Args:
        row: One deal record providing "HUBSPOT_INSTALLER",
            "HUBSPOT_SHAREPOINT_PATH", "HUBSPOT_DEAL_ADDRESS" and
            "HUBSPOT_DEAL_ID".

    Returns:
        A ``surveyedDataProcessor`` whose ``hubspot_deal_id`` is set to the
        deal's id, or ``None`` when no files were found for the deal.
    """
    scraper = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
    path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
    data_loc = self.create_files_locally(scraper, path, row["HUBSPOT_DEAL_ADDRESS"])

    # Make the "nothing downloaded" outcome explicit rather than relying on
    # the loop never executing.
    if not data_loc:
        return None

    # NOTE(review): a single deal is expected to yield a single address entry,
    # so only the first one is used - confirm against create_files_locally.
    address, file_loc = next(iter(data_loc.items()))
    sdp = surveyedDataProcessor(address, file_loc)
    sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"]
    return sdp
|
||||
|
||||
|
||||
def gather_data_from_each_sharepoint(self):
|
||||
self.get_all_deals()
|
||||
|
|
@ -90,29 +122,31 @@ class HubspotTodb():
|
|||
if fast is False:
|
||||
self.gather_data_from_each_sharepoint()
|
||||
with get_db_session() as session:
|
||||
self.load_pre_site_note(session)
|
||||
self.load_all_pre_site_note(session)
|
||||
session.commit()
|
||||
|
||||
def load_pre_site_note(self, db_session):
|
||||
def load_one_pre_site_note(self, db_session, surveyedData):
    """Persist one survey's pre-site-note records and link them to its building.

    Args:
        db_session: Database session passed through to every loader call.
        surveyedData: Survey processor exposing ``hubspot_deal_id`` and the
            ``load_*`` / ``create_*`` table helpers used below.

    Raises:
        IndexError: If ``self.deals_in_hubspot`` has no row whose
            "HUBSPOT_DEAL_ID" equals the survey's deal id.
    """
    # Resolve the HubSpot deal first so a missing deal fails before anything
    # is written to the session, and with a message that names the deal.
    deals = self.deals_in_hubspot
    deal_id = str(surveyedData.hubspot_deal_id)
    deal_rows = deals[deals["HUBSPOT_DEAL_ID"] == deal_id]
    if deal_rows.empty:
        raise IndexError(f"No HubSpot deal found with HUBSPOT_DEAL_ID == {deal_id!r}")
    landlord_id = deal_rows["HUBSPOT_LANDLORD_ID"].values[0]
    domna_id = deal_rows["HUBSPOT_DOMNA_ID"].values[0]

    assessor = surveyedData.load_assessor_table(db_session)
    # Loads the pre site summary information
    summary_info = surveyedData.load_pre_site_notes_summary_table(db_session)
    property_description = surveyedData.load_property_description(db_session)

    # Creates the final pre site note table that links all information
    presitenote = surveyedData.create_pre_site_note_table(
        db_session, assessor, summary_info, property_description
    )

    building_table = surveyedData.create_buildings_table(db_session, landlord_id, domna_id)
    surveyedData.create_document_table_via_pre_site_note(
        db_session, presitenote, assessor, building_table
    )
|
||||
|
||||
def load_all_pre_site_note(self, db_session):
    """Load the pre-site-note records for every survey in ``self.data_in_sharepoint``.

    Each survey is handed to ``load_one_pre_site_note`` in iteration order,
    all sharing the same ``db_session``.
    """
    for survey in self.data_in_sharepoint:
        self.load_one_pre_site_note(db_session=db_session, surveyedData=survey)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,27 +1,29 @@
|
|||
import os
|
||||
|
||||
|
||||
import pandas as pd
|
||||
# SECURITY: do not hard-code credentials in this script. Values committed to
# version control must be treated as leaked: rotate them, and provide every
# setting below through the process environment (shell export, a container
# `environment:` section, or an untracked .env file) before running.
REQUIRED_ENV_VARS = (
    "SHAREPOINT_CLIENT_ID",
    "SHAREPOINT_CLIENT_SECRET",
    "SHAREPOINT_TENANT_ID",
    "SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID",
    "JJC_SERVICE_SHAREPOINT_ID",
    "DATABASE_URL",
)

# Fail fast with one clear message before the ETL modules are imported.
_missing_env = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_env)
    )
|
||||
|
||||
from etl.surveyPrice.surveyPrice import SurveyPrice
|
||||
from etl.db.hubSpotLoad import HubspotTodb
|
||||
|
||||
|
||||
|
||||
# Price every HubSpot deal individually and stack the results into one frame.
dbLoader = HubspotTodb()

df = dbLoader.get_all_deals()

invoice_frames = [dbLoader.work_out_invoice(deal) for _, deal in df.iterrows()]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when HubSpot returned no deals.
panda_final = pd.concat(invoice_frames, ignore_index=True) if invoice_frames else pd.DataFrame()
|
||||
# For each deal
|
||||
# if not Validate:
|
||||
# move to a different stage, with option to add a note to this to state what the error was
|
||||
|
|
@ -29,4 +31,4 @@ df = dbLoader.get_all_deals()
|
|||
# load to db and move deal correctly to correct position
|
||||
#
|
||||
# Once grand list is finished for price
|
||||
# upload to sharepoint
|
||||
# upload to sharepoint
|
||||
|
|
@ -14,6 +14,7 @@ class SurveyPrice():
|
|||
self.master_rate_card_path = None
|
||||
self.all_hubspot_submissions = None
|
||||
self.all_survey_info_from_sharepoint = None
|
||||
self.downloaded_price_card = False
|
||||
|
||||
|
||||
self.required_sheets = [
|
||||
|
|
@ -66,16 +67,17 @@ class SurveyPrice():
|
|||
}
|
||||
|
||||
def download_price_card(self):
    """Download the master rate card from SharePoint once and return its local path.

    The first successful download stores the temp-file path in
    ``self.master_rate_card_path`` and sets ``self.downloaded_price_card`` so
    later calls return the stored path without contacting SharePoint again.

    Returns:
        The value of ``self.master_rate_card_path``; this is unchanged from
        its previous value when "MASTER RATE CARD.xlsx" is not present in
        "/Commercials/Rate Cards".
    """
    if self.downloaded_price_card:
        return self.master_rate_card_path

    # TODO: Some sanity checks to ensure rate cards title stays consistent
    folder_items = self.sharepoint_client.get_folders_in_path("/Commercials/Rate Cards")['value']
    url = next(
        (
            item['@microsoft.graph.downloadUrl']
            for item in folder_items
            if item['name'] == "MASTER RATE CARD.xlsx"
        ),
        None,
    )

    if url:
        content = self.sharepoint_client.get_file_content(url)
        self.master_rate_card_path = self.sharepoint_client.create_temp_file(content, "rate_card/rate_card_all.xlsx")
        # Record the download; without this the guard above never takes effect
        # and the rate card is fetched again for every deal that is priced.
        self.downloaded_price_card = True

    return self.master_rate_card_path
|
||||
|
||||
|
|
@ -169,19 +171,7 @@ class SurveyPrice():
|
|||
self.all_survey_info_from_sharepoint = pd.concat([jjc_pd, scis_pd], ignore_index=True)
|
||||
return self.all_survey_info_from_sharepoint
|
||||
|
||||
|
||||
def sharepoint_data_for_installer(self, installer):
|
||||
|
||||
sp = SharePointScraper(installer)
|
||||
file_paths = sp.download_file_for_each_address()
|
||||
surveys = []
|
||||
|
||||
for eachAddress in tqdm(file_paths):
|
||||
for address, files in eachAddress.items():
|
||||
surveys.append(surveyedDataProcessor(address, files))
|
||||
|
||||
all_survey_info = []
|
||||
for surveyInfo in surveys:
|
||||
def survey_to_pandas_format(self, surveyInfo, installer):
|
||||
cavity_wall_as_built = False
|
||||
csr = False
|
||||
foam_insulation = False
|
||||
|
|
@ -240,16 +230,28 @@ class SurveyPrice():
|
|||
"DOMNA JOB TYPE": "ECO4 PV"
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
|
||||
def sharepoint_data_for_installer(self, installer):
    """Download every survey for one installer and return them as a DataFrame.

    Args:
        installer: Installer identifier passed to ``SharePointScraper`` and
            forwarded to ``survey_to_pandas_format`` for each survey.

    Returns:
        A ``pandas.DataFrame`` with one row per survey, built from the
        records returned by ``survey_to_pandas_format``.
    """
    scraper = SharePointScraper(installer)
    file_paths = scraper.download_file_for_each_address()

    # Each element of file_paths maps an address to that address's files.
    surveys = [
        surveyedDataProcessor(address, files)
        for address_files in tqdm(file_paths)
        for address, files in address_files.items()
    ]

    # survey_to_pandas_format takes the installer as a required argument;
    # omitting it raises TypeError on the first survey.
    all_survey_info = [
        self.survey_to_pandas_format(surveyInfo=survey, installer=installer)
        for survey in surveys
    ]

    return pd.DataFrame(all_survey_info)
|
||||
|
||||
def merge_hub_spot_and_survey_information(self):
|
||||
if self.all_survey_info_from_sharepoint is None:
|
||||
raise RuntimeError("No survey information found from Sharepoint")
|
||||
if self.all_hubspot_submissions is None:
|
||||
raise RuntimeError("No information found from Hubspot")
|
||||
def merge_hub_spot_and_survey_information_from_sharepoint_url(self, hubspot_data, survey_data):
|
||||
|
||||
# Standardise address
|
||||
def extract_start_and_postcode(addr):
|
||||
|
|
@ -261,23 +263,23 @@ class SurveyPrice():
|
|||
return start, postcode
|
||||
|
||||
# Extract start + postcode from both datasets
|
||||
self.all_survey_info_from_sharepoint[['address_start', 'postcode']] = self.all_survey_info_from_sharepoint['SHAREPOINT ADDRESS'].apply(
|
||||
survey_data[['address_start', 'postcode']] = survey_data['SHAREPOINT ADDRESS'].apply(
|
||||
lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
)
|
||||
|
||||
self.all_hubspot_submissions[['address_start', 'postcode']] = self.all_hubspot_submissions['HUBSPOT_DEAL_ADDRESS'].apply(
|
||||
hubspot_data[['address_start', 'postcode']] = hubspot_data['HUBSPOT_DEAL_ADDRESS'].apply(
|
||||
lambda x: pd.Series(extract_start_and_postcode(x))
|
||||
)
|
||||
|
||||
|
||||
# re-name to installer
|
||||
self.all_survey_info_from_sharepoint = self.all_survey_info_from_sharepoint.rename(
|
||||
survey_data = survey_data.rename(
|
||||
columns={
|
||||
'SHAREPOINT FLOOR_AREA_BANDING': 'FLOOR_AREA_BANDING',
|
||||
}
|
||||
)
|
||||
|
||||
self.all_hubspot_submissions = self.all_hubspot_submissions.rename(
|
||||
hubspot_data = hubspot_data.rename(
|
||||
columns={
|
||||
'HUBSPOT_WETROOMS': 'NO_OF_WETROOMS',
|
||||
'HUBSPOT_TRICKLE_VENT': 'TRICKLE_VENT',
|
||||
|
|
@ -285,8 +287,8 @@ class SurveyPrice():
|
|||
)
|
||||
|
||||
merged_df = pd.merge(
|
||||
self.all_survey_info_from_sharepoint,
|
||||
self.all_hubspot_submissions,
|
||||
survey_data,
|
||||
hubspot_data,
|
||||
on=['address_start', 'postcode'],
|
||||
how='inner'
|
||||
)
|
||||
|
|
@ -294,8 +296,6 @@ class SurveyPrice():
|
|||
# if hubspot detects
|
||||
|
||||
merged_df.drop(columns=['address_start', 'postcode'], inplace=True)
|
||||
|
||||
|
||||
def compute_energy_grant(row):
|
||||
pre_band_letter = row["SHAREPOINT PRE_INSTALL_SAP_SCORE_BANDING"][-1]
|
||||
post_band_letter = surveyedDataProcessor.get_band(row["HUBSPOT_POST_INSTALL_SAP_SCORE"])[-1]
|
||||
|
|
@ -320,12 +320,28 @@ class SurveyPrice():
|
|||
merged_df["WORK TYPE"] = merged_df.apply(work_type, axis=1)
|
||||
|
||||
return merged_df
|
||||
|
||||
|
||||
def calculate_all_price(self):
|
||||
|
||||
def merge_hub_spot_and_survey_information(self):
    """Merge the cached HubSpot submissions with the cached SharePoint surveys.

    Returns:
        The result of ``merge_hub_spot_and_survey_information_from_sharepoint_url``
        called with ``self.all_hubspot_submissions`` and
        ``self.all_survey_info_from_sharepoint``.

    Raises:
        RuntimeError: If either cached dataset is still ``None``; the
            SharePoint survey data is checked first.
    """
    survey_frame = self.all_survey_info_from_sharepoint
    if survey_frame is None:
        raise RuntimeError("No survey information found from Sharepoint")

    hubspot_frame = self.all_hubspot_submissions
    if hubspot_frame is None:
        raise RuntimeError("No information found from Hubspot")

    return self.merge_hub_spot_and_survey_information_from_sharepoint_url(hubspot_frame, survey_frame)
|
||||
|
||||
|
||||
def calculate_all_price(self, merged_data=None):
|
||||
self.download_price_card()
|
||||
self.get_all_surveys_from_hubspot()
|
||||
self.get_all_surveyed_data_from_sharepoint()
|
||||
submission_data = self.merge_hub_spot_and_survey_information()
|
||||
if merged_data is None:
|
||||
self.get_all_surveys_from_hubspot()
|
||||
self.get_all_surveyed_data_from_sharepoint()
|
||||
submission_data = self.merge_hub_spot_and_survey_information()
|
||||
else:
|
||||
submission_data = merged_data
|
||||
|
||||
final_list = []
|
||||
for _, row in submission_data.iterrows():
|
||||
if "PV" in row["HUBSPOT_WORK_TYPE"].upper():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue