diff --git a/etl/filechecker.py b/etl/filechecker.py index 0cceadd..ce97157 100644 --- a/etl/filechecker.py +++ b/etl/filechecker.py @@ -68,6 +68,7 @@ for files in list_of_pictures: for file in content['value']: if 'file' in file: url = file['@microsoft.graph.downloadUrl'] + print(f"Downloading {files}/{file['name']}") sha256 = calculate_sha256(south_coast_scraper.get_file_content(url)) final_list.append({ "Directories": files, diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py index 9a19007..353ecd2 100644 --- a/etl/hubSpotClient/hubspot.py +++ b/etl/hubSpotClient/hubspot.py @@ -48,21 +48,18 @@ class HubSpotClient(): after = response.paging.next.after all_deals = [] - if hasattr(found_deals, "results"): - for deal in found_deals.results: - all_deals.append(SubmissionInfoFromDeal( - deal_id= deal.properties["hs_object_id"], - deal_name=deal.properties["dealname"], - work_type=deal.properties["work_type"], - needs_trickle_ventilation=True if deal.properties["property_needs_trickle_vents"].upper() == "YES" else False, - post_sap_score=int(deal.properties["domna_survey_post_sap"]), - existing_wall_insulation=deal.properties["existing_wall_insulation"], - no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), - installer=deal.properties["Installer"], - )) - return all_deals - else: - return None + for deal in found_deals: + all_deals.append(SubmissionInfoFromDeal( + deal_id= deal.properties["hs_object_id"], + deal_name=deal.properties["dealname"], + work_type=deal.properties["work_type"], + needs_trickle_ventilation=True if deal.properties["property_needs_trickle_vents"].upper() == "YES" else False, + post_sap_score=int(deal.properties["domna_survey_post_sap"]), + existing_wall_insulation=deal.properties["existing_wall_insulation"], + no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), + installer=deal.properties["installer"], + )) + return all_deals def print_all_pipeline_ids(self): pipelines 
= self.client.crm.pipelines.pipelines_api.get_all(object_type="deals") diff --git a/etl/hubspot_to_deemed_calculator.py b/etl/hubspot_to_deemed_calculator.py deleted file mode 100644 index 0081db6..0000000 --- a/etl/hubspot_to_deemed_calculator.py +++ /dev/null @@ -1,132 +0,0 @@ -from etl.hubSpotClient.hubspot import HubSpotClient, DealStage -import pandas as pd -from etl.jjc_old_lewis_manual_way_ import get_jjc_price_matrix, work_out_total_floor_area, type_of_work, get_band -from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING -from etl.surveyedData.surveryedData import surveyedDataProcessor -import os -os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" -os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" -os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" -os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" - - -hubSpotClient = HubSpotClient() - -deals = hubSpotClient.get_deals_from_deal_stage(DealStage.CUSTOMER_CONTACTED) -print(deals) -hubSpotClient.print_all_pipeline_ids() - -csv_list = [] - -for deal in deals: - csv_list.append({ - "hubspot_work_type": deal.work_type, - "Address": deal.deal_name, - "Trickle Vent": 1 if deal.needs_trickle_ventilation else 0, - "wetrooms": deal.no_of_wet_rooms, - "hubspot_wall_insulation": deal.existing_wall_insulation, - "POST INSTALL SAP SCORE": deal.post_sap_score, - "Installer": deal.installer, - }) - -# hubspot_submissions = pd.DataFrame(csv_list) -# price_empty = get_jjc_price_matrix() -# price_foam = get_jjc_price_matrix("foam.csv") -# price_general = get_jjc_price_matrix("general.csv") -# total_price = [] - - -# jjc = SharePointScraper(SharePointInstaller.JJC, development=True) -# file_paths = jjc.download_file_for_each_address() -# list_of_surveys = [] - -# for eachAddress in file_paths: -# for address, files in eachAddress.items(): -# 
list_of_surveys.append(surveyedDataProcessor(address, files)) -# for survey in list_of_surveys: -# if survey.pre_site_note: -# floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note) -# letter, number = survey.pre_site_note.survey_information.current_sap.split(" ") -# pre_sap_score = number+letter - -# address = survey.pre_site_note.survey_information.address.split(",") -# address = [item.strip() for item in address][0] -# filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())] -# if len(filtered_df) == 1: -# funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1]) -# data = { -# "Address": survey.pre_site_note.survey_information.address, -# "HubSpot Address": filtered_df["Address"].values[0], -# "Pre SAP from sharepoint": number, -# "Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0], -# "Surveyor's Name": survey.pre_site_note.assessor_information.name, -# "floor_area_group" : floor_banding, -# "wetrooms" : filtered_df["wetrooms"].values[0], -# "Trickle Vent" : filtered_df["Trickle Vent"].values[0], -# "survey_stated_work_type": filtered_df["hubspot_work_type"].values[0], -# } - -# csr_insulation = None -# merged_df = pd.DataFrame() -# if survey.csr: -# if survey.csr.insulation_info: -# csr_insulation = survey.csr.insulation_info.type.upper() - -# hubspot_wall_insulation = None -# hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0] -# data.update({"csr_insulation": csr_insulation}) -# data.update({"hubspot_wall_insulation": hubspot_wall_insulation}) - -# if funding_type == "GBIS": -# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": -# data.update({"funding": funding_type.upper()}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# elif "FOAM" in 
csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): -# data.update({"funding": funding_type.upper() + " Remedial"}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# else: -# data.update({"funding": funding_type.upper() + " Remedial"}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# elif funding_type == "ECO4": -# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": -# formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" -# data.update({"funding": formatted_funding_type}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): -# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" -# data.update({"funding": formatted_funding_type}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# else: -# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" -# data.update({"funding": formatted_funding_type}) -# df = pd.DataFrame([data]) -# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') -# else: -# raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}") - -# if not merged_df.empty: -# total_price.append(merged_df) - -# final_df = pd.concat(total_price, ignore_index=True) - -# final_df.to_csv("survery_data.csv", 
index=False) - - -# print(f"WEEK COMMENCING {WEEK_COMMENCING}") -# print("Excel file 'survey_data.xlsx' created successfully!") - - - -# All three installers - - - -# Rate card -# Different installer detection detection -# Sheet for each installer -# Uploaded into sharepoint -> Adminstrators -> different location -> one document per housing association \ No newline at end of file diff --git a/etl/hubspot_to_jjc_deemed_calculator.py b/etl/hubspot_to_jjc_deemed_calculator.py new file mode 100644 index 0000000..7137e07 --- /dev/null +++ b/etl/hubspot_to_jjc_deemed_calculator.py @@ -0,0 +1,130 @@ +from etl.hubSpotClient.hubspot import HubSpotClient, DealStage +import pandas as pd +from etl.jjc_old_lewis_manual_way_ import get_jjc_price_matrix, work_out_total_floor_area, type_of_work, get_band +from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING +from etl.surveyedData.surveryedData import surveyedDataProcessor +import os +os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" +os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" +os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" + + +hubSpotClient = HubSpotClient() + +deals = hubSpotClient.get_deals_from_deal_stage(DealStage.SURVEYED_COMPLETE_NEEDS_SIGN_OFF) + +csv_list = [] + +for deal in deals: + csv_list.append({ + "hubspot_work_type": deal.work_type, + "Address": deal.deal_name, + "Trickle Vent": 1 if deal.needs_trickle_ventilation else 0, + "wetrooms": deal.no_of_wet_rooms, + "hubspot_wall_insulation": deal.existing_wall_insulation, + "POST INSTALL SAP SCORE": deal.post_sap_score, + "Installer": deal.installer, + }) + +hubspot_submissions = pd.DataFrame(csv_list) +price_empty = get_jjc_price_matrix() +price_foam = get_jjc_price_matrix("foam.csv") +price_general = get_jjc_price_matrix("general.csv") +total_price = [] + + 
+jjc = SharePointScraper(SharePointInstaller.JJC, development=True) +file_paths = jjc.download_file_for_each_address() +list_of_surveys = [] + +for eachAddress in file_paths: + for address, files in eachAddress.items(): + list_of_surveys.append(surveyedDataProcessor(address, files)) +for survey in list_of_surveys: + if survey.pre_site_note: + floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note) + letter, number = survey.pre_site_note.survey_information.current_sap.split(" ") + pre_sap_score = number+letter + + address = survey.pre_site_note.survey_information.address.split(",") + address = [item.strip() for item in address][0] + filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())] + if len(filtered_df) == 1: + funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1]) + data = { + "Address": survey.pre_site_note.survey_information.address, + "HubSpot Address": filtered_df["Address"].values[0], + "Pre SAP from sharepoint": number, + "Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0], + "Surveyor's Name": survey.pre_site_note.assessor_information.name, + "floor_area_group" : floor_banding, + "wetrooms" : filtered_df["wetrooms"].values[0], + "Trickle Vent" : filtered_df["Trickle Vent"].values[0], + "survey_stated_work_type": filtered_df["hubspot_work_type"].values[0], + } + + csr_insulation = None + merged_df = pd.DataFrame() + if survey.csr: + if survey.csr.insulation_info: + csr_insulation = survey.csr.insulation_info.type.upper() + + hubspot_wall_insulation = None + hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0] + data.update({"csr_insulation": csr_insulation}) + data.update({"hubspot_wall_insulation": hubspot_wall_insulation}) + + if funding_type == "GBIS": + if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": + data.update({"funding": 
funding_type.upper()}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): + data.update({"funding": funding_type.upper() + " Remedial"}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + else: + data.update({"funding": funding_type.upper() + " Remedial"}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + elif funding_type == "ECO4": + if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": + formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" + data.update({"funding": formatted_funding_type}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): + formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" + data.update({"funding": formatted_funding_type}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + else: + formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" + data.update({"funding": formatted_funding_type}) + df = pd.DataFrame([data]) + merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') + else: + raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}") + + 
if not merged_df.empty: + total_price.append(merged_df) + +final_df = pd.concat(total_price, ignore_index=True) + +final_df.to_csv("survery_data.csv", index=False) + + +print(f"WEEK COMMENCING {WEEK_COMMENCING}") +print("Excel file 'survey_data.xlsx' created successfully!") + + + +# All three installers + + + +# Rate card +# Different installer detection +# Sheet for each installer +# Uploaded into sharepoint -> Administrators -> different location -> one document per housing association \ No newline at end of file diff --git a/etl/surveyPrice/__init__.py b/etl/surveyPrice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py new file mode 100644 index 0000000..e69de29