diff --git a/etl/filechecker.py b/etl/filechecker.py new file mode 100644 index 0000000..0cceadd --- /dev/null +++ b/etl/filechecker.py @@ -0,0 +1,80 @@ +import os +os.environ["SHAREPOINT_CLIENT_ID"] = "895e3b77-b1d7-43ec-b18f-dcfe07cdfeaf" +os.environ["SHAREPOINT_CLIENT_SECRET"] = "SOf8Q~-is4wdQiqvEEm9FlJQRAY9ELGaj5Qz-a6E" +os.environ["SHAREPOINT_TENANT_ID"] = "c3f7519c-2719-4547-af04-6da6cbfd8f8f" +os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0-b03e-90ec7681e2d3" +os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" +from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COMMENCING +import pandas as pd +import hashlib + +def calculate_sha256(bytes_io): + bytes_io.seek(0) # Make sure we're at the start + data = bytes_io.read() + return hashlib.sha256(data).hexdigest() + +south_coast_scraper = SharePointScraper(SharePointInstaller.JJC) + + +folders = south_coast_scraper.get_folders_in_path('/') + + +list_of_file_names = [] +for folder in folders['value']: + if "Khalim" in folder["name"]: + continue + elif ".Training" in folder["name"]: + continue + if 'file' not in folder: + list_of_file_names.append("/" + folder["name"]) + +list_of_dates = [] +for folder in list_of_file_names: + dates = south_coast_scraper.get_folders_in_path(folder) + for date in dates['value']: + if 'file' not in date: + list_of_dates.append(folder + "/" + date["name"]) + +print(list_of_dates) + +list_of_housing_associations = [] +for folder in list_of_dates: + house_ass = south_coast_scraper.get_folders_in_path(folder) + for house in house_ass['value']: + if 'file' not in house: + list_of_housing_associations.append(folder + "/" + house["name"]) + +list_of_address = [] + +for folder in list_of_housing_associations: + address = south_coast_scraper.get_folders_in_path(folder) + for add in address['value']: + if 'file' not in add: + list_of_address.append(folder + "/" + add['name']) + +list_of_pictures = [] + +for folder in list_of_address: + pictures = south_coast_scraper.get_folders_in_path(folder) + for pic in pictures['value']: + if 'file' not in pic: + list_of_pictures.append(folder + "/" + pic['name']) + +print(list_of_pictures) + +final_list = [] +for files in list_of_pictures: + content = south_coast_scraper.get_folders_in_path(files) + for file in content['value']: + if 'file' in file: + url = file['@microsoft.graph.downloadUrl'] + sha256 = calculate_sha256(south_coast_scraper.get_file_content(url)) + final_list.append({ + "Directories": files, + "Photo Name": file['name'], + "sha256": sha256, + }) + +final_df = pd.DataFrame(final_list) + +final_df.to_csv("jjc.csv") \ No newline at end of file diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py index 283a7ce..9a19007 100644 --- a/etl/hubSpotClient/hubspot.py +++ b/etl/hubSpotClient/hubspot.py @@ -21,7 +21,6 @@ class HubSpotClient(): found_deals = [] after = None while True: - print("hello world") search_request = PublicObjectSearchRequest( filter_groups=[{ "filters": [{ @@ -36,7 +35,8 @@ class HubSpotClient(): "work_type", "property_needs_trickle_vents", "domna_survey_post_sap", - "existing_wall_insulation" + "existing_wall_insulation", + "installer", ], limit=200, after=after, @@ -46,7 +46,6 @@ class HubSpotClient(): if not response.paging or not response.paging.next: break after = response.paging.next.after - return found_deals all_deals = [] if hasattr(found_deals, "results"): @@ -58,7 +57,8 @@ class HubSpotClient(): needs_trickle_ventilation=True if deal.properties["property_needs_trickle_vents"].upper() == "YES" else False, post_sap_score=int(deal.properties["domna_survey_post_sap"]), existing_wall_insulation=deal.properties["existing_wall_insulation"], - no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]) + no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), + installer=deal.properties["Installer"], )) return all_deals else: @@ -70,4 +70,4 @@ class HubSpotClient(): print(f"Pipeline: {pipeline.label}") for stage in pipeline.stages: print(f" - Label: {stage.label}") - print(f" ID: {stage.id}") # \ No newline at end of file + print(f" ID: {stage.id}") # diff --git a/etl/hubSpotClient/types.py b/etl/hubSpotClient/types.py index c109f53..b4dd30e 100644 --- a/etl/hubSpotClient/types.py +++ b/etl/hubSpotClient/types.py @@ -17,4 +17,5 @@ class SubmissionInfoFromDeal(BaseModel): needs_trickle_ventilation: bool post_sap_score: int existing_wall_insulation: str - no_of_wet_rooms: int \ No newline at end of file + no_of_wet_rooms: int + installer: str \ No newline at end of file diff --git a/etl/hubspot_to_deemed_calculator.py b/etl/hubspot_to_deemed_calculator.py index 7dcb212..0081db6 100644 --- a/etl/hubspot_to_deemed_calculator.py +++ b/etl/hubspot_to_deemed_calculator.py @@ -13,7 +13,7 @@ os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" hubSpotClient = HubSpotClient() deals = hubSpotClient.get_deals_from_deal_stage(DealStage.CUSTOMER_CONTACTED) - +print(deals) hubSpotClient.print_all_pipeline_ids() csv_list = [] @@ -26,99 +26,99 @@ for deal in deals: "wetrooms": deal.no_of_wet_rooms, "hubspot_wall_insulation": deal.existing_wall_insulation, "POST INSTALL SAP SCORE": deal.post_sap_score, + "Installer": deal.installer, }) - -hubspot_submissions = pd.DataFrame(csv_list) -price_empty = get_jjc_price_matrix() -price_foam = get_jjc_price_matrix("foam.csv") -price_general = get_jjc_price_matrix("general.csv") -total_price = [] +# hubspot_submissions = pd.DataFrame(csv_list) +# price_empty = get_jjc_price_matrix() +# price_foam = get_jjc_price_matrix("foam.csv") +# price_general = get_jjc_price_matrix("general.csv") +# total_price = [] -jjc = SharePointScraper(SharePointInstaller.JJC, development=True) -file_paths = jjc.download_file_for_each_address() -list_of_surveys = [] +# jjc = SharePointScraper(SharePointInstaller.JJC, development=True) +# file_paths = jjc.download_file_for_each_address() +# list_of_surveys = [] -for eachAddress in file_paths: - for address, files in eachAddress.items(): - list_of_surveys.append(surveyedDataProcessor(address, files)) -for survey in list_of_surveys: - if survey.pre_site_note: - floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note) - letter, number = survey.pre_site_note.survey_information.current_sap.split(" ") - pre_sap_score = number+letter +# for eachAddress in file_paths: +# for address, files in eachAddress.items(): +# list_of_surveys.append(surveyedDataProcessor(address, files)) +# for survey in list_of_surveys: +# if survey.pre_site_note: +# floor_banding, total_floor_area = work_out_total_floor_area(survey.pre_site_note) +# letter, number = survey.pre_site_note.survey_information.current_sap.split(" ") +# pre_sap_score = number+letter - address = survey.pre_site_note.survey_information.address.split(",") - address = [item.strip() for item in address][0] - filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())] - if len(filtered_df) == 1: - funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1]) - data = { - "Address": survey.pre_site_note.survey_information.address, - "HubSpot Address": filtered_df["Address"].values[0], - "Pre SAP from sharepoint": number, - "Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0], - "Surveyor's Name": survey.pre_site_note.assessor_information.name, - "floor_area_group" : floor_banding, - "wetrooms" : filtered_df["wetrooms"].values[0], - "Trickle Vent" : filtered_df["Trickle Vent"].values[0], - "survey_stated_work_type": filtered_df["hubspot_work_type"].values[0], - } +# address = survey.pre_site_note.survey_information.address.split(",") +# address = [item.strip() for item in address][0] +# filtered_df = hubspot_submissions[hubspot_submissions["Address"].apply(lambda x: address.upper() == x.split(",")[0].upper())] +# if len(filtered_df) == 1: +# funding_type = type_of_work(letter.upper(), get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])[-1]) +# data = { +# "Address": survey.pre_site_note.survey_information.address, +# "HubSpot Address": filtered_df["Address"].values[0], +# "Pre SAP from sharepoint": number, +# "Post SAP from surveyor": filtered_df["POST INSTALL SAP SCORE"].values[0], +# "Surveyor's Name": survey.pre_site_note.assessor_information.name, +# "floor_area_group" : floor_banding, +# "wetrooms" : filtered_df["wetrooms"].values[0], +# "Trickle Vent" : filtered_df["Trickle Vent"].values[0], +# "survey_stated_work_type": filtered_df["hubspot_work_type"].values[0], +# } - csr_insulation = None - merged_df = pd.DataFrame() - if survey.csr: - if survey.csr.insulation_info: - csr_insulation = survey.csr.insulation_info.type.upper() +# csr_insulation = None +# merged_df = pd.DataFrame() +# if survey.csr: +# if survey.csr.insulation_info: +# csr_insulation = survey.csr.insulation_info.type.upper() - hubspot_wall_insulation = None - hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0] - data.update({"csr_insulation": csr_insulation}) - data.update({"hubspot_wall_insulation": hubspot_wall_insulation}) +# hubspot_wall_insulation = None +# hubspot_wall_insulation = filtered_df["hubspot_wall_insulation"].values[0] +# data.update({"csr_insulation": csr_insulation}) +# data.update({"hubspot_wall_insulation": hubspot_wall_insulation}) - if funding_type == "GBIS": - if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": - data.update({"funding": funding_type.upper()}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): - data.update({"funding": funding_type.upper() + " Remedial"}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - else: - data.update({"funding": funding_type.upper() + " Remedial"}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - elif funding_type == "ECO4": - if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": - formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" - data.update({"funding": formatted_funding_type}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): - formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" - data.update({"funding": formatted_funding_type}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - else: - formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" - data.update({"funding": formatted_funding_type}) - df = pd.DataFrame([data]) - merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') - else: - raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}") +# if funding_type == "GBIS": +# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": +# data.update({"funding": funding_type.upper()}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): +# data.update({"funding": funding_type.upper() + " Remedial"}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# else: +# data.update({"funding": funding_type.upper() + " Remedial"}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# elif funding_type == "ECO4": +# if csr_insulation is None and hubspot_wall_insulation.upper() == "EMPTY": +# formatted_funding_type = f"{funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" +# data.update({"funding": formatted_funding_type}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_empty, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# elif "FOAM" in csr_insulation.upper() and "FOAM" in hubspot_wall_insulation.upper(): +# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" +# data.update({"funding": formatted_funding_type}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_foam, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# else: +# formatted_funding_type = f"REMEDIAL - {funding_type.upper()} - SAP {get_band(int(number))} to {get_band(filtered_df["POST INSTALL SAP SCORE"].values[0])}" +# data.update({"funding": formatted_funding_type}) +# df = pd.DataFrame([data]) +# merged_df = pd.merge(df, price_general, on=['funding', 'Trickle Vent', 'floor_area_group', 'wetrooms'], how='left') +# else: +# raise RuntimeError(f"UNKNOWN FUNDING TYPE {funding_type}") - if not merged_df.empty: - total_price.append(merged_df) +# if not merged_df.empty: +# total_price.append(merged_df) -final_df = pd.concat(total_price, ignore_index=True) +# final_df = pd.concat(total_price, ignore_index=True) -final_df.to_csv("survery_data.csv", index=False) +# final_df.to_csv("survery_data.csv", index=False) -print(f"WEEK COMMENCING {WEEK_COMMENCING}") -print("Excel file 'survey_data.xlsx' created successfully!") +# print(f"WEEK COMMENCING {WEEK_COMMENCING}") +# print("Excel file 'survey_data.xlsx' created successfully!")