From ba521fb004bd40eba5cf9d06f9f5c375108bfc2a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 23 Apr 2025 14:35:45 +0000 Subject: [PATCH] solar now works --- .vscode/settings.json | 5 +- etl/hubSpotClient/hubspot.py | 2 +- etl/hubspot_to_invoice.py | 4 +- etl/imagefilenamechcker.py | 123 ++++++++++++++++++--------------- etl/surveyPrice/surveyPrice.py | 14 ++-- 5 files changed, 84 insertions(+), 64 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index e8c08c6..9868a02 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,9 @@ { "jupyter.interactiveWindow.textEditor.executeSelection": true, - "python.REPL.sendToNativeREPL": true + "python.REPL.sendToNativeREPL": true, + "notebook.output.scrolling": true, + "notebook.output.textLineLimit": 0 + // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py index 97dee61..90c551b 100644 --- a/etl/hubSpotClient/hubspot.py +++ b/etl/hubSpotClient/hubspot.py @@ -152,7 +152,7 @@ class HubSpotClient(): deal_id= deal.properties["hs_object_id"], deal_name=deal.properties["dealname"], work_type=deal.properties["work_type"], - needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents") else False, + needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents", "NO").upper() == "YES" else False, post_sap_score=int(deal.properties["domna_survey_post_sap"]), existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None", no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]), diff --git a/etl/hubspot_to_invoice.py b/etl/hubspot_to_invoice.py index a106b4b..7413e49 100644 --- a/etl/hubspot_to_invoice.py +++ b/etl/hubspot_to_invoice.py @@ -69,4 +69,6 @@ sp.move_deals_to_completed(deal_ids) # Compare value with what I should get and in the deem score. Keep tabs below so I can check easily -# Change w.c. date to a weird one to speed up automation \ No newline at end of file +# Change w.c. date to a weird one to speed up automation + +value = df[df["SHAREPOINT ADDRESS"] == "29 Lower King, BRAINTREE, CM7 3XZ"] \ No newline at end of file diff --git a/etl/imagefilenamechcker.py b/etl/imagefilenamechcker.py index 4e32855..f95666b 100644 --- a/etl/imagefilenamechcker.py +++ b/etl/imagefilenamechcker.py @@ -8,72 +8,87 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COM import pandas as pd import hashlib -def calculate_sha256(bytes_io): - bytes_io.seek(0) # Make sure we're at the start - data = bytes_io.read() - return hashlib.sha256(data).hexdigest() - -south_coast_scraper = SharePointScraper(SharePointInstaller.JJC) +def get_photos_name(installer): + south_coast_scraper = SharePointScraper(installer) + folders = south_coast_scraper.get_folders_in_path('/') -folders = south_coast_scraper.get_folders_in_path('/') + list_of_file_names = [] + for folder in folders['value']: + if "Khalim" in folder["name"]: + continue + elif ".Training" in folder["name"]: + continue + if 'file' not in folder: + list_of_file_names.append("/" + folder["name"]) + + list_of_dates = [] + for i, folder in enumerate(list_of_file_names): + print(f"getting dates {i}") + dates = south_coast_scraper.get_folders_in_path(folder) + for date in dates['value']: + if 'file' not in date: + list_of_dates.append(folder + "/" + date["name"]) -list_of_file_names = [] -for folder in folders['value']: - if "Khalim" in folder["name"]: - continue - elif ".Training" in folder["name"]: - continue - if 'file' not in folder: - list_of_file_names.append("/" + folder["name"]) + list_of_housing_associations = [] + for i, folder in enumerate(list_of_dates): + print(f"getting housing assoication {i}") + house_ass = south_coast_scraper.get_folders_in_path(folder) + for house in house_ass['value']: + if 'file' not in house: + list_of_housing_associations.append(folder + "/" + house["name"]) + list_of_address = [] -list_of_dates = [] -for folder in list_of_file_names: - dates = south_coast_scraper.get_folders_in_path(folder) - for date in dates['value']: - if 'file' not in date: - list_of_dates.append(folder + "/" + date["name"]) + for i, folder in enumerate(list_of_housing_associations): + print(f"getting address {i}") + address = south_coast_scraper.get_folders_in_path(folder) + for add in address['value']: + if 'file' not in add: + list_of_address.append(folder + "/" + add['name']) -print(list_of_dates) + list_of_pictures = [] -list_of_housing_associations = [] -for folder in list_of_dates: - house_ass = south_coast_scraper.get_folders_in_path(folder) - for house in house_ass['value']: - if 'file' not in house: - list_of_housing_associations.append(folder + "/" + house["name"]) + for i, folder in enumerate(list_of_address): + print(f"getting pictures {i}") + pictures = south_coast_scraper.get_folders_in_path(folder) + for pic in pictures['value']: + if 'file' not in pic: + list_of_pictures.append(folder + "/" + pic['name']) -list_of_address = [] + print(list_of_pictures) -for folder in list_of_housing_associations: - address = south_coast_scraper.get_folders_in_path(folder) - for add in address['value']: - if 'file' not in add: - list_of_address.append(folder + "/" + add['name']) + final_list = [] + for i,files in enumerate(list_of_pictures): + print(f"for finali list {i}") -list_of_pictures = [] + content = south_coast_scraper.get_folders_in_path(files) + parts = files.split("/") + date = None + for part in parts: + if part.startswith("W.C."): + date = part # Output: W.C. 17.03.2025 + for file in content['value']: + if 'file' in file: + final_list.append({ + "Date": date, + "path": file, + "Photo Name": file['name'], + }) -for folder in list_of_address: - pictures = south_coast_scraper.get_folders_in_path(folder) - for pic in pictures['value']: - if 'file' not in pic: - list_of_pictures.append(folder + "/" + pic['name']) + final_df = pd.DataFrame(final_list) + return final_df -print(list_of_pictures) +jjc_df = get_photos_name(SharePointInstaller.JJC) +scis_df = get_photos_name(SharePointInstaller.SOUTH_COAST_INSULATION) -final_list = [] -for files in list_of_pictures: - content = south_coast_scraper.get_folders_in_path(files) - for file in content['value']: - if 'file' in file: - url = file['@microsoft.graph.downloadUrl'] - print(f"Downloading {files}/{file['name']}") - final_list.append({ - "Directories": files, - "Photo Name": file['name'], - }) +all_df = [jjc_df, scis_df] -final_df = pd.DataFrame(final_list) +final_df = pd.concat(all_df, ignore_index=True) +final_df -final_df.to_csv("jjc.csv") \ No newline at end of file +final_df.to_csv("photos_name.csv") + +duplicate_names = final_df[final_df.duplicated('Photo Name', keep=False)] +df = final_df +dupe_names_df = df[df.duplicated('Photo Name', keep=False)].sort_values('Photo Name') diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py index 8fe1f64..c58dd8b 100644 --- a/etl/surveyPrice/surveyPrice.py +++ b/etl/surveyPrice/surveyPrice.py @@ -36,11 +36,11 @@ class SurveyPrice(): } self.domna_job_to_price_sheet_convertor = { - "JJC - SOLAR": "JJC - SOLAR", + "JJC - ECO4 PV": "JJC - SOLAR", "JJC - EMPTY CAVITY": "JJC - EMPTIES", "JJC - REMIDIAL FOAM FILLED CAVITY": "JJC - FORMALDEHYDE EXTRACTION", "JJC - REMIDIAL FILLED CAVITY": "JJC - GENERAL EXTRACTIONS", - "SCIS - SOLAR": "SCIS - SOLAR", + "SCIS - ECO4 PV": "SCIS - SOLAR", "SCIS - EMPTY CAVITY": "SCIS - EMPTIES", "SCIS - REMIDIAL FOAM FILLED CAVITY": "SCIS - FORMALDEHYDE EXTRACTION", "SCIS - REMIDIAL FILLED CAVITY": "SCIS - GENERAL EXTRACTIONS", @@ -214,10 +214,10 @@ class SurveyPrice(): else: info.update({ "DOMNA JOB TYPE": "EMPTY CAVITY" - }) + }) else: info.update({ - "DOMNA JOB TYPE": "SOLAR" + "DOMNA JOB TYPE": "ECO4 PV" }) @@ -305,13 +305,13 @@ class SurveyPrice(): submission_data = self.merge_hub_spot_and_survey_information() final_list = [] for _, row in submission_data.iterrows(): - if "SOLAR" in row["DOMNA JOB TYPE"].upper(): - sheet_name = f"{self.domna_job_to_price_sheet_convertor[f'{self.installer[row["HUBSPOT_INSTALLER"]]} - {row["DOMNA JOB TYPE"]}'].upper()}" + if "PV" in row["HUBSPOT_WORK_TYPE"].upper(): + sheet_name = f"{self.domna_job_to_price_sheet_convertor[f'{self.installer[row["HUBSPOT_INSTALLER"]]} - {row["HUBSPOT_WORK_TYPE"]}'].upper()}" price_matrix = self.get_price_matrix(sheet_name) merged_row = pd.merge( row.to_frame().T, price_matrix, - left_on='DOMNA JOB TYPE', + left_on='HUBSPOT_WORK_TYPE', right_on='WORK TYPE', how='outer' )