From ba521fb004bd40eba5cf9d06f9f5c375108bfc2a Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte@domna.homes>
Date: Wed, 23 Apr 2025 14:35:45 +0000
Subject: [PATCH] solar now works

---
 .vscode/settings.json          |   5 +-
 etl/hubSpotClient/hubspot.py   |   2 +-
 etl/hubspot_to_invoice.py      |   4 +-
 etl/imagefilenamechcker.py     | 123 ++++++++++++++++++---------------
 etl/surveyPrice/surveyPrice.py |  14 ++--
 5 files changed, 84 insertions(+), 64 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index e8c08c6..9868a02 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,9 @@
 {
     "jupyter.interactiveWindow.textEditor.executeSelection": true,
-    "python.REPL.sendToNativeREPL": true
+    "python.REPL.sendToNativeREPL": true,
+    "notebook.output.scrolling": true,
+    "notebook.output.textLineLimit": 0
+
 
     // Hot reload setting that needs to be in user settings
     // "jupyter.runStartupCommands": [
diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py
index 97dee61..90c551b 100644
--- a/etl/hubSpotClient/hubspot.py
+++ b/etl/hubSpotClient/hubspot.py
@@ -152,7 +152,7 @@ class HubSpotClient():
                 deal_id= deal.properties["hs_object_id"],
                 deal_name=deal.properties["dealname"],
                 work_type=deal.properties["work_type"],
-                needs_trickle_ventilation=True if deal.properties.get("property_needs_trickle_vents") else False,
+                needs_trickle_ventilation=(deal.properties.get("property_needs_trickle_vents") or "NO").upper() == "YES",  # `or` also covers a key that is present but None/empty
                 post_sap_score=int(deal.properties["domna_survey_post_sap"]),
                 existing_wall_insulation=deal.properties.get("existing_wall_insulation") if deal.properties.get("existing_wall_insulation") else "None",
                 no_of_wet_rooms=int(deal.properties["number_of_wet_rooms_needing_ventilation"]),
diff --git a/etl/hubspot_to_invoice.py b/etl/hubspot_to_invoice.py
index a106b4b..7413e49 100644
--- a/etl/hubspot_to_invoice.py
+++ b/etl/hubspot_to_invoice.py
@@ -69,4 +69,6 @@ sp.move_deals_to_completed(deal_ids)
 
 # Compare value with what I should get and in the deem score. Keep tabs below so I can check easily
 
-# Change w.c. date to a weird one to speed up automation
\ No newline at end of file
+# Change w.c. date to a weird one to speed up automation
+
+value = df[df["SHAREPOINT ADDRESS"] == "29 Lower King, BRAINTREE, CM7 3XZ"]
\ No newline at end of file
diff --git a/etl/imagefilenamechcker.py b/etl/imagefilenamechcker.py
index 4e32855..f95666b 100644
--- a/etl/imagefilenamechcker.py
+++ b/etl/imagefilenamechcker.py
@@ -8,72 +8,87 @@ from etl.scraper.scraper import SharePointScraper, SharePointInstaller, WEEK_COM
 import pandas as pd
 import hashlib
 
-def calculate_sha256(bytes_io):
-    bytes_io.seek(0)  # Make sure we're at the start
-    data = bytes_io.read()
-    return hashlib.sha256(data).hexdigest()
-
-south_coast_scraper = SharePointScraper(SharePointInstaller.JJC)
+def get_photos_name(installer):
+    south_coast_scraper = SharePointScraper(installer)
+    folders = south_coast_scraper.get_folders_in_path('/')
 
 
-folders = south_coast_scraper.get_folders_in_path('/')
+    list_of_file_names = []
+    for folder in folders['value']:
+        if "Khalim" in folder["name"]:
+            continue
+        elif ".Training" in folder["name"]:
+            continue
+        if 'file' not in folder:
+            list_of_file_names.append("/" + folder["name"])
+
+    list_of_dates = []
+    for i, folder in enumerate(list_of_file_names):
+        print(f"getting dates {i}")
+        dates = south_coast_scraper.get_folders_in_path(folder)
+        for date in dates['value']:
+            if 'file' not in date:
+                list_of_dates.append(folder + "/" + date["name"])
 
 
-list_of_file_names = []
-for folder in folders['value']:
-    if "Khalim" in folder["name"]:
-        continue
-    elif ".Training" in folder["name"]:
-        continue
-    if 'file' not in folder:
-        list_of_file_names.append("/" + folder["name"])
+    list_of_housing_associations = []
+    for i, folder in enumerate(list_of_dates):
+        print(f"getting housing assoication {i}")
+        house_ass = south_coast_scraper.get_folders_in_path(folder)
+        for house in house_ass['value']:
+            if 'file' not in house:
+                list_of_housing_associations.append(folder + "/" + house["name"])
+    list_of_address = []
 
-list_of_dates = []
-for folder in list_of_file_names:
-    dates = south_coast_scraper.get_folders_in_path(folder)
-    for date in dates['value']:
-        if 'file' not in date:
-            list_of_dates.append(folder + "/" + date["name"])
+    for i, folder in enumerate(list_of_housing_associations):
+        print(f"getting address {i}")
+        address = south_coast_scraper.get_folders_in_path(folder)
+        for add in address['value']:
+            if 'file' not in add:
+                list_of_address.append(folder + "/" + add['name'])
 
-print(list_of_dates)
+    list_of_pictures = []
 
-list_of_housing_associations = []
-for folder in list_of_dates:
-    house_ass = south_coast_scraper.get_folders_in_path(folder)
-    for house in house_ass['value']:
-        if 'file' not in house:
-            list_of_housing_associations.append(folder + "/" + house["name"])
+    for i, folder in enumerate(list_of_address):
+        print(f"getting pictures {i}")
+        pictures = south_coast_scraper.get_folders_in_path(folder)
+        for pic in pictures['value']:
+            if 'file' not in pic:
+                list_of_pictures.append(folder + "/" + pic['name'])
 
-list_of_address = []
+    print(list_of_pictures)
 
-for folder in list_of_housing_associations:
-    address = south_coast_scraper.get_folders_in_path(folder)
-    for add in address['value']:
-        if 'file' not in add:
-            list_of_address.append(folder + "/" + add['name'])
+    final_list = []
+    for i,files in enumerate(list_of_pictures):
+        print(f"for finali list {i}")
 
-list_of_pictures = []
+        content = south_coast_scraper.get_folders_in_path(files)
+        parts = files.split("/")
+        date = None
+        for part in parts:
+            if part.startswith("W.C."):
+                date = part  # Output: W.C. 17.03.2025
+        for file in content['value']:
+            if 'file' in file:
+                final_list.append({
+                    "Date": date,
+                    "path": f"{files}/{file['name']}",  # path string, not the raw item dict (which carries the download URL)
+                    "Photo Name": file['name'],
+                })
 
-for folder in list_of_address:
-    pictures = south_coast_scraper.get_folders_in_path(folder)
-    for pic in pictures['value']:
-        if 'file' not in pic:
-            list_of_pictures.append(folder + "/" + pic['name'])
+    final_df = pd.DataFrame(final_list)
+    return final_df
 
-print(list_of_pictures)
+jjc_df = get_photos_name(SharePointInstaller.JJC)
+scis_df = get_photos_name(SharePointInstaller.SOUTH_COAST_INSULATION)
 
-final_list = []
-for files in list_of_pictures:
-    content = south_coast_scraper.get_folders_in_path(files)
-    for file in content['value']:
-        if 'file' in file:
-            url = file['@microsoft.graph.downloadUrl']
-            print(f"Downloading {files}/{file['name']}")
-            final_list.append({
-                "Directories": files,
-                "Photo Name": file['name'],
-            })
+all_df = [jjc_df, scis_df]
 
-final_df = pd.DataFrame(final_list)
+final_df = pd.concat(all_df, ignore_index=True)
+final_df
 
-final_df.to_csv("jjc.csv")
\ No newline at end of file
+final_df.to_csv("photos_name.csv")
+
+duplicate_names = final_df[final_df.duplicated('Photo Name', keep=False)]
+df = final_df
+dupe_names_df = df[df.duplicated('Photo Name', keep=False)].sort_values('Photo Name')
diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py
index 8fe1f64..c58dd8b 100644
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@@ -36,11 +36,11 @@ class SurveyPrice():
         }
 
         self.domna_job_to_price_sheet_convertor = {
-            "JJC - SOLAR": "JJC - SOLAR",
+            "JJC - ECO4 PV": "JJC - SOLAR",
             "JJC - EMPTY CAVITY": "JJC - EMPTIES",
             "JJC - REMIDIAL FOAM FILLED CAVITY": "JJC - FORMALDEHYDE EXTRACTION",
             "JJC - REMIDIAL FILLED CAVITY": "JJC - GENERAL EXTRACTIONS",
-            "SCIS - SOLAR": "SCIS - SOLAR",
+            "SCIS - ECO4 PV": "SCIS - SOLAR",
             "SCIS - EMPTY CAVITY": "SCIS - EMPTIES",
             "SCIS - REMIDIAL FOAM FILLED CAVITY": "SCIS - FORMALDEHYDE EXTRACTION",
             "SCIS - REMIDIAL FILLED CAVITY": "SCIS - GENERAL EXTRACTIONS",
@@ -214,10 +214,10 @@ class SurveyPrice():
                 else:
                     info.update({
                         "DOMNA JOB TYPE": "EMPTY CAVITY"
-                    })
+                    })        
             else:
                 info.update({
-                    "DOMNA JOB TYPE": "SOLAR"
+                    "DOMNA JOB TYPE": "ECO4 PV"
                 })
 
 
@@ -305,13 +305,13 @@ class SurveyPrice():
         submission_data = self.merge_hub_spot_and_survey_information()
         final_list = []
         for _, row in submission_data.iterrows():
-            if "SOLAR" in row["DOMNA JOB TYPE"].upper():
-                sheet_name = f"{self.domna_job_to_price_sheet_convertor[f'{self.installer[row["HUBSPOT_INSTALLER"]]} - {row["DOMNA JOB TYPE"]}'].upper()}"
+            if "PV" in row["HUBSPOT_WORK_TYPE"].upper():
+                sheet_name = f"{self.domna_job_to_price_sheet_convertor[f'{self.installer[row["HUBSPOT_INSTALLER"]]} - {row["HUBSPOT_WORK_TYPE"]}'].upper()}"
                 price_matrix = self.get_price_matrix(sheet_name)
                 merged_row = pd.merge(
                     row.to_frame().T,
                     price_matrix,
-                    left_on='DOMNA JOB TYPE',
+                    left_on='HUBSPOT_WORK_TYPE',
                     right_on='WORK TYPE',
                     how='outer'
                 )