From 2040c2a957d79fc86c8cf9241cdd8f7547da0f13 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 30 Jun 2025 12:14:49 +0100
Subject: [PATCH] Update prepare_for_hubspot inputs and add solar ABS estimates

---
 .idea/Model.iml                           |   2 +-
 .idea/misc.xml                            |   2 +-
 asset_list/AssetList.py                   |   5 +
 asset_list/abs_estimates.py               | 112 +++++++++++++++++++---
 asset_list/hubspot/prepare_for_hubspot.py |  17 ++--
 5 files changed, 112 insertions(+), 26 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index c6561970..09f2e496 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..fb10c6b0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 3f5b99cb..9ae05f05 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -2375,6 +2375,11 @@ class AssetList:
         none_details = [x for x in details_colnames if x is None]
         details_colnames = [x for x in details_colnames if x is not None]
 
+        if local_filepath is None:
+            # Create an empty DataFrame based on the fields in self.contact_detail_fields
+            self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys()))
+            return
+
         contact_details = pd.read_excel(
             local_filepath, sheet_name=sheet_name
         )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames]
diff --git a/asset_list/abs_estimates.py b/asset_list/abs_estimates.py
index ee85973c..58adcca6 100644
--- a/asset_list/abs_estimates.py
+++ b/asset_list/abs_estimates.py
@@ -14,8 +14,8 @@ load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 asset_list = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Thrive Programme - reconciled.xlsx",
-    sheet_name="Cavity properties - for review"
+    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Livewest South-West - Standardised V2.xlsx",
+    sheet_name="Cavity Route (Insta Review)"
 )
 
 abs_matrix = pd.read_csv(
@@ -29,51 +29,133 @@ pps_matrix.columns = [c.strip() for c in pps_matrix.columns]
 
 # We need to estimate the number of points the work will produce and the finishing band. For this, we assume 7 for
 # cavity and 15 for solar. We'll be more specific in the future, but for now, this is a good enough estimate.
-cavity_route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
+route = asset_list[["domna_address_1", "domna_postcode", "epc_os_uprn"]].rename(
     columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "upr"}
 )
-cavity_route["address"] = cavity_route["address"].astype(str)
+route["address"] = route["address"].astype(str)
 
 asset_list_epc_client = AssetListEpcData(
-    asset_list=cavity_route,
+    asset_list=route,
     epc_auth_token=EPC_AUTH_TOKEN
 )
 
 asset_list_epc_client.get_data()
 asset_list_epc_client.get_non_invasive_recommendations()
 
-cwi_sap_points = []
+solar_sap_points = []
 for r in asset_list_epc_client.non_invasive_recommendations:
     if not r.get("recommendations"):
         continue
-    cwi_recommendations = [
-        x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
+    solar_recommendations = [
+        x for x in r["recommendations"] if "solar_pv" in x["type"]
     ]
-    if cwi_recommendations:
-        cwi_recommendations = cwi_recommendations[0]
+    if solar_recommendations:
+        solar_recommendations = solar_recommendations[0]
     else:
         continue
 
     address = r["address"]
     postcode = r["postcode"]
 
-    cwi_sap_points.append(
+    solar_sap_points.append(
         {
             "address": address,
             "postcode": postcode,
-            "sap_points": cwi_recommendations["sap_points"]
+            "sap_points": solar_recommendations["sap_points"]
         }
     )
 
-cwi_sap_points = pd.DataFrame(cwi_sap_points)
+solar_sap_points = pd.DataFrame(solar_sap_points)
+solar_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
 # Store the sap points in the cavity route to csv
 # cwi_sap_points.to_csv(
 #     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
 #     index=False
 # )
+
+avg_solar_points_by_postcode = solar_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
+avg_solar_points = solar_sap_points["sap_points"].median()
+asset_list["domna_address_1"] = asset_list["domna_address_1"].astype(str)
+asset_list = asset_list.merge(
+    solar_sap_points, how="left", left_on=["domna_address_1", "domna_postcode"], right_on=["address", "postcode"]
+).drop(
+    columns=["address", "postcode"]
+)
+
+# Fill missing sap points with the average solar points for the postcode
+asset_list = asset_list.merge(
+    avg_solar_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
+    how="left", on=["domna_postcode"], suffixes=("", "_avg")
+)
+asset_list["sap_points"] = asset_list["sap_points"].fillna(asset_list["sap_points_avg"])
+asset_list.drop(columns=["sap_points_avg"], inplace=True)
+
+asset_list["sap_points"] = asset_list["sap_points"].fillna(avg_solar_points)
+asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]
+asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(lambda x: sap_to_epc(x))
+asset_list["starting_half_band"] = asset_list["epc_sap_score_on_register"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(lambda x: Funding.get_sap_band(x))
+asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(lambda x: Funding.get_floor_area_band(x))
+
+asset_list["ending_half_band"] = np.where(
+    (asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]),
+    "Low_C",
+    asset_list["ending_half_band"]
+)
+# Realistically, we'll take the properties to a low C at worst
+asset_list["ending_half_band"] = np.where(
+    (asset_list["post_works_sap"] < 69),
+    "Low_C",
+    asset_list["ending_half_band"]
+)
+
+asset_list = asset_list.merge(
+    abs_matrix, how="left", left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
+    right_on=['Starting Band', 'Finishing Band', 'Floor Area Segment', ]
+)
+asset_list = asset_list.drop(columns=['Starting Band', 'Finishing Band', 'Floor Area Segment'])
+
+asset_list = asset_list.rename(
+    columns={"Cost Savings": "funding_abs"}
+)
+
+print(asset_list["domna_property_id"].duplicated().sum())
+
+# Store this data
+asset_list.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_solar_abs_estimates-solar.csv",
+    index=False
+)
+
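+# Cavity process! NOTE(review): cwi_sap_points is only defined in the commented-out code below - re-enable it before running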
+# cwi_sap_points = []
+# for r in asset_list_epc_client.non_invasive_recommendations:
+#     if not r.get("recommendations"):
+#         continue
+#     cwi_recommendations = [
+#         x for x in r["recommendations"] if "cavity_wall_insulation" in x["type"]
+#     ]
+#     if cwi_recommendations:
+#         cwi_recommendations = cwi_recommendations[0]
+#     else:
+#         continue
+#
+#     address = r["address"]
+#     postcode = r["postcode"]
+#
+#     cwi_sap_points.append(
+#         {
+#             "address": address,
+#             "postcode": postcode,
+#             "sap_points": cwi_recommendations["sap_points"]
+#         }
+#     )
+#
+# cwi_sap_points = pd.DataFrame(cwi_sap_points)
 # cwi_sap_points = pd.read_csv(
 #     "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
 # )
+# cwi_sap_points.drop_duplicates(subset=["address", "postcode"], inplace=True)
 avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
 avg_cwi_points = cwi_sap_points["sap_points"].median()
 asset_list = asset_list.merge(
@@ -138,8 +220,10 @@ asset_list["funding_abs"] = np.where(
     asset_list["Cost Savings"]
 )
 
+asset_list["domna_property_id"].duplicated().sum()
+
 # Store this data
 asset_list.to_csv(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/thrive_abs_estimates.csv",
+    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/livewest_sw_abs_estimates.csv",
     index=False
 )
diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py
index eed6d7e7..6c8d9499 100644
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@@ -19,17 +19,16 @@ def app():
 
     # inputs:
     reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://sandwell.gov.uk"
-    installer_name = "J & J CRUMP"
+    customer_domain = "https://medway.gov.uk"
+    installer_name = "SGEC"
     asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
-        "Standardised.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway/Hubspot/Reviewed programme - 2025-05-27.xlsx"
     )
-    asset_list_sheet_name = "Proposed Program"
-    asset_list_header = 1
+    asset_list_sheet_name = "Finalised Route"
+    asset_list_header = 0
 
     contact_details_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
+        None
     )
     contacts_sheet_name = "Sheet1"
     contacts_landlord_property_id = "landlord_property_id"
@@ -41,9 +40,7 @@ def app():
     contacts_firstname_column = "firstname"
     contacts_lastname_column = "lastname"
 
-    existing_programme_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
-    )
+    existing_programme_filepath = None
 
     asset_list = AssetList.load_standardised_asset_list(
         asset_list_filepath, asset_list_sheet_name, asset_list_header