Merge pull request #642 from Hestia-Homes/portfolio-diagnostics

Portfolio diagnostics
2026-07-27 23:35:01 +00:00 · 2026-01-06 20:58:51 +00:00 · 2026-01-06 20:58:51 +00:00 · e617d74f47
commit e617d74f47
parent 0dd179bbb1 608ff71d35
6 changed files with 276 additions and 88 deletions
--- a/backend/diagnostics/portfolio_diagnostics.py
+++ b/backend/diagnostics/portfolio_diagnostics.py
@ -0,0 +1,3 @@
+"""
+This script is set up to perform broad portfolio diagnostics to identify potential issues
+"""
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -0,0 +1,132 @@
+"""
+This script performs a deep dive into the various scenarios and checks fundamental things
+This includes:
+1) Do properties that should have a plan, have a plan? E.g. if the property is EPC D and the scenario targets
+EPC C, there should be a plan
+2) If the plan is fabric first, make sure they are actually fabric first
+"""
+import pandas as pd
+
+scenario_names = {
+    871: "EPC C, fabric first, no solid floor, ashp 3.0",
+    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
+    862: "EPC B, No solid floor, ASHP COP 3.0",
+    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
+    859: "EPC C, no solid floor, ashp 3.0",
+}
+
+scenario_sap_targets = {
+    871: 69,
+    863: 81,
+    862: 81,
+    861: 69,
+    859: 69,
+}
+
+problems = []
+for scenario_id, scenario_name in scenario_names.items():
+    # Read in the recommended measures
+    print("Reading")
+    df = pd.read_excel(
+        f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
+        f"{scenario_name}.xlsx"
+    )
+
+    # find properties that are below the scenario sap target, but have no recommended measures
+    df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
+    df["no_recommended_measures"] = df["sap_points"] == 0
+    df["zero_cost"] = df["total_retrofit_cost"] == 0
+    df["sap_points_above_zero"] = df["sap_points"] > 0
+
+    # Also look for zero cost and SAP points > 0
+
+    problematic_properties = df[
+        (df["below_scenario_target"] & df["no_recommended_measures"])
+    ].copy()
+
+    if scenario_sap_targets[scenario_id] == 81:
+        problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
+
+    zero_cost_above_zero_sap = df[
+        (df["sap_points_above_zero"] & df["zero_cost"])
+    ].copy()
+
+    # show all columns
+    # Source - https://stackoverflow.com/a
+    # Posted by YOLO, modified by community. See post 'Timeline' for change history
+    # Retrieved 2026-01-06, License - CC BY-SA 4.0
+
+    # pd.set_option('display.max_rows', 500)
+    # pd.set_option('display.max_columns', 500)
+    # pd.set_option('display.width', 1000)
+    # problematic_properties.head(len(problematic_properties))
+
+    print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
+    print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
+
+    problems.append(problematic_properties)
+    problems.append(zero_cost_above_zero_sap)
+
+    # plan_input = [
+    #     {
+    #         "uprn": 100022725126,
+    #         "address": "FLAT 5 Daveys Court",
+    #         "postcode": "WC2N 4BW"
+    #     }
+    # ]
+
+    # plan_input = [
+    #     {
+    #         "uprn": 100120966352,
+    #         "address": "FLAT 11 Kingsgate",
+    #         "postcode": "OX18 2BP"
+    #     }
+    # ]
+
+    plan_input = [
+        {
+            "uprn": 200003371857,
+            "postcode": "SE1 5SJ",
+            "address": "39 BUTTERMERE CLOSE",
+        }
+    ]
+
+all_problems = pd.concat(problems)
+all_problems = all_problems.drop_duplicates(subset=["uprn"])
+
+sal = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
+    "data.xlsx",
+    sheet_name="Standardised Asset List"
+)
+sal2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
+    "UPRNS.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+sal = pd.concat([sal, sal2])
+
+retry = sal[sal["epc_os_uprn"].isin(all_problems["uprn"])]
+
+# Store
+retry.to_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
+    "d_problematic_properties_to_review_20260106.xlsx",
+    sheet_name="Standardised Asset List",
+    index=False
+)
+
+# Delete associated plans
+# 1) Get the property IDs for these UPRNS, for this portfolio
+portfolio_id = 419
+uprns = retry
+
+# TODO: Delete all plans for these properties and re-build
+# Plan notes:
+# UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios
+# UPRN: 100022725126, property ID: 283781 - need to delete and re-build all scenarios
+
+
+# Bugs:
+12156800
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@ -167,9 +167,17 @@ class HeatingRecommender:

        hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters

-        hhr_suitable = hhr_suitable and (
-            "underfloor heating" not in self.property.main_heating["clean_description"]
-        )
+        # If the property has community heating in place, we don't recommend HHRSH
+        has_community_heating = self.property.main_fuel["is_community"]
+
+        # If the property currently has electric underfloor heating, we only allow HHRSH when the hot water is
+        # heated by electric immersion
+        underfloor_not_an_issue = True
+        if self.property.main_heating["has_electric_underfloor_heating"]:
+            if self.property.hotwater["heater_type"] != "electric immersion":
+                underfloor_not_an_issue = False
+
+        hhr_suitable = hhr_suitable and not has_community_heating and underfloor_not_an_issue

        # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH

--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -718,7 +718,8 @@ class Recommendations:
    ):

        # Handle the case of community schemes
-        if (heating_description == "Community scheme") or (hotwater_description == "Community scheme") and (
+        if (heating_description in ["Community scheme", 'Community scheme, plus solar']) or (
+            hotwater_description in ["Community scheme", 'Community scheme, plus solar']) and (
            "not community" not in main_fuel_description
        ):
            if main_fuel_description in ["mains gas (community)", "UNKNOWN"]:
@ -742,6 +743,18 @@ class Recommendations:
                    "heating_cop": 0.85,
                    "hotwater_cop": 0.85
                }
+
+            # Specific case: fuel says there is no heating system, but the description mentions electric heaters
+            if main_fuel_description in ["To be used only when there is no heating/hot-water system"] and (
+                "electric heaters" in heating_description.lower()
+            ):
+                return {
+                    "heating_fuel_type": "Electricity",
+                    "hotwater_fuel_type": "Electricity",
+                    "heating_cop": 1,
+                    "hotwater_cop": 1
+                }
+
            logger.warning(
                "Unhandled community fuel."
                f"Fuel: {main_fuel_description}"
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -86,9 +86,17 @@ class WindowsRecommendations:

        # We scale the number of windows based on the proportion of existing glazing
        if self.property.data["multi-glaze-proportion"] != "":
-            n_windows_scalar = 1 - (
-                int(self.property.data["multi-glaze-proportion"]) / 100
-            )
+
+            if (self.property.windows["clean_description"] == "Some double glazing") and (
+                self.property.data["windows-energy-eff"] == "Very Poor") and (
+                self.property.data["multi-glaze-proportion"] == 100
+            ):
+                # In this case, we assume all of the windows need replacing
+                n_windows_scalar = 1
+            else:
+                n_windows_scalar = 1 - (
+                    int(self.property.data["multi-glaze-proportion"]) / 100
+                )
        else:
            n_windows_scalar = self.COVERAGE_MAP.get(
                self.property.windows["glazing_coverage"], 1
@ -97,6 +105,9 @@ class WindowsRecommendations:
        number_of_windows *= n_windows_scalar
        number_of_windows = np.ceil(number_of_windows)

+        # Handle edge case - prevent the number of windows from being 0
+        number_of_windows = max(1, number_of_windows)
+
        # We then price the job based on the number of windows that there are
        cost_result = self.costs.window_glazing(
            number_of_windows=number_of_windows,
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -11,8 +11,21 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 404
-SCENARIOS = [829]
+PORTFOLIO_ID = 419  # Peabody
+SCENARIOS = [
+    871,  # EPC C - fabric first, no solid floor, ashp 3.0
+    863,  # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    862,  # EPC B - No solid floor, ASHP COP 3.0
+    861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    859,  # EPC C - no solid floor, ashp 3.0
+]
+scenario_names = {
+    871: "EPC C, fabric first, no solid floor, ashp 3.0",
+    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
+    862: "EPC B, No solid floor, ASHP COP 3.0",
+    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
+    859: "EPC C, no solid floor, ashp 3.0",
+}


 def get_data(portfolio_id, scenario_ids):
@ -84,88 +97,96 @@ properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)
 recommendations_df = pd.DataFrame(recommendations_data)

-recommended_measures_df = recommendations_df[
-    ["property_id", "measure_type", "estimated_cost", "default"]
-]
-recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
-recommended_measures_df = recommended_measures_df.drop(columns=["default"])
-
-post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
-post_install_sap = post_install_sap[post_install_sap["default"]]
-# Sum up the sap points by property id
-post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
-
-# Find dupes by property id and measure type
-dupes = recommended_measures_df.duplicated(
-    subset=["property_id", "measure_type"], keep=False
-)
-dupe_df = recommended_measures_df[dupes]
-
-if dupe_df.shape:
-    # Drop dupes - happened due to a funny bug
-    recommended_measures_df = recommended_measures_df.drop_duplicates(
-        subset=["property_id", "measure_type"], keep='first'
-    )
-
-recommendations_measures_pivot = recommended_measures_df.pivot(
-    index='property_id',
-    columns='measure_type',
-    values='estimated_cost'
-)
-recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
-
-# Total cost is the row sum, excluding the property_id column
-recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
-    columns=["property_id"]
-).sum(axis=1)
-
-df = properties_df[
-    [
-        "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
-        "heating", "windows",
-        "current_epc_rating",
-        "current_sap_points", "total_floor_area", "number_of_rooms",
-    ]
-].merge(
-    recommendations_measures_pivot, how="left", on="property_id"
-).merge(
-    post_install_sap, how="left", on="property_id"
-)
-
-df = df.drop(columns=["property_id"])
-df["sap_points"] = df["sap_points"].fillna(0)
-
-df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
-df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
-df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
-
-# We merge this back to the main dataframe, which will contain the bathrooms
 from utils.s3 import read_csv_from_s3, read_excel_from_s3

-# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
-asset_list = read_excel_from_s3(
-    bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx",
-    header_row=0, sheet_name="Standardised Asset List"
-)
-asset_list = pd.DataFrame(asset_list)
-asset_list = asset_list.rename(
-    columns={
-        "postcode": "domna_postcode"
-    }
-)
-if "domna_full_address":
-    # For Peabody
-    asset_list["domna_full_address"] = asset_list["domna_address_1"]
+# asset_list = read_excel_from_s3(
+#     bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx",
+#     header_row=0, sheet_name="Standardised Asset List"
+# )

-asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
-asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
-df["uprn"] = df["uprn"].astype(str)
-asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
-asset_list = asset_list.merge(
-    df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
-    how="left",
-    on="uprn"
-)
+
+for scenario_id in SCENARIOS:
+    # Get recs for this scenario
+    recommended_measures_df = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][
+        ["property_id", "measure_type", "estimated_cost", "default"]
+    ]
+    recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+    recommended_measures_df = recommended_measures_df.drop(columns=["default"])
+
+    post_install_sap = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][
+        ["property_id", "default", "sap_points"]]
+    post_install_sap = post_install_sap[post_install_sap["default"]]
+    # Sum up the sap points by property id
+    post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
+
+    # Find dupes by property id and measure type
+    dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
+    dupe_df = recommended_measures_df[dupes]
+
+    if dupe_df.shape:
+        # Drop dupes - happened due to a funny bug
+        recommended_measures_df = recommended_measures_df.drop_duplicates(
+            subset=["property_id", "measure_type"], keep='first'
+        )
+
+    recommendations_measures_pivot = recommended_measures_df.pivot(
+        index='property_id',
+        columns='measure_type',
+        values='estimated_cost'
+    )
+    recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
+
+    # Total cost is the row sum, excluding the property_id column
+    recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
+        columns=["property_id"]
+    ).sum(axis=1)
+
+    df = properties_df[
+        [
+            "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
+            "heating", "windows",
+            "current_epc_rating",
+            "current_sap_points", "total_floor_area", "number_of_rooms",
+        ]
+    ].merge(
+        recommendations_measures_pivot, how="left", on="property_id"
+    ).merge(
+        post_install_sap, how="left", on="property_id"
+    )
+
+    df = df.drop(columns=["property_id"])
+    df["sap_points"] = df["sap_points"].fillna(0)
+
+    df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
+    df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
+    df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
+    df["uprn"] = df["uprn"].astype(str)
+
+    # Create excel to store to
+    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+                f"Project/{scenario_names[scenario_id]}.xlsx")
+    with pd.ExcelWriter(filename) as writer:
+        df.to_excel(writer, sheet_name="properties", index=False)
+
+
+# asset_list = pd.DataFrame(asset_list)
+# asset_list = asset_list.rename(
+#     columns={
+#         "postcode": "domna_postcode"
+#     }
+# )
+# if "domna_full_address":
+#     # For Peabody
+#     asset_list["domna_full_address"] = asset_list["domna_address_1"]
+#
+# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
+# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
+# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
+# asset_list = asset_list.merge(
+#     df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
+#     how="left",
+#     on="uprn"
+# )


 # Get conservation area data from property details spatial. based on the UPRNs