Merge pull request #642 from Hestia-Homes/portfolio-diagnostics

Portfolio diagnostics
This commit is contained in:
KhalimCK 2026-01-06 20:58:51 +00:00 committed by GitHub
commit e617d74f47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 276 additions and 88 deletions

View file

@ -0,0 +1,3 @@
"""
This script is set up to perform broad portfolio diagnostics to identify potential issues
"""

View file

@ -0,0 +1,132 @@
"""
This script performs a deep dive into the various scenarios and checks fundamental things
This includes:
1) Do properties that should have a plan, have a plan? E.g. if the property is EPC D and the scenario targets
EPC C, there should be a plan
2) If the plan is fabric first, make sure they are actually fabric first
"""
import pandas as pd
# Scenario ID -> scenario name. The name is also the stem of the per-scenario Excel export read in main().
scenario_names = {
    871: "EPC C, fabric first, no solid floor, ashp 3.0",
    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    862: "EPC B, No solid floor, ASHP COP 3.0",
    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    859: "EPC C, no solid floor, ashp 3.0",
}
# Scenario ID -> SAP score the scenario aims for (the EPC C scenarios use 69, the EPC B scenarios use 81)
scenario_sap_targets = {
    871: 69,
    863: 81,
    862: 81,
    861: 69,
    859: 69,
}
# SAP target used by the EPC B scenarios; flats are excluded from the "no plan" check for these
EPC_B_SAP_TARGET = 81

# Folder holding the per-scenario exports and the standardised asset lists
PROJECT_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"

portfolio_id = 419

# Single-property inputs kept for manual re-runs; not used by main().
# plan_input = [
#     {
#         "uprn": 100022725126,
#         "address": "FLAT 5 Daveys Court",
#         "postcode": "WC2N 4BW"
#     }
# ]
# plan_input = [
#     {
#         "uprn": 100120966352,
#         "address": "FLAT 11 Kingsgate",
#         "postcode": "OX18 2BP"
#     }
# ]
plan_input = [
    {
        "uprn": 200003371857,
        "postcode": "SE1 5SJ",
        "address": "39 BUTTERMERE CLOSE",
    }
]


def find_problem_properties(df, sap_target):
    """
    Flag suspicious rows in one scenario export.

    :param df: scenario export with at least the columns current_sap_points, sap_points, total_retrofit_cost
        and property_type. The frame passed in is not modified.
    :param sap_target: SAP score the scenario is trying to reach
    :return: tuple of (problematic_properties, zero_cost_above_zero_sap), where
        problematic_properties are rows below the target that gained no SAP points, and
        zero_cost_above_zero_sap are rows that gained SAP points at a total retrofit cost of zero.
        Both frames carry the helper flag columns added here.
    """
    # Work on a copy so the caller's frame does not pick up the helper columns
    df = df.copy()
    df["below_scenario_target"] = df["current_sap_points"] < sap_target
    df["no_recommended_measures"] = df["sap_points"] == 0
    df["zero_cost"] = df["total_retrofit_cost"] == 0
    df["sap_points_above_zero"] = df["sap_points"] > 0

    # Properties that are below the scenario SAP target, but have no recommended measures
    problematic_properties = df[df["below_scenario_target"] & df["no_recommended_measures"]].copy()
    if sap_target == EPC_B_SAP_TARGET:
        # NOTE(review): flats are skipped for EPC B targets - presumably because they are not expected to
        # reach EPC B; confirm the reasoning
        problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]

    # Also look for zero cost and SAP points > 0
    zero_cost_above_zero_sap = df[df["sap_points_above_zero"] & df["zero_cost"]].copy()
    return problematic_properties, zero_cost_above_zero_sap


def _normalise_uprn(uprns):
    """Convert UPRNs held as strings, ints or floats to nullable integers; unparseable values become NA."""
    return pd.to_numeric(uprns, errors="coerce").astype("Int64")


def select_retry_assets(asset_list, problem_uprns):
    """
    Pick the asset-list rows whose epc_os_uprn matches one of the problem UPRNs.

    Both sides are normalised to integers before matching. The scenario exports hold the UPRN as text while an
    asset list read from Excel may hold it as a number, and comparing the two directly would match nothing.

    :param asset_list: standardised asset list with an epc_os_uprn column
    :param problem_uprns: series of UPRNs to look up
    :return: the matching rows of asset_list, all columns untouched
    """
    wanted = _normalise_uprn(problem_uprns).dropna().astype("int64").tolist()
    # Positional boolean mask: asset_list may carry duplicate index labels after concatenation
    matches = _normalise_uprn(asset_list["epc_os_uprn"]).isin(wanted).to_numpy(dtype=bool)
    return asset_list[matches]


def main():
    """Check every scenario export, then write the asset-list rows of all flagged properties for re-processing."""
    problems = []
    for scenario_id, scenario_name in scenario_names.items():
        # Read in the recommended measures
        print("Reading")
        df = pd.read_excel(f"{PROJECT_DIR}{scenario_name}.xlsx")

        problematic_properties, zero_cost_above_zero_sap = find_problem_properties(
            df, scenario_sap_targets[scenario_id]
        )

        # show all columns
        # Source - https://stackoverflow.com/a
        # Posted by YOLO, modified by community. See post 'Timeline' for change history
        # Retrieved 2026-01-06, License - CC BY-SA 4.0
        # pd.set_option('display.max_rows', 500)
        # pd.set_option('display.max_columns', 500)
        # pd.set_option('display.width', 1000)
        # problematic_properties.head(len(problematic_properties))

        print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
        print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")

        problems.append(problematic_properties)
        problems.append(zero_cost_above_zero_sap)

    # A property can be flagged in several scenarios; keep one row per UPRN
    all_problems = pd.concat(problems)
    all_problems = all_problems.drop_duplicates(subset=["uprn"])

    sal = pd.read_excel(
        f"{PROJECT_DIR}20251213 Model data.xlsx",
        sheet_name="Standardised Asset List"
    )
    sal2 = pd.read_excel(
        f"{PROJECT_DIR}20260105 - additional UPRNS.xlsx",
        sheet_name="Standardised Asset List"
    )
    sal = pd.concat([sal, sal2])

    retry = select_retry_assets(sal, all_problems["uprn"])

    # Store
    retry.to_excel(
        f"{PROJECT_DIR}d_problematic_properties_to_review_20260106.xlsx",
        sheet_name="Standardised Asset List",
        index=False
    )

    # Delete associated plans
    # 1) Get the property IDs for these UPRNS, for this portfolio (portfolio_id)
    # TODO: Delete all plans for these properties and re-build

    # Plan notes:
    # UPRN: 5870109770, property ID: 281244 - need to delete and re-build all scenarios
    # UPRN: 100022725126, property ID: 283781 - need to delete and re-build all scenarios

    # Bugs:
    # 12156800
    # NOTE(review): the number above was a bare expression in the original - presumably a UPRN; confirm


if __name__ == "__main__":
    main()

View file

@ -167,9 +167,17 @@ class HeatingRecommender:
hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters
hhr_suitable = hhr_suitable and (
"underfloor heating" not in self.property.main_heating["clean_description"]
)
# If the property has community heating in place, we don't recommend HHRSH
has_community_heating = self.property.main_fuel["is_community"]
# If the property currently has electric underfloor heating, we only allow HHRSH if the hot water is heated by
# electric immersion
underfloor_not_an_issue = True
if self.property.main_heating["has_electric_underfloor_heating"]:
if self.property.hotwater["heater_type"] != "electric immersion":
underfloor_not_an_issue = False
hhr_suitable = hhr_suitable and not has_community_heating and underfloor_not_an_issue
# If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH

View file

@ -718,7 +718,8 @@ class Recommendations:
):
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme") and (
if (heating_description in ["Community scheme", 'Community scheme, plus solar']) or (
hotwater_description in ["Community scheme", 'Community scheme, plus solar']) and (
"not community" not in main_fuel_description
):
if main_fuel_description in ["mains gas (community)", "UNKNOWN"]:
@ -742,6 +743,18 @@ class Recommendations:
"heating_cop": 0.85,
"hotwater_cop": 0.85
}
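# Handle the specific case where the fuel is recorded as "no heating/hot-water system" but the heating
# description mentions electric heaters - treat both heating and hot water as electric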
if main_fuel_description in ["To be used only when there is no heating/hot-water system"] and (
"electric heaters" in heating_description.lower()
):
return {
"heating_fuel_type": "Electricity",
"hotwater_fuel_type": "Electricity",
"heating_cop": 1,
"hotwater_cop": 1
}
logger.warning(
"Unhandled community fuel."
f"Fuel: {main_fuel_description}"

View file

@ -86,9 +86,17 @@ class WindowsRecommendations:
# We scale the number of windows based on the proportion of existing glazing
if self.property.data["multi-glaze-proportion"] != "":
n_windows_scalar = 1 - (
int(self.property.data["multi-glaze-proportion"]) / 100
)
if (self.property.windows["clean_description"] == "Some double glazing") and (
self.property.data["windows-energy-eff"] == "Very Poor") and (
self.property.data["multi-glaze-proportion"] == 100
):
# In this case, we assume all of the windows need replacing
n_windows_scalar = 1
else:
n_windows_scalar = 1 - (
int(self.property.data["multi-glaze-proportion"]) / 100
)
else:
n_windows_scalar = self.COVERAGE_MAP.get(
self.property.windows["glazing_coverage"], 1
@ -97,6 +105,9 @@ class WindowsRecommendations:
number_of_windows *= n_windows_scalar
number_of_windows = np.ceil(number_of_windows)
# Handle edge case - prevent number of windows 0
number_of_windows = max(1, number_of_windows)
# We then price the job based on the number of windows that there are
cost_result = self.costs.window_glazing(
number_of_windows=number_of_windows,

View file

@ -11,8 +11,21 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 404
SCENARIOS = [829]
PORTFOLIO_ID = 419 # Peabody
SCENARIOS = [
871, # EPC C - fabric first, no solid floor, ashp 3.0
863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
862, # EPC B - No solid floor, ASHP COP 3.0
861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
859, # EPC C - no solid floor, ashp 3.0
]
scenario_names = {
871: "EPC C, fabric first, no solid floor, ashp 3.0",
863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
862: "EPC B, No solid floor, ASHP COP 3.0",
861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
859: "EPC C, no solid floor, ashp 3.0",
}
def get_data(portfolio_id, scenario_ids):
@ -84,88 +97,96 @@ properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)
recommended_measures_df = recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
# Find dupes by property id and measure type
dupes = recommended_measures_df.duplicated(
subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]
if dupe_df.shape:
# Drop dupes - happened due to a funny bug
recommended_measures_df = recommended_measures_df.drop_duplicates(
subset=["property_id", "measure_type"], keep='first'
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
columns=["property_id"]
).sum(axis=1)
df = properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
)
df = df.drop(columns=["property_id"])
df["sap_points"] = df["sap_points"].fillna(0)
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
# We merge this back to the main dataframe, which will contain the bathrooms
from utils.s3 import read_csv_from_s3, read_excel_from_s3
# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
asset_list = read_excel_from_s3(
bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx",
header_row=0, sheet_name="Standardised Asset List"
)
asset_list = pd.DataFrame(asset_list)
asset_list = asset_list.rename(
columns={
"postcode": "domna_postcode"
}
)
if "domna_full_address":
# For Peabody
asset_list["domna_full_address"] = asset_list["domna_address_1"]
# asset_list = read_excel_from_s3(
# bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx",
# header_row=0, sheet_name="Standardised Asset List"
# )
asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
df["uprn"] = df["uprn"].astype(str)
asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
asset_list = asset_list.merge(
df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
how="left",
on="uprn"
)
for scenario_id in SCENARIOS:
# Get recs for this scenario
recommended_measures_df = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[recommendations_df["Scenario ID"] == scenario_id][
["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
# Find dupes by property id and measure type
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
dupe_df = recommended_measures_df[dupes]
if dupe_df.shape:
# Drop dupes - happened due to a funny bug
recommended_measures_df = recommended_measures_df.drop_duplicates(
subset=["property_id", "measure_type"], keep='first'
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
columns=["property_id"]
).sum(axis=1)
df = properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
)
df = df.drop(columns=["property_id"])
df["sap_points"] = df["sap_points"].fillna(0)
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round()
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
df["uprn"] = df["uprn"].astype(str)
# Create excel to store to
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/{scenario_names[scenario_id]}.xlsx")
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
# asset_list = pd.DataFrame(asset_list)
# asset_list = asset_list.rename(
# columns={
# "postcode": "domna_postcode"
# }
# )
# if "domna_full_address":
# # For Peabody
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
#
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
# asset_list = asset_list.merge(
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
# how="left",
# on="uprn"
# )
# Get conservation area data from property details spatial. based on the UPRNs