minor peabody output work

This commit is contained in:
Khalim Conn-Kowlessar 2026-01-12 13:51:28 +00:00
parent 3809066cbf
commit bae3e13e21
3 changed files with 317 additions and 20 deletions

View file

@ -1,8 +1,10 @@
import pandas as pd
# Load the Parity SAL export from the project's "Final SAL" folder.
# Exactly one path is spelled out here: adjacent string literals are joined by
# Python, so any extra path literal inside the call would silently be glued
# onto this one and point at a file that does not exist.
df = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
    "08012026.xlsx"
)
# Single wall descriptor "<construction>+<insulation>"; a missing insulation
# value is labelled explicitly instead of turning the whole string into NaN.
df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation")
df['SAP Score'].mean()
@ -18,4 +20,72 @@ df["SAP Band"].value_counts(normalize=True)
# Rows where the modelled SAP band differs from the lodged EPC band.
z = df[df["SAP Band"] != df["Lodged EPC Band"]]
agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count")
zz = z[z["Lodged EPC Band"] == "A"]

# Recommendation output for the EPC C scenario.
recommendations_epc_c = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
    "solid floor, ashp 3.0 - corrected.xlsx"
)
# Turn uprn into a string key for the merges below.
# NOTE(review): assumes df["UPRN"] is stored as a string as well - confirm,
# otherwise the join keys will not line up.
recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str)

combined = recommendations_epc_c.merge(
    df,
    left_on="uprn",
    right_on="UPRN",
    suffixes=("_rec", "_sal")
)
# Keep every column the checks below read. "SAP Band" and
# "current_epc_rating" must be part of this selection, otherwise the
# value_counts() calls that follow raise KeyError.
combined = combined[
    [
        "uprn", "SAP Score", "SAP Band", "current_sap_points", "current_epc_rating", "walls", "wall_combined",
    ]
]
combined[combined["SAP Score"] < 69]["current_epc_rating"].value_counts()
combined[combined["SAP Score"] < 69]["SAP Band"].value_counts()
combined[combined["SAP Score"] < 69].shape
combined[combined["current_sap_points"] < 69]
combined["SAP Band"].value_counts()

# Our Cs
combined_cs = combined[combined["SAP Score"] < 69]
combined_cs["SAP Band"].value_counts()

# Their C and below
compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0]
packages["current_epc_rating"].value_counts()

# TODO: 612 units
23219 - 612

# Rows that already have 69+ SAP points yet still carry a costed package.
errors = recommendations_epc_c[
    (recommendations_epc_c["current_sap_points"] >= 69) &
    (recommendations_epc_c["total_retrofit_cost"] > 0)
]
errors["total_retrofit_cost"].sum()

# Recommendation rows under 69 SAP points, joined back onto the SAL data.
below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69]
below_epc_c_compare = below_epc_c.merge(
    df,
    left_on="uprn",
    right_on="UPRN",
    suffixes=("_rec", "_sal")
)

# Example group 1: under 69 in the recommendations but band C in the SAL.
eg1 = below_epc_c_compare[below_epc_c_compare["SAP Band"] == "C"].copy()
eg1["wall_combined"].value_counts()
eg1_counts = eg1.groupby(["walls", "wall_combined"]).size().reset_index(name="count")
eg1_counts = eg1_counts.sort_values("count", ascending=False)

# SAL says externally insulated solid brick, and no internal wall insulation
# value is present on the recommendation row.
externally_insulated = eg1[
    (eg1["wall_combined"] == "Solid Brick+External") &
    pd.isnull(eg1["internal_wall_insulation"])
]
# Spot check of a single row by index label.
externally_insulated[externally_insulated.index == 823]["uprn"]

# Rows with fractional SAP points strictly between 68 and 69.
recommendations_epc_c[
    (recommendations_epc_c["current_sap_points"] < 69) &
    (recommendations_epc_c["current_sap_points"] > 68)
].shape
# NOTE(review): "wall_combined" is created on df above; confirm the
# recommendations sheet also has this column, otherwise this raises KeyError.
recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""]

View file

@ -0,0 +1,236 @@
import pandas as pd
# Scenario outputs: one workbook per target band.
epc_c_recommendations = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
    "solid floor, ashp 3.0 - corrected.xlsx"
)
epc_b_recommendations = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
    "solid floor, ashp 3.0 - corrected.xlsx"
)

# Rows of the EPC B output whose current_epc_rating is "Epc.C".
epc_c_movers = epc_b_recommendations[
    epc_b_recommendations["current_epc_rating"] == "Epc.C"
]
epc_c_movers["property_type"].value_counts()

house_epc_c_movers = epc_c_movers[
    epc_c_movers["property_type"] == "House"
]
# Houses with a value in either solar column.
house_epc_c_movers_with_solar = house_epc_c_movers[
    house_epc_c_movers["solar_pv"].notna() | house_epc_c_movers["solar_pv_with_battery"].notna()
]
house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
    house_epc_c_movers["air_source_heat_pump"].notna()
]
flat_epc_c_movers = epc_c_movers[
    epc_c_movers["property_type"] == "Flat"
]

# Mean sap_points for each scenario. The second line reads the EPC B frame;
# repeating the EPC C frame would leave the B figure uncomputed.
epc_c_recommendations["sap_points"].mean()
epc_b_recommendations["sap_points"].mean()
# Columns of the scenario workbooks that hold one value per retrofit measure.
measure_cols = [
    "air_source_heat_pump",
    "boiler_upgrade",
    "cavity_wall_insulation",
    "double_glazing",
    "external_wall_insulation",
    "flat_roof_insulation",
    "high_heat_retention_storage_heaters",
    "internal_wall_insulation",
    "loft_insulation",
    "low_energy_lighting",
    "mechanical_ventilation",
    "room_roof_insulation",
    "roomstat_programmer_trvs",
    "sealing_open_fireplace",
    "secondary_glazing",
    "secondary_heating",
    "solar_pv",
    "solar_pv_with_battery",
    "suspended_floor_insulation",
    "time_temperature_zone_control",
]


def _melt_measures(recommendations, value_vars):
    """Return one row per (property, measure) with a positive value.

    Args:
        recommendations: Wide DataFrame containing every column in ``value_vars``.
        value_vars: Names of the measure columns to unpivot.

    Returns:
        Long DataFrame with ``measure_type`` and ``value`` columns; rows whose
        value is missing or not greater than zero are dropped.
    """
    melted = recommendations.melt(
        id_vars=[c for c in recommendations.columns if c not in value_vars],
        value_vars=value_vars,
        var_name="measure_type",
        value_name="value",
    ).dropna(subset=["value"])
    return melted[melted["value"] > 0]


def _measure_shares(melted):
    """Return each measure's share of all rows in ``melted``.

    The columns are named explicitly (``measure_type``, ``proportion``) because
    the names produced by ``value_counts().to_frame().reset_index()`` depend on
    the installed pandas version, and the merge below joins on ``measure_type``.
    """
    return (
        melted["measure_type"]
        .value_counts(normalize=True)
        .rename_axis("measure_type")
        .reset_index(name="proportion")
    )


epc_c_melted = _melt_measures(epc_c_recommendations, measure_cols)
epc_c_measures = _measure_shares(epc_c_melted)

epc_b_melted = _melt_measures(epc_b_recommendations, measure_cols)
epc_b_measures = _measure_shares(epc_b_melted)

# Side-by-side shares for measures that appear in both scenarios (inner join).
measures_compared = epc_c_measures.merge(
    epc_b_measures,
    on="measure_type",
    suffixes=("_epc_c", "_epc_b"),
)
# Rows with a positive total_retrofit_cost in each scenario.
c_has_cost = epc_c_recommendations["total_retrofit_cost"] > 0
b_has_cost = epc_b_recommendations["total_retrofit_cost"] > 0
epc_c_retrofits = epc_c_recommendations.loc[c_has_cost]
epc_b_retrofits = epc_b_recommendations.loc[b_has_cost]

epc_c_retrofits["sap_points"].mean()
epc_b_retrofits["sap_points"].mean()

# Properties present in both subsets, matched on uprn; overlapping column
# names are told apart by the scenario suffix.
properties_in_both = pd.merge(
    epc_c_retrofits,
    epc_b_retrofits,
    on="uprn",
    suffixes=("_epc_c", "_epc_b"),
)
properties_in_both["total_retrofit_cost_epc_c"].mean()
properties_in_both["sap_points_epc_c"].mean()
properties_in_both["total_retrofit_cost_epc_b"].mean()
properties_in_both["sap_points_epc_b"].mean()
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
# Portfolio and scenario ids passed to get_data() further down.
PORTFOLIO_ID = 434  # Peabody
SCENARIOS = [904, 905]

# Scenario id -> human-readable scenario name.
scenario_names = {
    904: "EPC C - no solid floor, ashp 3.0",
    905: "EPC B - no solid floor, ashp 3.0",
}
def get_data(portfolio_id, scenario_ids):
    """Load property, plan and recommendation rows as plain dictionaries.

    Args:
        portfolio_id: Value compared with ``PropertyModel.portfolio_id`` to
            select the properties.
        scenario_ids: Collection of ids handed to ``Plan.scenario_id.in_``.

    Returns:
        A ``(properties_data, plans_data, recommendations_data)`` tuple of
        lists of dicts keyed by column name. Each recommendation dict also
        carries the ``scenario_id`` of its plan and a ``materials`` list.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # --------------------
        # Properties
        # --------------------
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel,
            PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id
        ).all()

        # Both tables are flattened into one dict per row; where the two
        # tables share a column name, the PropertyDetailsEpcModel value wins
        # because it is unpacked last.
        properties_data = [
            {
                **{col.name: getattr(p.PropertyModel, col.name)
                   for col in PropertyModel.__table__.columns},
                **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
                   for col in PropertyDetailsEpcModel.__table__.columns},
            }
            for p in properties_query
        ]

        # --------------------
        # Plans
        # --------------------
        # Plans are selected by scenario only; portfolio_id is not part of
        # this filter.
        plans_query = session.query(Plan).filter(
            Plan.scenario_id.in_(scenario_ids)
        ).all()

        plans_data = [
            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
            for plan in plans_query
        ]
        plan_ids = [p["id"] for p in plans_data]

        # --------------------
        # Recommendations (NO materials yet)
        # --------------------
        # Only default recommendations that are not already installed.
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations,
            Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan,
            Plan.id == PlanRecommendations.plan_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default.is_(True),
            Recommendation.already_installed.is_(False)
        ).all()

        recommendations_data = [
            {
                **{col.name: getattr(r.Recommendation, col.name)
                   for col in Recommendation.__table__.columns},
                "scenario_id": r.scenario_id,
                "materials": []  # placeholder
            }
            for r in recommendations_query
        ]
        recommendation_ids = [r["id"] for r in recommendations_data]

        # --------------------
        # Recommendation materials (SEPARATE QUERY)
        # --------------------
        materials_query = session.query(
            RecommendationMaterials
        ).filter(
            RecommendationMaterials.recommendation_id.in_(recommendation_ids)
        ).all()

        # Group materials by recommendation_id
        materials_by_recommendation = defaultdict(list)
        for m in materials_query:
            materials_by_recommendation[m.recommendation_id].append({
                "material_id": m.material_id,
                "depth": m.depth,
                "quantity": m.quantity,
                "quantity_unit": m.quantity_unit,
                "estimated_cost": m.estimated_cost,
            })

        # Attach materials safely (no filtering side effects); .get() avoids
        # inserting empty entries into the defaultdict.
        for r in recommendations_data:
            r["materials"] = materials_by_recommendation.get(r["id"], [])
    finally:
        # Always release the session, including when a query above raises.
        session.close()

    return properties_data, plans_data, recommendations_data
# Pull the raw rows for the Peabody scenarios.
properties_data, plans_data, recommendations_data = get_data(PORTFOLIO_ID, SCENARIOS)

# Mean energy_cost_savings of the solar_pv recommendations, per scenario.
recommendations_df = pd.DataFrame(recommendations_data)
is_solar_pv = recommendations_df["measure_type"] == "solar_pv"
solar_pv_recommendations = recommendations_df.loc[is_solar_pv]
average_savings = (
    solar_pv_recommendations
    .groupby("scenario_id")["energy_cost_savings"]
    .mean()
    .reset_index()
)

View file

@ -14,22 +14,14 @@ from collections import defaultdict
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
# Portfolio and scenario ids for the current Peabody run. Each name is
# assigned exactly once, and every id in SCENARIOS has an entry in
# scenario_names, which is used to build the output file name per scenario.
PORTFOLIO_ID = 434  # Peabody
SCENARIOS = [
    904,
    905,
]

# Scenario id -> human-readable scenario name.
scenario_names = {
    904: "EPC C - no solid floor, ashp 3.0",
    905: "EPC B - no solid floor, ashp 3.0",
}
@ -88,7 +80,8 @@ def get_data(portfolio_id, scenario_ids):
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True)
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_data = [
@ -220,9 +213,7 @@ for scenario_id in SCENARIOS:
df = properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
@ -240,7 +231,7 @@ for scenario_id in SCENARIOS:
# Create excel to store to
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/{scenario_names[scenario_id]}.xlsx")
f"Project/Final SAL/{scenario_names[scenario_id]} - corrected.xlsx")
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)