From 74ce1627ec934c1f023c4e025bf0a89f99ac7f82 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 17 Jan 2026 13:00:00 +0000 Subject: [PATCH] peabody specific --- .../k_deck_stats.py | 63 +++++++++-- .../n_fixing_already_installed_bug.py | 3 +- .../o_rerunning_iwi_jobs.py | 41 +++++++ sfr/principal_pitch/2_export_data.py | 101 ++++++++++++++++-- 4 files changed, 193 insertions(+), 15 deletions(-) create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index cd7fba63..b6fc0f8f 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -114,14 +114,16 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict -PORTFOLIO_ID = 434 # Peabody +PORTFOLIO_ID = 435 # Peabody SCENARIOS = [ - 904, - 905 + 908, + 909, + 910, ] scenario_names = { - 904: "EPC C - no solid floor, ashp 3.0", - 905: "EPC B - no solid floor, ashp 3.0", + 908: "EPC C - no solid floor, ashp 3.0", + 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + 910: "EPC B - no solid floor, no EWI, ashp 3.0" } @@ -232,9 +234,58 @@ properties_data, plans_data, recommendations_data = get_data( recommendations_df = pd.DataFrame(recommendations_data) properties_df = pd.DataFrame(properties_data) +plans_df = pd.DataFrame(plans_data) -solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] +s_id = 910 +ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy() +# Take the newest by scenario id +ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates( + subset=["property_id"] +) +z = ps_w_a_plan[ + ps_w_a_plan["cost_of_works"] > 0 + ].copy() +z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)] +# '', 'hot_water_cost_current', +# 'lighting_cost_current', 'appliances_cost_current', +# 'gas_standing_charge', 'electricity_standing_charge' +z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[ + "appliances_cost_current" +] + z2["gas_standing_charge"] + z2["electricity_standing_charge"] + +from tqdm import tqdm + +# For a property ID, find a property where the no EWI/IWI approach is more expensive than the EWI approach +pids = properties_df["property_id"].unique() +for pid in tqdm(pids): + + if pid in [603272, 550550, 574493]: + continue + + # get the plans + property_plan = plans_df[plans_df["property_id"] == int(pid)] + # Take the newest plan by scenario id + property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates( + subset=["scenario_id"] + ) + a = property_plan[property_plan["scenario_id"] == 909].squeeze() # no EWI/IWI + b = property_plan[property_plan["scenario_id"] == 908].squeeze() # EWI + if (a["cost_of_works"] > b["cost_of_works"]) and ( + a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000): + bah + +solar_pv_recommendations = recommendations_df[ + recommendations_df["measure_type"] == "solar_pv" + ] + +solid_wall_recommendation = recommendations_df[ + recommendations_df["scenario_id"].isin([908]) & + recommendations_df["measure_type"].isin(["internal_wall_insulation"]) & + recommendations_df["default"] + ] average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() +# Add on scenarion names +average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names) # Check tenures initial_asset_data = pd.read_excel( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py index 4bd11a1b..d07f1ac1 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py @@ -11,7 +11,6 @@ from etl.customers.cambridge.surveys import current_epc with db_session() as session: # We need installed measures, where the measure type is ewi or iwi installed_measures = session.query(InstalledMeasure).filter( - InstalledMeasure.measure_type.in_(["cavity_wall_insulation"]) ).all() # Get the uprns installed_uprns = [x.uprn for x in installed_measures] @@ -32,7 +31,7 @@ needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)] # Store needing_retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " - "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx", + "SAL/properties_needing_retry_20260115 - all already installed.xlsx", sheet_name="Standardised Asset List", index=False ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py b/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py new file mode 100644 index 00000000..39eccb0b --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/o_rerunning_iwi_jobs.py @@ -0,0 +1,41 @@ +# get all properties that have an IWI recommendation +import pandas as pd + +r1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " + "solid floor, no EWI, ashp 3.0 - 20250113 final.xlsx" +) + +r2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) + +r3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +s1 = r1[~pd.isnull(r1["internal_wall_insulation"])] +s2 = r2[~pd.isnull(r2["internal_wall_insulation"])] + +# Combined uprns +uprns = s1["uprn"].tolist() + s2["uprn"].tolist() +uprns = list(set(uprns)) + +# Create SAL of these uprns +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " + "final asset list.xlsx", + sheet_name="Standardised Asset List" +) + +needing_retry = sal[sal["epc_os_uprn"].isin(uprns)] + +# Store +needing_retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/properties_needing_retry_20260115 - internal wall insulation.xlsx", + sheet_name="Standardised Asset List", + index=False +) diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index f12eb85d..2184d074 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -11,6 +11,7 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial from backend.app.db.functions.materials_functions import get_materials from collections import defaultdict +from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] @@ -57,9 +58,44 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + latest_plans_subq = ( + session.query( + Plan.scenario_id, + Plan.property_id, + func.max(Plan.created_at).label("latest_created_at") + ) + .filter(Plan.scenario_id.in_(scenario_ids)) + .group_by( + Plan.scenario_id, + Plan.property_id + ) + .subquery() + ) + + # plans_query = session.query(Plan).filter( + # Plan.scenario_id.in_(scenario_ids) + # ).all() + + plans_query = ( + session.query(Plan) + .join( + latest_plans_subq, + (Plan.scenario_id == latest_plans_subq.c.scenario_id) & + (Plan.property_id == latest_plans_subq.c.property_id) & + (Plan.created_at == latest_plans_subq.c.latest_created_at) + ) + .all() + ) + + # plans_query = ( + # session.query(Plan) + # .join( + # latest_plans_subq, + # (Plan.scenario_id == latest_plans_subq.c.scenario_id) & + # (Plan.created_at == latest_plans_subq.c.latest_created_at) + # ) + # .all() + # ) plans_data = [ {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} @@ -73,7 +109,8 @@ def get_data(portfolio_id, scenario_ids): # -------------------- recommendations_query = session.query( Recommendation, - Plan.scenario_id + Plan.scenario_id, + PlanRecommendations.plan_id ).join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id @@ -216,6 +253,7 @@ for scenario_id in SCENARIOS: [ "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", + "id" ] ].merge( recommendations_measures_pivot, how="left", on="property_id" @@ -223,17 +261,42 @@ for scenario_id in SCENARIOS: post_install_sap, how="left", on="property_id" ) - df = df.drop(columns=["property_id"]) + # df = df.drop(columns=["property_id"]) df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() + df["predicted_post_works_sap"] = df["predicted_post_works_sap"] df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) df["uprn"] = df["uprn"].astype(str) + relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id] + df2 = df.merge( + relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id", + suffixes=("", "_plan") + ) + print(df2["predicted_post_works_epc"].value_counts()) + print(df2["post_epc_rating"].value_counts()) + + z = df2[ + (df2["predicted_post_works_epc"] != "D") & + (df2["post_epc_rating"].astype(str) == "Epc.D") + ] + + df2["predicted_post_works_epc"].value_counts() + df2["post_epc_rating"].astype(str).value_counts() + + df2[df2["total_retrofit_cost"] > 0].shape + + getting_works = df[df["total_retrofit_cost"] > 0] + getting_works["predicted_post_works_epc"].value_counts() + + 32565 / getting_works.shape[0] + + df[df["predicted_post_works_sap"] == ""] + # Create excel to store to filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx") + f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx") with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) @@ -388,3 +451,27 @@ asset_list.to_excel( condition_cost_comparison = asset_list[ ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"] ] + +# Testing +plans_df.head() + +example = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx" +) + +plans_df2 = plans_df.merge( + properties_df[["property_id", "landlord_property_id"]], + left_on="property_id", + right_on="property_id", + how="left" +) + +plans_df2 = plans_df2[plans_df2["scenario_id"] == 909] + +dupes = plans_df2[plans_df2["property_id"].duplicated()] + +# merge on plans +example = example.merge( + plans_df, how="left", +)