From b8dbf1531ebee6fb9b8b6e1366826c4bf10b3e6e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Thu, 22 Feb 2024 10:58:19 +0000
Subject: [PATCH] completed Urban Splash portfolio

---
 backend/app/plan/router.py               |   2 +
 backend/ml_models/Valuation.py           |   5 +
 etl/customers/slide_utils.py             |  33 +++-
 etl/customers/urban_splash/asset_list.py |  38 +++-
 etl/customers/urban_splash/slides.py     | 233 ++++++++++++++++++-----
 5 files changed, 255 insertions(+), 56 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a4ba3e08..0b98cf2c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -207,6 +207,8 @@ async def trigger_plan(body: PlanTriggerRequest):
     # Insert the predictions into the recommendations and run the optimiser
     # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
     # possibility with heating system
+    # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
+    # cylinder jacket), we should add these to the recommendations as default
     logger.info("Optimising recommendations")
 
     for property_id in recommendations.keys():
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index dd5322e3..2bb7de32 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -47,6 +47,11 @@ class PropertyValuation:
         10070056816: 101_000,
         10094183498: 101_000,
         10070056840: 673_000,
+        10070056848: 76_000,
+        10070056849: 76_000,
+        10070056829: 76_000,
+        10070056920: 76_000,
+        10023345463: 76_000,
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index e215af80..d1efce47 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -1,4 +1,3 @@
-import os
 from pptx.enum.text import PP_ALIGN  # NOQA
 from pptx import Presentation
 from pptx.util import Inches, Pt
@@ -9,6 +8,7 @@ from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
 from backend.app.db.models.recommendations import Plan
+from backend.app.utils import sap_to_epc
 
 EPC_COLOURS = {
     "A": "#028051",
@@ -244,3 +244,34 @@ def create_powerpoint(data, save_location):
 
     # Save the presentation
     prs.save(save_location)
+
+
+def create_recommendations_summary(recommendations_df, properties_df, sap_target):
+    """Summarise recommendation impact per property against a SAP target.
+
+    Sums SAP points, valuation increase, bill savings, cost and CO2 savings
+    per ``property_id``, merges ``uprn``/``current_sap_points`` from
+    ``properties_df`` and derives the expected SAP points, the expected EPC
+    rating and ``sap_difference`` (``sap_target`` minus expected SAP points).
+    """
+    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
+        total_sap_points=("sap_points", "sum"),
+        total_valuation_impact=("property_valuation_increase", "sum"),
+        total_bill_savings=("energy_cost_savings", "sum"),
+        total_cost=("estimated_cost", "sum"),
+        total_carbon=("co2_equivalent_savings", "sum")
+    ).reset_index()
+    # Merge on current sap points
+    recommendations_summary = recommendations_summary.merge(
+        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
+        how="left"
+    )
+    recommendations_summary["expected_sap_points"] = (
+        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+    )
+    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
+        sap_to_epc
+    )
+    recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
+
+    return recommendations_summary
diff --git a/etl/customers/urban_splash/asset_list.py b/etl/customers/urban_splash/asset_list.py
index 96aad007..751ac081 100644
--- a/etl/customers/urban_splash/asset_list.py
+++ b/etl/customers/urban_splash/asset_list.py
@@ -14,7 +14,15 @@ load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 USER_ID = 8
-PORTFOLIO_ID = 65
+PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
+
+# We also create a second portfolio for a subset of properties that do not meet the install requirements
+# We drop these uprns from the first plan
+second_portfolio_uprns = [
+    10070056840, 10070056846, 10070056847, 10070056843, 10070056848, 10070056844, 10070056849,
+    10070056829, 10070056920, 10023345463
+]
 
 
 def app():
@@ -99,7 +107,6 @@ def app():
         epc_data.append(newest_epc)
 
     processed_asset_list_df = pd.DataFrame(processed_asset_list)
-    processed_asset_list_df.to_excel("urban_splash_processed_asset_list.xlsx")
 
     epc_data_df = pd.DataFrame(epc_data)
 
@@ -107,7 +114,9 @@ def app():
     # Store the data in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
     save_csv_to_s3(
-        dataframe=processed_asset_list_df,
+        dataframe=processed_asset_list_df[
+            ~processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)
+        ],
         bucket_name="retrofit-plan-inputs-dev",
         file_name=filename
     )
@@ -118,7 +127,28 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "C",
         "trigger_file_path": filename,
-        "budget": 5000,
+        "budget": None,
+    }
+    print(body)
+
+    subset = processed_asset_list_df[
+        processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)
+    ]
+
+    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
+    save_csv_to_s3(
+        dataframe=subset,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename2
+    )
+
+    body = {
+        "portfolio_id": str(SECOND_SCENARIO_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename2,
+        "budget": None,
     }
     print(body)
 
diff --git a/etl/customers/urban_splash/slides.py b/etl/customers/urban_splash/slides.py
index 19e1d579..e275167e 100644
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@@ -6,17 +6,18 @@ a environment akin to the backend to run this script
 import pandas as pd
 import numpy as np
 from backend.app.db.connection import db_engine
-from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from etl.customers.slide_utils import (
     plot_epc_distribution,
     get_property_details_by_portfolio_id,
     get_plan_by_portfolio_id,
     get_properties_with_default_recommendations,
-    create_powerpoint
+    create_powerpoint,
+    create_recommendations_summary
 )
 
 PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
 EPC_TARGET = "C"
 SAP_TARGET = 69
 CUSTOMER_KEY = "urban_splash"
@@ -37,6 +38,11 @@ def app():
     # We now pull the data for the property details
     property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
     property_details_df = pd.DataFrame(property_details)
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
 
     plans = get_plan_by_portfolio_id(session, PORTFOLIO_ID)
     plans_df = pd.DataFrame(plans)
@@ -44,34 +50,42 @@ def app():
     # Unnest the recommendations. Each recommendation is a list of dictionaries
     recommendations_exploded = properties_df["recommendations"].explode().tolist()
     recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
-    # Aggregate the impact of the recommendations
-    # We want:
-    # Total number of sap points
-    # total valuation impact
-    # total bill savings
-    # total cost
-    # Total Co2 impact
-    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
-        total_sap_points=("sap_points", "sum"),
-        total_valuation_impact=("property_valuation_increase", "sum"),
-        total_bill_savings=("energy_cost_savings", "sum"),
-        total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum")
-    ).reset_index()
-    # Merge on current sap points
-    recommendations_summary = recommendations_summary.merge(
-        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
-        how="left"
-    )
-    recommendations_summary["expected_sap_points"] = (
-        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
-    )
-    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
-        lambda x: sap_to_epc(x)
-    )
-    recommendations_summary["sap_difference"] = SAP_TARGET - recommendations_summary["expected_sap_points"]
 
-    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
+    recommendations_summary = create_recommendations_summary(recommendations_df, properties_df, SAP_TARGET)
+
+    # Get the data for the second scenario portfolio
+    properties_second_scenario = get_properties_with_default_recommendations(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    properties_second_scenario_df = pd.DataFrame(properties_second_scenario)
+
+    property_details_second_scenario = get_property_details_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    property_details_second_scenario_df = pd.DataFrame(property_details_second_scenario)
+    # Merge on uprn
+    property_details_second_scenario_df = property_details_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans_second_scenario = get_plan_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    plans_second_scenario_df = pd.DataFrame(plans_second_scenario)
+    # Merge on uprn so we can compare properties across portfolios
+    plans_second_scenario_df = plans_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}), on="property_id"
+    )
+
+    recommendations_exploded_second_scenario = properties_second_scenario_df["recommendations"].explode().tolist()
+    recommendations_second_scenario_df = pd.DataFrame(
+        [r for r in recommendations_exploded_second_scenario if not pd.isnull(r)]
+    )
+
+    recommendations_summary_second_scenario = create_recommendations_summary(
+        recommendations_second_scenario_df, properties_second_scenario_df, SAP_TARGET
+    )
+
+    # Combine the data for both scenarios
+    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
+    full_properties = pd.concat([properties_df, properties_second_scenario_df])
+
+    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
     epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
 
     ########################################################################
@@ -87,24 +101,35 @@ def app():
     )
 
     # floor area - upper and lower bounds
+
+    # Take just properties that are below EPC C
+    properties_needing_work = full_properties[
+        full_properties["current_sap_points"] < SAP_TARGET
+    ]
+    property_details_needing_work = full_property_details[
+        full_property_details["uprn"].isin(properties_needing_work["uprn"])
+    ]
+
     min_area, max_area, average_area = (
-        property_details_df["total_floor_area"].min(),
-        property_details_df["total_floor_area"].max(),
-        property_details_df["total_floor_area"].mean()
+        full_property_details["total_floor_area"].min(),
+        full_property_details["total_floor_area"].max(),
+        full_property_details["total_floor_area"].mean()
     )
 
     # Annual energy consumption - upper and lower bounds
-    min_energy_consumption, max_energy_consumption, average_consumption = (
-        property_details_df["adjusted_energy_consumption"].min(),
-        property_details_df["adjusted_energy_consumption"].max(),
-        property_details_df["adjusted_energy_consumption"].mean()
+    min_energy_consumption, max_energy_consumption, average_consumption, total_consumption = (
+        property_details_needing_work["adjusted_energy_consumption"].min(),
+        property_details_needing_work["adjusted_energy_consumption"].max(),
+        property_details_needing_work["adjusted_energy_consumption"].mean(),
+        property_details_needing_work["adjusted_energy_consumption"].sum()
     )
 
     # Co2 emissions - upper and lower bounds
-    min_co2, max_co2, average_co2 = (
-        property_details_df["co2_emissions"].min(),
-        property_details_df["co2_emissions"].max(),
-        property_details_df["co2_emissions"].mean()
+    min_co2, max_co2, average_co2, total_co2 = (
+        property_details_needing_work["co2_emissions"].min(),
+        property_details_needing_work["co2_emissions"].max(),
+        property_details_needing_work["co2_emissions"].mean(),
+        property_details_needing_work["co2_emissions"].sum()
     )
 
     # Valuation: upper and lower bounds and average - take positive values in case we have just a sample
@@ -142,6 +167,17 @@ def app():
 
     measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
 
+    # Costs
+    (
+        expected_cost_per_unit_lower,
+        expected_cost_per_unit_upper,
+        expected_project_cost,
+    ) = (
+        units_hitting_target["total_cost"].min(),
+        units_hitting_target["total_cost"].max(),
+        units_hitting_target["total_cost"].sum()
+    )
+
     # Per property
     # Take positive entries just in case we we have a sample
     valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
@@ -169,6 +205,8 @@ def app():
 
     slide_2_commentary = (
         f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
+        f"Expected cost: £{expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
+        f"{expected_project_cost}\n"
         f"Measures include: {measures}\n"
         f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
         f"{average_valuation_impact}\n"
@@ -181,25 +219,118 @@ def app():
     # Slide 3:
     ############
 
-    units_missed_target = recommendations_summary[
-        recommendations_summary["expected_epc_rating"] != EPC_TARGET
-    ]
+    units_missed_target = recommendations_summary_second_scenario.copy()
 
     n_units_missed_target = units_missed_target.shape[0]
 
     # How close were the properties that missed the target
     # We calculate the difference between the expected sap points and the lower bound sap points for the target
 
-    min_difference, max_difference, average_difference = (
-        np.ceil(units_missed_target["sap_difference"].min()),
-        np.ceil(units_missed_target["sap_difference"].max()),
-        np.ceil(units_missed_target["sap_difference"].mean())
+    # min_difference, max_difference, average_difference = (
+    #     np.ceil(units_missed_target["sap_difference"].min()),
+    #     np.ceil(units_missed_target["sap_difference"].max()),
+    #     np.ceil(units_missed_target["sap_difference"].mean())
+    # )
+
+    second_scenario_measures = ("Electrical heating system upgrades & heating controls, Hot water system improvements "
+                                "and internal wall insulation")
+
+    # Just take all of the units in the second scenario, since they're borderline
+    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
+        # (recommendations_summary_second_scenario["expected_epc_rating"] == EPC_TARGET) &
+        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
+    ]
+
+    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
+        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
+    ].shape[0]
+
+    # Impact on second scenario
+    # Costs
+    (
+        expected_cost_per_unit_lower_second_scenario,
+        expected_cost_per_unit_upper_second_scenario,
+        expected_project_cost_second_scenario,
+    ) = (
+        recommendations_summary_second_scenario["total_cost"].min(),
+        recommendations_summary_second_scenario["total_cost"].max(),
+        recommendations_summary_second_scenario["total_cost"].sum()
+    )
+
+    valuation_impact_df_second_scenario = plans_second_scenario_df[
+        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
+    ]
+    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
+    ]
+    (
+        min_valuation_impact_second_scenario,
+        max_valuation_impact_second_scenario,
+        average_valuation_impact_second_scenario
+    ) = (
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_average"].median()
+    )
+
+    # Bill savings per property
+    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
+        units_hitting_target_second_scenario["total_bill_savings"].min(),
+        units_hitting_target_second_scenario["total_bill_savings"].max(),
+        units_hitting_target_second_scenario["total_bill_savings"].mean()
+    )
+
+    # Total CO2 reduction of portfolio
+    (
+        min_co2_reduction_second_scenario,
+        max_co2_reduction_second_scenario,
+        average_co2_reduction_second_scenario,
+        total_co2_reduction_second_scenario
+    ) = (
+        units_hitting_target_second_scenario["total_carbon"].min(),
+        units_hitting_target_second_scenario["total_carbon"].max(),
+        units_hitting_target_second_scenario["total_carbon"].mean(),
+        units_hitting_target_second_scenario["total_carbon"].sum()
+    )
+
+    # Values for the leftovers
+    units_missing_second_scenario = recommendations_summary_second_scenario[
+        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET) &
+        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
+    ]
+
+    min_difference_second_scenario, max_difference_second_scenario, average_difference_second_scenario = (
+        np.ceil(units_missing_second_scenario["sap_difference"].min()),
+        np.ceil(units_missing_second_scenario["sap_difference"].max()),
+        np.ceil(units_missing_second_scenario["sap_difference"].mean())
     )
 
     slide_3_text = (
-        "Many of the properties upgrade considerably and may be able to achieve an EPC C with further measures, "
-        "however we could need a survey to confirm the heating and hot water systems to identify further "
-        "potential measures.")
+        f"{n_units_missed_target} units look like they would miss the EPC "
+        f"{EPC_TARGET} \n"
+        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
+        f"{EPC_TARGET}.\n"
+        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
+        f"Of the {n_units_missed_target} properties, a further {n_units_hitting_second_scenario} are "
+        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
+        f"Expected cost: £{expected_cost_per_unit_lower_second_scenario} - "
+        f"{expected_cost_per_unit_upper_second_scenario}, "
+        f"total project: £"
+        f"{expected_project_cost_second_scenario}\n"
+        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
+        f"{max_valuation_impact_second_scenario}, average: £"
+        f"{average_valuation_impact_second_scenario}\n"
+        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
+        f"average: £{average_bill_savings_second_scenario}\n"
+        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
+        f"average: "
+        f"{average_co2_reduction_second_scenario} "
+        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
+        f"tonnes\n"
+        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
+        f"to miss EPC {EPC_TARGET} by {average_difference_second_scenario} points on average - they should be "
+        f"visited by an assessor"
+    )
 
     slide_data = {
         'slide_1': {
@@ -217,5 +348,5 @@ def app():
         }
     }
 
-    save_location = f"etl/customers/{CUSTOMER_KEY}/powerpoint.pptx"
+    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
     create_powerpoint(slide_data, save_location)