From b8dbf1531ebee6fb9b8b6e1366826c4bf10b3e6e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Feb 2024 10:58:19 +0000
Subject: [PATCH] completed Urban Splash portfolio

---
 backend/app/plan/router.py               |   2 +
 backend/ml_models/Valuation.py           |   5 +
 etl/customers/slide_utils.py             |  33 +++-
 etl/customers/urban_splash/asset_list.py |  38 +++-
 etl/customers/urban_splash/slides.py     | 233 ++++++++++++++++++-----
 5 files changed, 255 insertions(+), 56 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a4ba3e08..0b98cf2c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -207,6 +207,8 @@ async def trigger_plan(body: PlanTriggerRequest):
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
+        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
+        #      cylinder jacket), we should add these to the recommendations as default
         logger.info("Optimising recommendations")
         for property_id in recommendations.keys():
 
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index dd5322e3..2bb7de32 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -47,6 +47,11 @@ class PropertyValuation:
         10070056816: 101_000,
         10094183498: 101_000,
         10070056840: 673_000,
+        10070056848: 76_000,
+        10070056849: 76_000,
+        10070056829: 76_000,
+        10070056920: 76_000,
+        10023345463: 76_000,
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index e215af80..d1efce47 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -1,4 +1,3 @@
-import os
 from pptx.enum.text import PP_ALIGN  # NOQA
 from pptx import Presentation
 from pptx.util import Inches, Pt
@@ -9,6 +8,7 @@ from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
 from backend.app.db.models.recommendations import Plan
+from backend.app.utils import sap_to_epc
 
 EPC_COLOURS = {
     "A": "#028051",
@@ -244,3 +244,34 @@ def create_powerpoint(data, save_location):
 
     # Save the presentation
     prs.save(save_location)
+
+
+def create_recommendations_summary(recommendations_df, properties_df, sap_target):
+    """Aggregate per-property recommendation impact and compare it with ``sap_target``.
+
+    Sums SAP points, valuation impact, bill savings, cost and CO2 savings per
+    ``property_id``, left-joins ``uprn`` and ``current_sap_points`` from
+    ``properties_df``, then adds ``expected_sap_points``, ``expected_epc_rating``
+    and ``sap_difference`` (``sap_target`` minus the expected SAP points).
+    """
+    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
+        total_sap_points=("sap_points", "sum"),
+        total_valuation_impact=("property_valuation_increase", "sum"),
+        total_bill_savings=("energy_cost_savings", "sum"),
+        total_cost=("estimated_cost", "sum"),
+        total_carbon=("co2_equivalent_savings", "sum")
+    ).reset_index()
+    # Left merge keeps every property that has recommendations, adding uprn and current SAP points
+    recommendations_summary = recommendations_summary.merge(
+        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
+        how="left"
+    )
+    recommendations_summary["expected_sap_points"] = (
+        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+    )
+    recommendations_summary["expected_epc_rating"] = (
+        recommendations_summary["expected_sap_points"].apply(sap_to_epc)
+    )
+    recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
+
+    return recommendations_summary
diff --git a/etl/customers/urban_splash/asset_list.py b/etl/customers/urban_splash/asset_list.py
index 96aad007..751ac081 100644
--- a/etl/customers/urban_splash/asset_list.py
+++ b/etl/customers/urban_splash/asset_list.py
@@ -14,7 +14,15 @@ load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 USER_ID = 8
-PORTFOLIO_ID = 65
+PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
+
+# We also create a second portfolio for a subset of properties that do not meet the install requirements
+# We drop these uprns from the first plan
+second_portfolio_uprns = [
+    10070056840, 10070056846, 10070056847, 10070056843, 10070056848, 10070056844, 10070056849,
+    10070056829, 10070056920, 10023345463
+]
 
 
 def app():
@@ -99,7 +107,6 @@ def app():
         epc_data.append(newest_epc)
 
     processed_asset_list_df = pd.DataFrame(processed_asset_list)
-    processed_asset_list_df.to_excel("urban_splash_processed_asset_list.xlsx")
 
     epc_data_df = pd.DataFrame(epc_data)
 
@@ -107,7 +114,9 @@ def app():
     # Store the data in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
     save_csv_to_s3(
-        dataframe=processed_asset_list_df,
+        dataframe=processed_asset_list_df[
+            ~processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)
+        ],
         bucket_name="retrofit-plan-inputs-dev",
         file_name=filename
     )
@@ -118,7 +127,28 @@ def app():
         "goal": "Increase EPC",
         "goal_value": "C",
         "trigger_file_path": filename,
-        "budget": 5000,
+        "budget": None,
+    }
+    print(body)
+
+    subset = processed_asset_list_df[
+        processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)
+    ]
+
+    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
+    save_csv_to_s3(
+        dataframe=subset,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename2
+    )
+
+    body = {
+        "portfolio_id": str(SECOND_SCENARIO_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename2,  # second-scenario inputs uploaded above, not the primary file
+        "budget": None,
     }
     print(body)
 
diff --git a/etl/customers/urban_splash/slides.py b/etl/customers/urban_splash/slides.py
index 19e1d579..e275167e 100644
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@@ -6,17 +6,18 @@ a environment akin to the backend to run this script
 import pandas as pd
 import numpy as np
 from backend.app.db.connection import db_engine
-from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from etl.customers.slide_utils import (
     plot_epc_distribution,
     get_property_details_by_portfolio_id,
     get_plan_by_portfolio_id,
     get_properties_with_default_recommendations,
-    create_powerpoint
+    create_powerpoint,
+    create_recommendations_summary
 )
 
 PORTFOLIO_ID = 66
+SECOND_SCENARIO_PORTFOLIO_ID = 65
 EPC_TARGET = "C"
 SAP_TARGET = 69
 CUSTOMER_KEY = "urban_splash"
@@ -37,6 +38,11 @@ def app():
     # We now pull the data for the property details
     property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
     property_details_df = pd.DataFrame(property_details)
+    # Merge on uprn
+    property_details_df = property_details_df.merge(
+        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
 
     plans = get_plan_by_portfolio_id(session, PORTFOLIO_ID)
     plans_df = pd.DataFrame(plans)
@@ -44,34 +50,42 @@ def app():
     # Unnest the recommendations. Each recommendation is a list of dictionaries
     recommendations_exploded = properties_df["recommendations"].explode().tolist()
     recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
-    # Aggregate the impact of the recommendations
-    # We want:
-    # Total number of sap points
-    # total valuation impact
-    # total bill savings
-    # total cost
-    # Total Co2 impact
-    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
-        total_sap_points=("sap_points", "sum"),
-        total_valuation_impact=("property_valuation_increase", "sum"),
-        total_bill_savings=("energy_cost_savings", "sum"),
-        total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum")
-    ).reset_index()
-    # Merge on current sap points
-    recommendations_summary = recommendations_summary.merge(
-        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
-        how="left"
-    )
-    recommendations_summary["expected_sap_points"] = (
-        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
-    )
-    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
-        lambda x: sap_to_epc(x)
-    )
-    recommendations_summary["sap_difference"] = SAP_TARGET - recommendations_summary["expected_sap_points"]
 
-    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
+    recommendations_summary = create_recommendations_summary(recommendations_df, properties_df, SAP_TARGET)
+
+    # Get the data for the second scenario portfolio
+    properties_second_scenario = get_properties_with_default_recommendations(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    properties_second_scenario_df = pd.DataFrame(properties_second_scenario)
+
+    propert_details_second_scenario = get_property_details_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    property_details_second_scenario_df = pd.DataFrame(propert_details_second_scenario)
+    # Merge on uprn
+    property_details_second_scenario_df = property_details_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+        on="property_id"
+    )
+
+    plans_second_scenario = get_plan_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
+    plans_second_scenario_df = pd.DataFrame(plans_second_scenario)
+    # Merge on uprn so we can compare properties across portfolios
+    plans_second_scenario_df = plans_second_scenario_df.merge(
+        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}), on="property_id"
+    )
+
+    recommendations_exploded_second_scenario = properties_second_scenario_df["recommendations"].explode().tolist()
+    recommendations_second_scenario_df = pd.DataFrame(
+        [r for r in recommendations_exploded_second_scenario if not pd.isnull(r)]
+    )
+
+    recommendations_summary_second_scenario = create_recommendations_summary(
+        recommendations_second_scenario_df, properties_second_scenario_df, SAP_TARGET
+    )
+
+    # Combine the data for both scenarios
+    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
+    full_properties = pd.concat([properties_df, properties_second_scenario_df])
+
+    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
     epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
 
     ########################################################################
@@ -87,24 +101,35 @@ def app():
     )
 
     # floor area - upper and lower bounds
+
+    # Take just properties that are below EPC C
+    properties_needing_work = full_properties[
+        full_properties["current_sap_points"] < SAP_TARGET
+        ]
+    property_details_needing_work = full_property_details[
+        full_property_details["uprn"].isin(properties_needing_work["uprn"])
+    ]
+
     min_area, max_area, average_area = (
-        property_details_df["total_floor_area"].min(),
-        property_details_df["total_floor_area"].max(),
-        property_details_df["total_floor_area"].mean()
+        full_property_details["total_floor_area"].min(),
+        full_property_details["total_floor_area"].max(),
+        full_property_details["total_floor_area"].mean()
     )
 
     # Annual energy consumption - upper and lower bounds
-    min_energy_consumption, max_energy_consumption, average_consumption = (
-        property_details_df["adjusted_energy_consumption"].min(),
-        property_details_df["adjusted_energy_consumption"].max(),
-        property_details_df["adjusted_energy_consumption"].mean()
+    min_energy_consumption, max_energy_consumption, average_consumption, total_consumption = (
+        property_details_needing_work["adjusted_energy_consumption"].min(),
+        property_details_needing_work["adjusted_energy_consumption"].max(),
+        property_details_needing_work["adjusted_energy_consumption"].mean(),
+        property_details_needing_work["adjusted_energy_consumption"].sum()
     )
 
     # Co2 emissions - upper and lower bounds
-    min_co2, max_co2, average_co2 = (
-        property_details_df["co2_emissions"].min(),
-        property_details_df["co2_emissions"].max(),
-        property_details_df["co2_emissions"].mean()
+    min_co2, max_co2, average_co2, total_co2 = (
+        property_details_needing_work["co2_emissions"].min(),
+        property_details_needing_work["co2_emissions"].max(),
+        property_details_needing_work["co2_emissions"].mean(),
+        property_details_needing_work["co2_emissions"].sum()
     )
 
     # Valuation: upper and lower bounds and average - take positive values in case we have just a sample
@@ -142,6 +167,17 @@ def app():
 
     measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
 
+    # Costs
+    (
+        expected_cost_per_unit_lower,
+        expected_cost_per_unit_upper,
+        expected_project_cost,
+    ) = (
+        units_hitting_target["total_cost"].min(),
+        units_hitting_target["total_cost"].max(),
+        units_hitting_target["total_cost"].sum()
+    )
+
     # Per property
     # Take positive entries just in case we we have a sample
     valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
@@ -169,6 +205,8 @@ def app():
 
     slide_2_commentary = (
         f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
+        f"Expected cost: {expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
+        f"{expected_project_cost}\n"
         f"Measures include: {measures}\n"
         f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
         f"{average_valuation_impact}\n"
@@ -181,25 +219,118 @@ def app():
     # Slide 3:
     ############
 
-    units_missed_target = recommendations_summary[
-        recommendations_summary["expected_epc_rating"] != EPC_TARGET
-        ]
+    units_missed_target = recommendations_summary_second_scenario.copy()
 
     n_units_missed_target = units_missed_target.shape[0]
 
     # How close were the properties that missed the target
     # We calculate the difference between the expected sap points and the lower bound sap points for the target
 
-    min_difference, max_difference, average_difference = (
-        np.ceil(units_missed_target["sap_difference"].min()),
-        np.ceil(units_missed_target["sap_difference"].max()),
-        np.ceil(units_missed_target["sap_difference"].mean())
+    min_difference, max_difference, average_difference = (  # min/max are interpolated into slide_3_text
+        np.ceil(units_missed_target["sap_difference"].min()),
+        np.ceil(units_missed_target["sap_difference"].max()),
+        np.ceil(units_missed_target["sap_difference"].mean())
+    )
+
+    second_scenario_measures = ("Electrical heating system upgrades & heating controls, Hot water system improvements "
+                                "and internal wall insulation")
+
+    # Just take all of the units in the second scenario, since they're borderline
+    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
+        # (recommendations_summary_second_scenario["expected_epc_rating"] == EPC_TARGET) &
+        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
+    ]
+
+    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
+        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
+        ].shape[0]
+
+    # Impact on second scenario
+    # Costs
+    (
+        expected_cost_per_unit_lower_second_scenario,
+        expected_cost_per_unit_upper_second_scenario,
+        expected_project_cost_second_scenario,
+    ) = (
+        recommendations_summary_second_scenario["total_cost"].min(),
+        recommendations_summary_second_scenario["total_cost"].max(),
+        recommendations_summary_second_scenario["total_cost"].sum()
+    )
+
+    valuation_impact_df_second_scenario = plans_second_scenario_df[
+        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
+    ]
+    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
+        ]
+    (
+        min_valuation_impact_second_scenario,
+        max_valuation_impact_second_scenario,
+        average_valuation_impact_second_scenario
+    ) = (
+        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
+        valuation_impact_df_second_scenario["valuation_increase_average"].median()
+    )
+
+    # Bill savings per property
+    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
+        units_hitting_target_second_scenario["total_bill_savings"].min(),
+        units_hitting_target_second_scenario["total_bill_savings"].max(),
+        units_hitting_target_second_scenario["total_bill_savings"].mean()
+    )
+
+    # Total CO2 reduction of portfolio
+    (
+        min_co2_reduction_second_scenario,
+        max_co2_reduction_second_scenario,
+        average_co2_reduction_second_scenario,
+        total_co2_reduction_second_scenario
+    ) = (
+        units_hitting_target_second_scenario["total_carbon"].min(),
+        units_hitting_target_second_scenario["total_carbon"].max(),
+        units_hitting_target_second_scenario["total_carbon"].mean(),
+        units_hitting_target_second_scenario["total_carbon"].sum()
+    )
+
+    # Values for the leftovers
+    units_missing_second_scenario = recommendations_summary_second_scenario[
+        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET) &
+        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
+        ]
+
+    min_difference_second_scenario, max_difference_second_scenario, average_difference_second_scenario = (
+        np.ceil(units_missing_second_scenario["sap_difference"].min()),
+        np.ceil(units_missing_second_scenario["sap_difference"].max()),
+        np.ceil(units_missing_second_scenario["sap_difference"].mean())
     )
 
     slide_3_text = (
-        "Many of the properties upgrade considerably and may be able to achieve an EPC C with further measures, "
-        "however we could need a survey to confirm the heating and hot water systems to identify further "
-        "potential measures.")
+        f"{n_units_missed_target} units look like they would miss the EPC {EPC_TARGET} by {min_difference}-"
+        f"{max_difference} points \n"
+        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
+        f"{EPC_TARGET}.\n"
+        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
+        f"Of the {n_units_missed_target} properties, a further {n_units_hitting_second_scenario} are "
+        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
+        f"Expected cost: {expected_cost_per_unit_lower_second_scenario} - "
+        f"{expected_cost_per_unit_upper_second_scenario}, "
+        f"total project: £"
+        f"{expected_project_cost_second_scenario}\n"
+        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
+        f"{max_valuation_impact_second_scenario}, average: £"
+        f"{average_valuation_impact_second_scenario}\n"
+        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
+        f"average: £{average_bill_savings_second_scenario}\n"
+        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
+        f"average: "
+        f"{average_co2_reduction_second_scenario}\n"
+        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
+        f"tonnes\n"
+        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
+        f"to miss EPC {EPC_TARGET} by {average_difference_second_scenario} point on average - they should be visited "
+        f"by an assessor"
+    )
 
     slide_data = {
         'slide_1': {
@@ -217,5 +348,5 @@ def app():
         }
     }
 
-    save_location = f"etl/customers/{CUSTOMER_KEY}/powerpoint.pptx"
+    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
     create_powerpoint(slide_data, save_location)