diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py index e6871678..67471813 100644 --- a/etl/customers/newhaven/newhaven_study.py +++ b/etl/customers/newhaven/newhaven_study.py @@ -54,6 +54,8 @@ def make_asset_list(): ) ashp_potential["UPRN"] = ashp_potential["UPRN"].astype(int).astype(str) + ashp_potential[ashp_potential["UPRN"] == "100060067063"].squeeze() + insulation_potential = pd.read_csv( f"{CUSTOMER_DATA_DIRECTORY}/Insulation Potential/Insulation Potential.csv", low_memory=False, @@ -88,20 +90,20 @@ def make_asset_list(): columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"} ) - # had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])] - # below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape - # below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape - # had_an_epc["energy-efficiency-rating"].value_counts() - # asset_list["current-energy-rating"].value_counts() - # asset_list["co2-emissions-current"].mean() + had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])] + below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape + below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape + had_an_epc["energy-efficiency-rating"].value_counts() + asset_list["current-energy-rating"].value_counts() + asset_list["co2-emissions-current"].mean() # # Get the underlying data of a histograme - # import matplotlib.pyplot as plt - # n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7) + import matplotlib.pyplot as plt + n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7) # - # bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10 + bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10 # # # Step 3: Calculate the frequency of data in each bin - # hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins) + hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins) # Take properties below a B - there are 2844 units asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80] diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 3fe27452..2fe914e2 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -1,4 +1,6 @@ +from tqdm import tqdm import pandas as pd +import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario @@ -68,13 +70,101 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data +def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids): + # properties_starting_with_electric_heating = properties_df[ + # properties_df["mainfuel"].isin( + # ["Electricity not community", "Electricity electricity unspecified tariff"] + # ) + # ]["id"].tolist() + + # Get the recommendations for the scenario, default + scenario_comparison_df = [] + scenario_comparison_df_2 = [] + cost_per_kwh_saved_table = [] + for scenario_id in scenario_ids: + # Get the recommendations for the scenario, default + scenario_recommendations = recommendations_df[ + (recommendations_df["Scenario ID"] == scenario_id) & + (recommendations_df["default"] == True) + ].copy() + + scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( + lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, + axis=1) + scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( + lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + + # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used + scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( + lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ + 'kwh_savings'], axis=1) + + # We need to determine if any of the properties start with electric heating or end with it + # property_electric_heating = [] + # for pid, recs in scenario_recommendations.groupby("property_id"): + # has_ashp = recs[recs["description"].str.contains("air source heat pump")] + # if not has_ashp.empty: + # property_electric_heating.append(pid) + # continue + # has_heating_rec = recs[recs["description"].str.contains("high heat retention electric")] + # if not has_heating_rec.empty: + # property_electric_heating.append(pid) + # continue + + grouped_data = scenario_recommendations.groupby(['property_id']).agg({ + 'Estimated Kwh Savings': 'sum', + 'ligting_kwh': 'sum', + 'solar_kwh': 'sum', + "estimated_cost": "sum" + }).reset_index() + + comparison = properties_df.drop_duplicates().merge( + grouped_data, on=["property_id"], how="left" + ) + + comparison["Post Retrofit Heating & Hotwater kwh"] = ( + comparison["current_energy_demand_heating_hotwater"] - \ + comparison["Estimated Kwh Savings"] + ) + + avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + + # We now, for properties that have a plan, do a before and after + with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] + + avgs2 = with_savings[ + ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ + "Post Retrofit Heating & Hotwater kwh"] + avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + + # We also calculate the cost per kwh saves + total_kwh_saved = ( + with_savings["Estimated Kwh Savings"].sum() + + with_savings["ligting_kwh"].sum() + + with_savings["solar_kwh"].sum() + ) + total_cost = with_savings["estimated_cost"].sum() + cost_per_kwh_saved = total_cost / total_kwh_saved + + scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) + scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) + cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}) + + scenario_comparison_population = pd.DataFrame(scenario_comparison_df) + scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2) + cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table) + + return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table + + def slides(): # Prepares the information required for the slides # Right now this is the second version of the nehaven portfolio portfolio_id = 90 # Look at one scenario at a time, otherwise this is agony - scenario_ids = [47, 48, 49] + scenario_ids = [47, 48, 49, 50, 51] properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) @@ -85,114 +175,25 @@ def slides(): if properties_df.shape[0] != 2553: raise ValueError("The number of unique properties is not 2553") - def estimate_post_retrofit_heating_hotwater_kwh(recommendations_df, scenario_ids): - # Get the recommendations for the scenario, default - scenario_comparison_df = [] - scenario_comparison_df_2 = [] - for scenario_id in scenario_ids: - # Get the recommendations for the scenario, default - scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() - - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) - - if scenario_recommendations['solar_kwh'].sum() > 0: - blah - - # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) - - grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', - 'ligting_kwh': 'sum', - 'solar_kwh': 'sum' - }).reset_index() - - comparison = properties_df.drop_duplicates().merge( - grouped_data, on=["property_id"], how="left" - ) - - comparison["Post Retrofit Heating & Hotwater kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - \ - comparison["Estimated Kwh Savings"] - ) - - avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - - # We now, for properties that have a plan, do a before and after - with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] - - avgs2 = with_savings[ - ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ - "Post Retrofit Heating & Hotwater kwh"] - avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] - - scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) - scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) - - scenario_comparison_df = pd.DataFrame(scenario_comparison_df) - scenario_comparison_df_2 = pd.DataFrame(scenario_comparison_df_2) - - return scenario_comparison_df, scenario_comparison_df_2 - - # TODO: How do we factor in solar PV - # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline? heating_hotwater_kwh = ( properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']] .mean() ) - # Q2: For each scenario, what is the £ per kwh reduction? - # Calculate total kwh savings - kwh_plan_impact = estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df) - - z = df[ - (df["Recommendation Default Status"] == True) & - (df["Plan Name"].isin(['Demand Reduction – cavity & roof insulation'])) - ] - z2 = z[z["Property ID"] == 25215] - # Find duplicated property ID, recommendationt type combos - z = z[z.duplicated(subset=["Property ID", "Recommendation Type"])] - - for plan_name in df["Plan Name"].unique(): - # Get default recs - default_recs = df[ - (df["Recommendation Default Status"] == True) & - (df["Plan Name"] == plan_name) - ].copy() - if default_recs["Recommendation ID"].duplicated().sum(): - raise Exception("somethign went wrong") - - default_recs["Recommendation Type"].unique() - - # We now calculate the total savings - total_savings = default_recs["Estimated Kwh Savings"].sum() - total_cost = default_recs["Recommendation Cost"].sum() - - kwh_savings = df[ - df["Recommendation Default Status"] == True - ].groupby("Plan Name")[["Estimated Kwh Savings", "Recommendation Cost"]].sum().rename( - columns={"Estimated Kwh Savings": "Total Kwh Savings", "Recommendation Cost": "Total Cost"} - ).reset_index() - - kwh_savings["Cost per Kwh Saved"] = kwh_savings["Total Cost"] / kwh_savings["Total Kwh Savings"] + # Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire + # popoulation (incl those without retrofit) and for just those being retrofit + # We also calculat the cost per kwh saved + scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = ( + estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids) + ) # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit # We need to take recommndations that affect just the heating and hot water # By property - df["Type Mapped"] = df["Recommendation Type"].copy().replace( + recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace( { "loft_insulation": "roof_insulation", "room_roof_insulation": "roof_insulation", @@ -200,15 +201,217 @@ def slides(): "hot_water_tank_insulation": "other", "cylinder_thermostat": "other", "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", } ) + recommendations_df["type_mapped"] = np.where( + recommendations_df["description"].str.contains("air source heat pump"), + "air_source_heat_pump", + recommendations_df["type_mapped"] + ) + # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID' - recommendation_summary = df.groupby(['Plan Name', 'Type Mapped']).agg({ - 'Property ID': 'nunique' + recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby( + ['Scenario ID', 'type_mapped'] + ).agg({ + 'property_id': 'nunique' }).reset_index() - recommendation_summary.columns = ['Plan Name', 'Type Mapped', 'Number of Properties'] + recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties'] recommendation_summary["Percentage of Properties"] = 100 * ( - recommendation_summary["Number of Properties"] / df["Property ID"].nunique() + recommendation_summary["Number of Properties"] / properties_df["id"].nunique() ) + + recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])] + + # MVP implementation of funding estimation for the most basic scenario, using GBIS + + project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") + + def find_abs(sap_movement, starting_sap, floor_area): + starting_band = find_band(starting_sap) + finishing_band = find_band(starting_sap + sap_movement) + if starting_band == finishing_band: + return 0 + + if floor_area <= 72: + floor_area_segment = '0-72' + elif (floor_area > 72) and (floor_area <= 97): + floor_area_segment = "73-97" + elif (floor_area > 97) and (floor_area <= 199): + floor_area_segment = "98-199" + else: + floor_area_segment = "200+" + + return project_scores_matrix[ + (project_scores_matrix["Floor Area Segment"] == floor_area_segment) & + (project_scores_matrix["Starting Band"] == starting_band) & + (project_scores_matrix["Finishing Band"] == finishing_band) + ].squeeze()["Cost Savings"] + + eco4_scores_sap_table = [ + {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0}, + {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0}, + {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5}, + {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5}, + {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25}, + {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75}, + {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75}, + {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25}, + {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25}, + {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75}, + {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75}, + {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25}, + {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25}, + {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75} + ] + eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table) + + def find_band(value): + # Iterate through each row in the DataFrame to find the correct band + value_floored = np.floor(value) + return eco4_scores_sap_table[ + (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored) + ].squeeze()["Band"] + + def identify_funding_measure(p, p_recs, is_social): + measures = ["cavity_wall_insulation", "loft_insulation"] + property_abs = [] + for m in measures: + funding_measure = p_recs[p_recs["type"] == m] + if not funding_measure.empty: + funding_measure = funding_measure.squeeze() + project_abs = find_abs( + sap_movement=funding_measure["sap_points"], + starting_sap=p["current_sap_points"], + floor_area=p["total_floor_area"] + ) + property_abs.append({ + "property_id": p["property_id"], + "measure": funding_measure["type"], + "cost": funding_measure["estimated_cost"], + "abs": project_abs, + "is_social": is_social + }) + + if not property_abs: + return None + + property_abs = pd.DataFrame(property_abs).sort_values("cost", ascending=False) + property_abs = property_abs.head(1).to_dict(orient="records")[0] + return property_abs + + social_tenure = ["rental (social)", "Rented (social)"] + scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])] + + funding = [] + for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)): + p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]] + if p_recs.empty: + continue + + if (p["tenure"] in social_tenure) and (p["current_sap_points"] < 69): + f = identify_funding_measure(p, p_recs, True) + if f: + funding.append(f) + continue + + if p["current_sap_points"] < 69: + f = identify_funding_measure(p, p_recs, False) + if f: + funding.append(f) + continue + + funding = pd.DataFrame(funding) + conservative_abs = 20 + funding["expected_funding"] = funding["abs"] * conservative_abs + # We take rows where the expected funding is higher than the cost of the works + 10% + funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)] + + # From the owner of the properties, the funding that they see is just the cost of the works. The actual funding + # recieved will go to the installer + # We now look at the social funding + social_funding = funding[funding["is_social"]]["cost"].sum() + # For the private funding, we need to scale this to consider the fact that only a proportion of the properties + # will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants + # will meet the benefits criteria + private_funding = funding[~funding["is_social"]]["cost"].sum() + + # 51% of households are recipients of benefits in the South East, in the UK + # (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/) + + # We also need to deduce the % of properties in council tax bands A - D + # 2023 council tax bands: + # https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties + # -statistical-commentary + band_a_proportion = 0.239 + band_b_proportion = 0.195 + band_c_proportion = 0.219 + band_d_proportion = 0.156 + a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + + benefits_proportion = 0.51 + + # Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits, + # however we retain the regional average to be conservative + # We scale the private funding based on these two factors + private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion + + n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion) + + # Look at the impact of EWI for scenario + + ewi_jobs = recommendations_df[ + (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation") + ] + ewi_jobs["estimated_cost"].sum() + + has_cavity = recommendations_df[ + (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47) + ] + # Take the some properties in this + cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)] + + cavity_units[cavity_units.index == 3][["uprn", "property_id"]] + + z = recommendations_df[recommendations_df["property_id"] == 24525] + + # Recommenation type by kwh savings per unit + recommendations_final_scenario = recommendations_df[ + recommendations_df["Scenario ID"].isin([51]) & + (recommendations_df["default"] == True) + ].copy() + # Merge on floor area + recommendations_final_scenario = recommendations_final_scenario.merge( + properties_df[["property_id", "total_floor_area"]], on="property_id", how="left" + ) + recommendations_final_scenario = recommendations_final_scenario[ + ~pd.isnull(recommendations_final_scenario["total_floor_area"])] + recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \ + recommendations_final_scenario["total_floor_area"] + + recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace( + { + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) + + aggs = recommendations_final_scenario.groupby("type_mapped")[ + ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values( + "kwh_savings_per_unit", ascending=False + ) + aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"] + # Show more columns with pandas + pd.set_option('display.max_columns', None) + # Show more rows with pandas + pd.set_option('display.max_rows', None) + # Show more characters in a column + pd.set_option('display.max_colwidth', None) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 569d7bcb..b73f187c 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -67,6 +67,7 @@ class WallRecommendations(Definitions): "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation', "Timber frame, as built, no insulation": "Timber frame, with external insulation", 'Timber frame, as built, partial insulation': 'Timber frame, with external insulation', + "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation", } # These are the ending descriptions we consider for walls with internal insulation @@ -80,6 +81,7 @@ class WallRecommendations(Definitions): "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation', "Timber frame, as built, no insulation": "Timber frame, with internal insulation", 'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation', + "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation", } def __init__(