closing up newhaven study

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-16 09:50:37 +01:00
parent 6e802b1f58
commit cee16b8166
3 changed files with 318 additions and 111 deletions

View file

@ -54,6 +54,8 @@ def make_asset_list():
)
ashp_potential["UPRN"] = ashp_potential["UPRN"].astype(int).astype(str)
ashp_potential[ashp_potential["UPRN"] == "100060067063"].squeeze()
insulation_potential = pd.read_csv(
f"{CUSTOMER_DATA_DIRECTORY}/Insulation Potential/Insulation Potential.csv",
low_memory=False,
@ -88,20 +90,20 @@ def make_asset_list():
columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
)
# had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
# below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
# below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
# had_an_epc["energy-efficiency-rating"].value_counts()
# asset_list["current-energy-rating"].value_counts()
# asset_list["co2-emissions-current"].mean()
had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
had_an_epc["energy-efficiency-rating"].value_counts()
asset_list["current-energy-rating"].value_counts()
asset_list["co2-emissions-current"].mean()
# # Get the underlying data of a histograme
# import matplotlib.pyplot as plt
# n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
import matplotlib.pyplot as plt
n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
#
# bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10
bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10
#
# # Step 3: Calculate the frequency of data in each bin
# hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
# Take properties below a B - there are 2844 units
asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]

View file

@ -1,4 +1,6 @@
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
@ -68,13 +70,101 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
# properties_starting_with_electric_heating = properties_df[
# properties_df["mainfuel"].isin(
# ["Electricity not community", "Electricity electricity unspecified tariff"]
# )
# ]["id"].tolist()
# Get the recommendations for the scenario, default
scenario_comparison_df = []
scenario_comparison_df_2 = []
cost_per_kwh_saved_table = []
for scenario_id in scenario_ids:
# Get the recommendations for the scenario, default
scenario_recommendations = recommendations_df[
(recommendations_df["Scenario ID"] == scenario_id) &
(recommendations_df["default"] == True)
].copy()
scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
axis=1)
scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
# Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
'kwh_savings'], axis=1)
# We need to determine if any of the properties start with electric heating or end with it
# property_electric_heating = []
# for pid, recs in scenario_recommendations.groupby("property_id"):
# has_ashp = recs[recs["description"].str.contains("air source heat pump")]
# if not has_ashp.empty:
# property_electric_heating.append(pid)
# continue
# has_heating_rec = recs[recs["description"].str.contains("high heat retention electric")]
# if not has_heating_rec.empty:
# property_electric_heating.append(pid)
# continue
grouped_data = scenario_recommendations.groupby(['property_id']).agg({
'Estimated Kwh Savings': 'sum',
'ligting_kwh': 'sum',
'solar_kwh': 'sum',
"estimated_cost": "sum"
}).reset_index()
comparison = properties_df.drop_duplicates().merge(
grouped_data, on=["property_id"], how="left"
)
comparison["Post Retrofit Heating & Hotwater kwh"] = (
comparison["current_energy_demand_heating_hotwater"] - \
comparison["Estimated Kwh Savings"]
)
avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
# We now, for properties that have a plan, do a before and after
with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
avgs2 = with_savings[
['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
"Post Retrofit Heating & Hotwater kwh"]
avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]
# We also calculate the cost per kwh saves
total_kwh_saved = (
with_savings["Estimated Kwh Savings"].sum() +
with_savings["ligting_kwh"].sum() +
with_savings["solar_kwh"].sum()
)
total_cost = with_savings["estimated_cost"].sum()
cost_per_kwh_saved = total_cost / total_kwh_saved
scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})
scenario_comparison_population = pd.DataFrame(scenario_comparison_df)
scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2)
cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table)
return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table
def slides():
# Prepares the information required for the slides
# Right now this is the second version of the nehaven portfolio
portfolio_id = 90
# Look at one scenario at a time, otherwise this is agony
scenario_ids = [47, 48, 49]
scenario_ids = [47, 48, 49, 50, 51]
properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
@ -85,114 +175,25 @@ def slides():
if properties_df.shape[0] != 2553:
raise ValueError("The number of unique properties is not 2553")
def estimate_post_retrofit_heating_hotwater_kwh(recommendations_df, scenario_ids):
# Get the recommendations for the scenario, default
scenario_comparison_df = []
scenario_comparison_df_2 = []
for scenario_id in scenario_ids:
# Get the recommendations for the scenario, default
scenario_recommendations = recommendations_df[
(recommendations_df["Scenario ID"] == scenario_id) &
(recommendations_df["default"] == True)
].copy()
scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
axis=1)
scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
if scenario_recommendations['solar_kwh'].sum() > 0:
blah
# Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
'kwh_savings'], axis=1)
grouped_data = scenario_recommendations.groupby(['property_id']).agg({
'Estimated Kwh Savings': 'sum',
'ligting_kwh': 'sum',
'solar_kwh': 'sum'
}).reset_index()
comparison = properties_df.drop_duplicates().merge(
grouped_data, on=["property_id"], how="left"
)
comparison["Post Retrofit Heating & Hotwater kwh"] = (
comparison["current_energy_demand_heating_hotwater"] - \
comparison["Estimated Kwh Savings"]
)
avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
# We now, for properties that have a plan, do a before and after
with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
avgs2 = with_savings[
['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
"Post Retrofit Heating & Hotwater kwh"]
avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]
scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
scenario_comparison_df = pd.DataFrame(scenario_comparison_df)
scenario_comparison_df_2 = pd.DataFrame(scenario_comparison_df_2)
return scenario_comparison_df, scenario_comparison_df_2
# TODO: How do we factor in solar PV
# Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
heating_hotwater_kwh = (
properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
.mean()
)
# Q2: For each scenario, what is the £ per kwh reduction?
# Calculate total kwh savings
kwh_plan_impact = estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df)
z = df[
(df["Recommendation Default Status"] == True) &
(df["Plan Name"].isin(['Demand Reduction cavity & roof insulation']))
]
z2 = z[z["Property ID"] == 25215]
# Find duplicated property ID, recommendationt type combos
z = z[z.duplicated(subset=["Property ID", "Recommendation Type"])]
for plan_name in df["Plan Name"].unique():
# Get default recs
default_recs = df[
(df["Recommendation Default Status"] == True) &
(df["Plan Name"] == plan_name)
].copy()
if default_recs["Recommendation ID"].duplicated().sum():
raise Exception("somethign went wrong")
default_recs["Recommendation Type"].unique()
# We now calculate the total savings
total_savings = default_recs["Estimated Kwh Savings"].sum()
total_cost = default_recs["Recommendation Cost"].sum()
kwh_savings = df[
df["Recommendation Default Status"] == True
].groupby("Plan Name")[["Estimated Kwh Savings", "Recommendation Cost"]].sum().rename(
columns={"Estimated Kwh Savings": "Total Kwh Savings", "Recommendation Cost": "Total Cost"}
).reset_index()
kwh_savings["Cost per Kwh Saved"] = kwh_savings["Total Cost"] / kwh_savings["Total Kwh Savings"]
# Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire
# popoulation (incl those without retrofit) and for just those being retrofit
# We also calculat the cost per kwh saved
scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
)
# Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit
# We need to take recommndations that affect just the heating and hot water
# By property
df["Type Mapped"] = df["Recommendation Type"].copy().replace(
recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace(
{
"loft_insulation": "roof_insulation",
"room_roof_insulation": "roof_insulation",
@ -200,15 +201,217 @@ def slides():
"hot_water_tank_insulation": "other",
"cylinder_thermostat": "other",
"sealing_open_fireplace": "other",
"suspended_floor_insulation": "floor_insulation",
"solid_floor_insulation": "floor_insulation",
}
)
recommendations_df["type_mapped"] = np.where(
recommendations_df["description"].str.contains("air source heat pump"),
"air_source_heat_pump",
recommendations_df["type_mapped"]
)
# Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID'
recommendation_summary = df.groupby(['Plan Name', 'Type Mapped']).agg({
'Property ID': 'nunique'
recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(
['Scenario ID', 'type_mapped']
).agg({
'property_id': 'nunique'
}).reset_index()
recommendation_summary.columns = ['Plan Name', 'Type Mapped', 'Number of Properties']
recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
recommendation_summary["Percentage of Properties"] = 100 * (
recommendation_summary["Number of Properties"] / df["Property ID"].nunique()
recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
)
recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])]
# MVP implementation of funding estimation for the most basic scenario, using GBIS
project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
def find_abs(sap_movement, starting_sap, floor_area):
starting_band = find_band(starting_sap)
finishing_band = find_band(starting_sap + sap_movement)
if starting_band == finishing_band:
return 0
if floor_area <= 72:
floor_area_segment = '0-72'
elif (floor_area > 72) and (floor_area <= 97):
floor_area_segment = "73-97"
elif (floor_area > 97) and (floor_area <= 199):
floor_area_segment = "98-199"
else:
floor_area_segment = "200+"
return project_scores_matrix[
(project_scores_matrix["Floor Area Segment"] == floor_area_segment) &
(project_scores_matrix["Starting Band"] == starting_band) &
(project_scores_matrix["Finishing Band"] == finishing_band)
].squeeze()["Cost Savings"]
eco4_scores_sap_table = [
{'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
{'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
{'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
{'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
{'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
{'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
{'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
{'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
{'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
{'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
{'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
{'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
{'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
{'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
]
eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table)
def find_band(value):
# Iterate through each row in the DataFrame to find the correct band
value_floored = np.floor(value)
return eco4_scores_sap_table[
(eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored)
].squeeze()["Band"]
def identify_funding_measure(p, p_recs, is_social):
measures = ["cavity_wall_insulation", "loft_insulation"]
property_abs = []
for m in measures:
funding_measure = p_recs[p_recs["type"] == m]
if not funding_measure.empty:
funding_measure = funding_measure.squeeze()
project_abs = find_abs(
sap_movement=funding_measure["sap_points"],
starting_sap=p["current_sap_points"],
floor_area=p["total_floor_area"]
)
property_abs.append({
"property_id": p["property_id"],
"measure": funding_measure["type"],
"cost": funding_measure["estimated_cost"],
"abs": project_abs,
"is_social": is_social
})
if not property_abs:
return None
property_abs = pd.DataFrame(property_abs).sort_values("cost", ascending=False)
property_abs = property_abs.head(1).to_dict(orient="records")[0]
return property_abs
social_tenure = ["rental (social)", "Rented (social)"]
scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])]
funding = []
for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
if p_recs.empty:
continue
if (p["tenure"] in social_tenure) and (p["current_sap_points"] < 69):
f = identify_funding_measure(p, p_recs, True)
if f:
funding.append(f)
continue
if p["current_sap_points"] < 69:
f = identify_funding_measure(p, p_recs, False)
if f:
funding.append(f)
continue
funding = pd.DataFrame(funding)
conservative_abs = 20
funding["expected_funding"] = funding["abs"] * conservative_abs
# We take rows where the expected funding is higher than the cost of the works + 10%
funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]
# From the owner of the properties, the funding that they see is just the cost of the works. The actual funding
# recieved will go to the installer
# We now look at the social funding
social_funding = funding[funding["is_social"]]["cost"].sum()
# For the private funding, we need to scale this to consider the fact that only a proportion of the properties
# will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants
# will meet the benefits criteria
private_funding = funding[~funding["is_social"]]["cost"].sum()
# 51% of households are recipients of benefits in the South East, in the UK
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
# We also need to deduce the % of properties in council tax bands A - D
# 2023 council tax bands:
# https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
# -statistical-commentary
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
benefits_proportion = 0.51
# Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits,
# however we retain the regional average to be conservative
# We scale the private funding based on these two factors
private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion)
# Look at the impact of EWI for scenario
ewi_jobs = recommendations_df[
(recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation")
]
ewi_jobs["estimated_cost"].sum()
has_cavity = recommendations_df[
(recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47)
]
# Take the some properties in this
cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]
cavity_units[cavity_units.index == 3][["uprn", "property_id"]]
z = recommendations_df[recommendations_df["property_id"] == 24525]
# Recommenation type by kwh savings per unit
recommendations_final_scenario = recommendations_df[
recommendations_df["Scenario ID"].isin([51]) &
(recommendations_df["default"] == True)
].copy()
# Merge on floor area
recommendations_final_scenario = recommendations_final_scenario.merge(
properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
)
recommendations_final_scenario = recommendations_final_scenario[
~pd.isnull(recommendations_final_scenario["total_floor_area"])]
recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \
recommendations_final_scenario["total_floor_area"]
recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace(
{
"room_roof_insulation": "roof_insulation",
"flat_roof_insulation": "roof_insulation",
"hot_water_tank_insulation": "other",
"cylinder_thermostat": "other",
"sealing_open_fireplace": "other",
"suspended_floor_insulation": "floor_insulation",
"solid_floor_insulation": "floor_insulation",
}
)
aggs = recommendations_final_scenario.groupby("type_mapped")[
["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
"kwh_savings_per_unit", ascending=False
)
aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]
# Show more columns with pandas
pd.set_option('display.max_columns', None)
# Show more rows with pandas
pd.set_option('display.max_rows', None)
# Show more characters in a column
pd.set_option('display.max_colwidth', None)

View file

@ -67,6 +67,7 @@ class WallRecommendations(Definitions):
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation",
}
# These are the ending descriptions we consider for walls with internal insulation
@ -80,6 +81,7 @@ class WallRecommendations(Definitions):
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation",
}
def __init__(