Model/etl/customers/newhaven/slides.py

976 lines
37 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
ScenarioModel,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from utils.s3 import read_csv_from_s3
def _columns_as_dict(instance, model):
    """Return ``{column name: value}`` for every table column of ``model`` read off ``instance``."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """Load properties, plans and default recommendations as lists of plain dicts.

    Args:
        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
        scenario_ids: Iterable of scenario ids; plans are selected with
            ``PlanModel.scenario_id IN scenario_ids``.

    Returns:
        A 3-tuple ``(properties_data, plans_data, recommendations_data)``:

        * ``properties_data`` - one dict per (property, EPC details) pair holding
          every column of both tables.
        * ``plans_data`` - one dict per plan holding every ``PlanModel`` column.
        * ``recommendations_data`` - one dict per default recommendation attached
          to one of those plans, holding every ``Recommendation`` column plus a
          ``"Scenario ID"`` key taken from the owning plan.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Properties joined to their EPC details, restricted to one portfolio.
        properties_query = (
            session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )
        # The EPC columns are unpacked last, so where both tables share a column
        # name the EPC details value overwrites the property value.
        properties_data = [
            {
                **_columns_as_dict(row.PropertyModel, PropertyModel),
                **_columns_as_dict(row.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for row in properties_query
        ]

        # Plans are selected by scenario only (not by the properties fetched above).
        plans_query = (
            session.query(PlanModel)
            .filter(PlanModel.scenario_id.in_(scenario_ids))
            .all()
        )
        plans_data = [_columns_as_dict(plan, PlanModel) for plan in plans_query]

        # Plan ids drive the recommendation lookup through PlanRecommendations.
        plan_ids = [plan["id"] for plan in plans_data]

        recommendations_query = (
            session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(
                PlanModel,
                # Join with the plan so each row carries its scenario_id.
                PlanModel.id == PlanRecommendations.plan_id,
            )
            .filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                Recommendation.default == True,  # noqa: E712 - SQL expression, not a Python bool test
            )
            .all()
        )
        recommendations_data = [
            {
                **_columns_as_dict(row.Recommendation, Recommendation),
                "Scenario ID": row.scenario_id,
            }
            for row in recommendations_query
        ]

        return properties_data, plans_data, recommendations_data
    finally:
        # Always release the connection, including when a query raises.
        session.close()
def estimate_post_retrofit_heating_hotwater_kwh(
    properties_df, recommendations_df, scenario_ids
):
    """Summarise heating & hot water kWh before and after retrofit, per scenario.

    For every scenario the default recommendations are split into lighting,
    solar and "everything else" kWh savings. Only the "everything else" savings
    are subtracted from ``current_energy_demand_heating_hotwater``; lighting and
    solar savings only feed the cost-per-kWh figure.

    Args:
        properties_df: DataFrame with at least ``property_id`` and
            ``current_energy_demand_heating_hotwater`` columns. Exact duplicate
            rows are dropped before use.
        recommendations_df: DataFrame with at least ``Scenario ID``, ``default``,
            ``type``, ``kwh_savings``, ``property_id`` and ``estimated_cost``.
        scenario_ids: Iterable of scenario ids to summarise.

    Returns:
        A 3-tuple of DataFrames, one row per scenario each:

        * population averages (all properties; a property without any
          recommendation in the scenario keeps its current demand),
        * averages over properties that have at least one recommendation, with
          ``difference`` and ``percentage_reduction`` columns,
        * ``cost_per_kwh_saved`` (total estimated cost / total kWh saved across
          heating, lighting and solar; NaN when no kWh is saved).
    """
    current_col = "current_energy_demand_heating_hotwater"
    post_col = "Post Retrofit Heating & Hotwater kwh"
    heating_col = "Estimated Kwh Savings"
    lighting_col = "lighting_kwh"
    solar_col = "solar_kwh"

    # Loop-invariant: the de-duplicated property list is the same for every scenario.
    unique_properties = properties_df.drop_duplicates()

    population_rows = []
    retrofitted_rows = []
    cost_rows = []

    for scenario_id in scenario_ids:
        scenario_recommendations = recommendations_df[
            (recommendations_df["Scenario ID"] == scenario_id)
            & (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
        ].copy()

        # Vectorised split of the savings by measure type. Unlike a row-wise
        # ``apply`` this also works when the scenario has no recommendations.
        rec_type = scenario_recommendations["type"]
        kwh_savings = scenario_recommendations["kwh_savings"]
        scenario_recommendations[lighting_col] = kwh_savings.where(
            rec_type == "low_energy_lighting", 0
        )
        scenario_recommendations[solar_col] = kwh_savings.where(
            rec_type == "solar_pv", 0
        )
        # Lighting and solar are reported in their own columns, so they are
        # zeroed out of the heating & hot water savings.
        scenario_recommendations[heating_col] = kwh_savings.where(
            ~rec_type.isin(["low_energy_lighting", "solar_pv"]), 0
        )

        grouped_data = (
            scenario_recommendations.groupby(["property_id"])
            .agg(
                {
                    heating_col: "sum",
                    lighting_col: "sum",
                    solar_col: "sum",
                    "estimated_cost": "sum",
                }
            )
            .reset_index()
        )

        comparison = unique_properties.merge(
            grouped_data, on=["property_id"], how="left"
        )

        # After the left merge, NaN savings mark properties with no
        # recommendation in this scenario. Record that before filling.
        has_plan = comparison[heating_col].notna()

        # Properties without a plan save nothing, so their post-retrofit demand
        # equals their current demand. Leaving NaN here would silently drop them
        # from the population mean.
        comparison[post_col] = comparison[current_col] - comparison[
            heating_col
        ].fillna(0)

        population_avgs = comparison[[current_col, post_col]].mean()

        # Before/after restricted to the properties that actually have a plan.
        with_savings = comparison[has_plan]
        retrofitted_avgs = with_savings[[current_col, post_col]].mean()
        retrofitted_avgs["difference"] = (
            retrofitted_avgs[current_col] - retrofitted_avgs[post_col]
        )
        retrofitted_avgs["percentage_reduction"] = (
            100 * retrofitted_avgs["difference"] / retrofitted_avgs[current_col]
        )

        # Cost per kWh saved counts every kind of saving, not only heating.
        total_kwh_saved = (
            with_savings[heating_col].sum()
            + with_savings[lighting_col].sum()
            + with_savings[solar_col].sum()
        )
        total_cost = with_savings["estimated_cost"].sum()
        # The ratio is undefined when nothing is saved; report NaN explicitly.
        cost_per_kwh_saved = (
            total_cost / total_kwh_saved if total_kwh_saved else np.nan
        )

        population_rows.append(
            {"scenario_id": scenario_id, **population_avgs.to_dict()}
        )
        retrofitted_rows.append(
            {"scenario_id": scenario_id, **retrofitted_avgs.to_dict()}
        )
        cost_rows.append(
            {"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}
        )

    scenario_comparison_population = pd.DataFrame(population_rows)
    scenario_comparison_retrofitted_units = pd.DataFrame(retrofitted_rows)
    cost_per_kwh_saved_table = pd.DataFrame(cost_rows)
    return (
        scenario_comparison_population,
        scenario_comparison_retrofitted_units,
        cost_per_kwh_saved_table,
    )
def slides(
    project_scores_matrix_path="/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv",
):
    """Compute the figures behind the Newhaven slide deck.

    The function loads portfolio 90 / scenarios 47-51 through ``get_data`` and
    derives a series of intermediate tables (baseline demand, per-scenario
    demand, measure uptake, a rough funding estimate, kWh savings per measure).
    Nothing is returned or written: the results live in local variables.
    NOTE(review): presumably this is meant to be stepped through interactively.

    Args:
        project_scores_matrix_path: CSV read with ``pd.read_csv``. It must
            provide ``Floor Area Segment``, ``Starting Band``, ``Finishing Band``
            and ``Cost Savings`` columns. Defaults to the path that was
            previously hard-coded.

    Raises:
        ValueError: If the portfolio does not contain exactly 2553 property rows,
            or if a SAP score cannot be mapped to a band.
    """
    # Right now this is the second version of the Newhaven portfolio
    portfolio_id = 90
    # Look at one scenario at a time, otherwise this is agony
    scenario_ids = [47, 48, 49, 50, 51]
    properties_data, plans_data, recommendations_data = get_data(
        portfolio_id, scenario_ids
    )
    properties_df = pd.DataFrame(properties_data)
    plans_df = pd.DataFrame(plans_data)
    recommendations_df = pd.DataFrame(recommendations_data)
    if properties_df.shape[0] != 2553:
        raise ValueError("The number of unique properties is not 2553")

    # Q1: What is the baseline heating and energy demand for the properties in
    # the portfolio?
    heating_hotwater_kwh = properties_df[
        ["current_energy_demand", "current_energy_demand_heating_hotwater"]
    ].mean()

    # Q2: For each scenario, what is the heating and hot water kWh after
    # retrofit, on the entire population (incl. those without retrofit) and for
    # just those being retrofitted? We also calculate the cost per kWh saved.
    (
        scenario_comparison_population,
        scenario_comparison_retrofitted_units,
        cost_per_kwh_saved_table,
    ) = estimate_post_retrofit_heating_hotwater_kwh(
        properties_df, recommendations_df, scenario_ids
    )

    # Q3: For each scenario, which measure families are recommended and for how
    # many properties? Detailed types are collapsed into broader families first.
    recommendations_df["type_mapped"] = (
        recommendations_df["type"]
        .copy()
        .replace(
            {
                "loft_insulation": "roof_insulation",
                "room_roof_insulation": "roof_insulation",
                "flat_roof_insulation": "roof_insulation",
                "hot_water_tank_insulation": "other",
                "cylinder_thermostat": "other",
                "sealing_open_fireplace": "other",
                "suspended_floor_insulation": "floor_insulation",
                "solid_floor_insulation": "floor_insulation",
            }
        )
    )
    # na=False: a missing description must not be treated as a match (NaN is
    # truthy inside np.where and would otherwise be labelled as a heat pump).
    recommendations_df["type_mapped"] = np.where(
        recommendations_df["description"].str.contains(
            "air source heat pump", na=False
        ),
        "air_source_heat_pump",
        recommendations_df["type_mapped"],
    )

    # Group by scenario and mapped type and count unique properties
    recommendation_summary = (
        recommendations_df[recommendations_df["default"] == True]  # noqa: E712
        .groupby(["Scenario ID", "type_mapped"])
        .agg({"property_id": "nunique"})
        .reset_index()
    )
    recommendation_summary.columns = [
        "Scenario ID",
        "Type Mapped",
        "Number of Properties",
    ]
    recommendation_summary["Percentage of Properties"] = 100 * (
        recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
    )
    recommendation_summary_final_scenario = recommendation_summary[
        recommendation_summary["Scenario ID"].isin([51])
    ]

    # MVP implementation of funding estimation for the most basic scenario, using GBIS
    project_scores_matrix = pd.read_csv(project_scores_matrix_path)

    eco4_scores_sap_table = pd.DataFrame(
        [
            {"Band": "High_A", "From": 96.0, "Up to": 100.0, "Mid-point": 98.0},
            {"Band": "Low_A", "From": 92.0, "Up to": 96.0, "Mid-point": 94.0},
            {"Band": "High_B", "From": 86.0, "Up to": 91.0, "Mid-point": 88.5},
            {"Band": "Low_B", "From": 81.0, "Up to": 86.0, "Mid-point": 83.5},
            {"Band": "High_C", "From": 74.5, "Up to": 80.0, "Mid-point": 77.25},
            {"Band": "Low_C", "From": 69.0, "Up to": 74.5, "Mid-point": 71.75},
            {"Band": "High_D", "From": 61.5, "Up to": 68.0, "Mid-point": 64.75},
            {"Band": "Low_D", "From": 55.0, "Up to": 61.5, "Mid-point": 58.25},
            {"Band": "High_E", "From": 46.5, "Up to": 54.0, "Mid-point": 50.25},
            {"Band": "Low_E", "From": 39.0, "Up to": 46.5, "Mid-point": 42.75},
            {"Band": "High_F", "From": 29.5, "Up to": 38.0, "Mid-point": 33.75},
            {"Band": "Low_F", "From": 21.0, "Up to": 29.5, "Mid-point": 25.25},
            {"Band": "High_G", "From": 10.5, "Up to": 20.0, "Mid-point": 15.25},
            {"Band": "Low_G", "From": 1.0, "Up to": 10.5, "Mid-point": 5.75},
        ]
    )

    def find_band(value):
        """Return the band name whose [From, Up to] range contains floor(value)."""
        value_floored = np.floor(value)
        matches = eco4_scores_sap_table[
            (eco4_scores_sap_table["From"] <= value_floored)
            & (eco4_scores_sap_table["Up to"] >= value_floored)
        ]
        if matches.empty:
            raise ValueError(f"No ECO4 SAP band covers a SAP score of {value}")
        # 86 and 96 sit on the edge of two adjacent bands. The table is ordered
        # from the highest band down, so taking the first match picks the band
        # that starts at that score instead of returning two rows.
        # NOTE(review): confirm this is the intended tie-break.
        return matches.iloc[0]["Band"]

    def find_abs(sap_movement, starting_sap, floor_area):
        """Look up the ``Cost Savings`` score for a SAP band movement.

        Returns 0 when the starting and finishing bands are the same.
        """
        starting_band = find_band(starting_sap)
        finishing_band = find_band(starting_sap + sap_movement)
        if starting_band == finishing_band:
            return 0
        if floor_area <= 72:
            floor_area_segment = "0-72"
        elif floor_area <= 97:
            floor_area_segment = "73-97"
        elif floor_area <= 199:
            floor_area_segment = "98-199"
        else:
            floor_area_segment = "200+"
        # NOTE(review): this assumes exactly one matrix row per
        # (segment, start band, finish band) combination - not checked here.
        return project_scores_matrix[
            (project_scores_matrix["Floor Area Segment"] == floor_area_segment)
            & (project_scores_matrix["Starting Band"] == starting_band)
            & (project_scores_matrix["Finishing Band"] == finishing_band)
        ].squeeze()["Cost Savings"]

    def identify_funding_measure(p, p_recs, is_social):
        """Pick the most expensive cavity/loft measure of a property for funding.

        Returns a dict (property_id, measure, cost, abs, is_social), or None if
        the property has neither measure.
        """
        measures = ["cavity_wall_insulation", "loft_insulation"]
        property_abs = []
        for m in measures:
            funding_measure = p_recs[p_recs["type"] == m]
            if funding_measure.empty:
                continue
            # NOTE(review): squeeze() only yields a single record when there is
            # exactly one recommendation of this type for the property.
            funding_measure = funding_measure.squeeze()
            project_abs = find_abs(
                sap_movement=funding_measure["sap_points"],
                starting_sap=p["current_sap_points"],
                floor_area=p["total_floor_area"],
            )
            property_abs.append(
                {
                    "property_id": p["property_id"],
                    "measure": funding_measure["type"],
                    "cost": funding_measure["estimated_cost"],
                    "abs": project_abs,
                    "is_social": is_social,
                }
            )
        if not property_abs:
            return None
        property_abs = pd.DataFrame(property_abs).sort_values("cost", ascending=False)
        return property_abs.head(1).to_dict(orient="records")[0]

    social_tenure = ["rental (social)", "Rented (social)"]
    scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])]
    funding = []
    for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
        p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
        if p_recs.empty:
            continue
        # Only properties currently below SAP 69 are considered, whatever the tenure.
        if not p["current_sap_points"] < 69:
            continue
        is_social = p["tenure"] in social_tenure
        f = identify_funding_measure(p, p_recs, is_social)
        if f:
            funding.append(f)
    funding = pd.DataFrame(funding)

    conservative_abs = 20
    funding["expected_funding"] = funding["abs"] * conservative_abs
    # Keep rows where the expected funding covers the cost of the works plus a
    # 15% margin. NOTE(review): the earlier comment said "+ 10%" while the code
    # has always used 1.15 - confirm which is intended.
    funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]

    # From the owner of the properties, the funding that they see is just the
    # cost of the works. The actual funding received will go to the installer.
    # We now look at the social funding
    social_funding = funding[funding["is_social"]]["cost"].sum()
    # For the private funding, we need to scale this to consider the fact that
    # only a proportion of the properties will qualify due to needing the
    # property to fall into council tax bands A - D, and that only some of the
    # tenants will meet the benefits criteria
    private_funding = funding[~funding["is_social"]]["cost"].sum()
    # 51% of households are recipients of benefits in the South East, in the UK
    # (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
    # We also need to deduce the % of properties in council tax bands A - D
    # 2023 council tax bands:
    # https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
    # -statistical-commentary
    band_a_proportion = 0.239
    band_b_proportion = 0.195
    band_c_proportion = 0.219
    band_d_proportion = 0.156
    a_to_d_proportion = (
        band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
    )
    benefits_proportion = 0.51
    # Note: It's probable that an occupant of a property in council tax bands
    # A-D is more likely to be on benefits, however we retain the regional
    # average to be conservative.
    # We scale the private funding based on these two factors
    private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
    n_private_projects = np.round(
        (~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion
    )

    # Look at the impact of EWI for scenario 49
    ewi_jobs = recommendations_df[
        (recommendations_df["Scenario ID"] == 49)
        & (recommendations_df["type"] == "external_wall_insulation")
    ]
    ewi_jobs["estimated_cost"].sum()

    has_cavity = recommendations_df[
        (recommendations_df["type"] == "cavity_wall_insulation")
        & (recommendations_df["Scenario ID"] == 47)
    ]
    # Take some of the properties in this set
    cavity_units = properties_df[
        properties_df["property_id"].isin(has_cavity["property_id"].values)
    ]
    cavity_units[cavity_units.index == 3][["uprn", "property_id"]]
    z = recommendations_df[recommendations_df["property_id"] == 24525]

    # Recommendation type by kWh savings per unit of floor area
    recommendations_final_scenario = recommendations_df[
        recommendations_df["Scenario ID"].isin([51])
        & (recommendations_df["default"] == True)  # noqa: E712
    ].copy()
    # Merge on floor area
    recommendations_final_scenario = recommendations_final_scenario.merge(
        properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
    )
    recommendations_final_scenario = recommendations_final_scenario[
        ~pd.isnull(recommendations_final_scenario["total_floor_area"])
    ]
    recommendations_final_scenario["kwh_savings_per_unit"] = (
        recommendations_final_scenario["kwh_savings"]
        / recommendations_final_scenario["total_floor_area"]
    )
    # Built from this frame's own "type" column: merge() re-indexes the rows, so
    # assigning from recommendations_df["type"] would align on the wrong index.
    recommendations_final_scenario["type_mapped2"] = (
        recommendations_final_scenario["type"]
        .copy()
        .replace(
            {
                "room_roof_insulation": "roof_insulation",
                "flat_roof_insulation": "roof_insulation",
                "hot_water_tank_insulation": "other",
                "cylinder_thermostat": "other",
                "sealing_open_fireplace": "other",
                "suspended_floor_insulation": "floor_insulation",
                "solid_floor_insulation": "floor_insulation",
            }
        )
    )
    aggs = (
        recommendations_final_scenario.groupby("type_mapped")[
            ["kwh_savings_per_unit", "estimated_cost"]
        ]
        .mean()
        .reset_index()
        .sort_values("kwh_savings_per_unit", ascending=False)
    )
    aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]

    # Show more columns with pandas
    pd.set_option("display.max_columns", None)
    # Show more rows with pandas
    pd.set_option("display.max_rows", None)
    # Show more characters in a column
    pd.set_option("display.max_colwidth", None)
def lewes_outputs(
    output_dir="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs",
):
    """Prepare the Newhaven datasets shared with Nextgen heating and Lewes Council.

    Prepares this data for the following two needs:
    1) a dataset to share with Nextgen heating
    2) a breakdown of results by property type

    The asset list and the non-invasive survey recommendations are read from S3,
    and the properties and recommendations come from ``get_data``. Two CSV files
    are written into ``output_dir``: ``Lewes property data.csv`` and
    ``next_gen_dataset.csv``.

    Args:
        output_dir: Directory the two CSV files are written to. Defaults to the
            directory that was previously hard-coded.

    Raises:
        Exception: If, for any property type / scenario group, the mean
            post-scenario demand is higher than the mean estimated demand.

    :return: None
    """
    import ast
    from pathlib import Path

    output_dir = Path(output_dir)

    # Get the asset list
    asset_list = read_csv_from_s3(
        bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv"
    )
    asset_list = pd.DataFrame(asset_list)
    # Get non-invasive recommendations
    non_intrusive_recommendations = read_csv_from_s3(
        bucket_name="retrofit-plan-inputs-dev",
        filepath="8/90/non_invasive_recommendations.csv",
    )
    non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations)

    # Right now this is the second version of the Newhaven portfolio
    portfolio_id = 90
    # Look at one scenario at a time, otherwise this is agony
    scenario_ids = [47, 48, 49, 50, 51]
    properties_data, _, recommendations_data = get_data(portfolio_id, scenario_ids)
    properties_df = pd.DataFrame(properties_data)
    recommendations_df = pd.DataFrame(recommendations_data)

    # Unnest the survey recommendations. Each row stores a Python-literal list of
    # recommendation dicts, of which only ASHP and solar PV are used here.
    survey_recs = []
    for _, row in non_intrusive_recommendations.iterrows():
        recs = ast.literal_eval(row["recommendations"])
        ashp_rec = next((r for r in recs if r["type"] == "air_source_heat_pump"), None)
        solar_rec = next((r for r in recs if r["type"] == "solar_pv"), None)
        to_append = {"uprn": row["uprn"]}
        # A survey may not contain a given measure at all, in which case next()
        # returned None and there is nothing to look up.
        if ashp_rec is not None and ashp_rec["suitable"]:
            to_append = {
                **to_append,
                "ashp_suitable": True,
                "ashp_size_kw": ashp_rec["size"],
                "ashp_cost": ashp_rec["cost"],
            }
        if solar_rec is not None and solar_rec["suitable"]:
            to_append = {
                **to_append,
                "solar_suitable": True,
                "solar_size_kwp": solar_rec["array_wattage"],
                "solar_cost": solar_rec["cost"],
            }
        survey_recs.append(to_append)
    # Explicit columns so the frame has every field even when no surveyed
    # property is suitable for a measure (the keys are otherwise never created).
    survey_recs = pd.DataFrame(
        survey_recs,
        columns=[
            "uprn",
            "ashp_suitable",
            "ashp_size_kw",
            "ashp_cost",
            "solar_suitable",
            "solar_size_kwp",
            "solar_cost",
        ],
    )
    asset_list["uprn"] = asset_list["uprn"].astype(int)
    survey_recs["uprn"] = survey_recs["uprn"].astype(int)

    # NOTE(review): the meaning of these two reference kWh figures is not
    # documented here; their ratio is used to rescale the modelled demand.
    vital_kwh = 7597
    domna_kwh = 10850
    scaling_factor = vital_kwh / domna_kwh

    next_gen_dataset = (
        properties_df[
            [
                "uprn",
                "address",
                "postcode",
                "property_type",
                "built_form",
                "current_energy_demand_heating_hotwater",
                "mainfuel",
                "total_floor_area",
                "floor_height",
            ]
        ]
        .rename(
            columns={
                "mainfuel": "primary_fuel_type",
                "total_floor_area": "gross_floor_area",
                "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh",
            }
        )
        .merge(asset_list[["uprn", "number_of_floors"]], how="left", on="uprn")
        .merge(survey_recs, how="left", on="uprn")
    )
    next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = (
        next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor
    )
    next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False)
    next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(
        False
    )

    # We prepare the scenario outputs by property type
    grouped_data = next_gen_dataset.copy()
    grouped_data["property_sub_type"] = grouped_data["built_form"].copy()
    # If a property is a flat, re-map sub_type just to flat
    grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = (
        "Flat"
    )
    # Same for maisonettes
    grouped_data.loc[
        grouped_data["property_type"] == "Maisonette", "property_sub_type"
    ] = "Maisonette"

    # We now pull out the recommendations impact by property type and sub type.
    # Exclude sealing open fireplaces. The copy() makes the column assignment
    # below act on an independent frame rather than on a slice.
    recommendations_df = recommendations_df[
        recommendations_df["type"] != "sealing_open_fireplace"
    ].copy()

    # For rows of type "heating", refine the type from the description:
    # "air source heat pump" -> "Air Source Heat Pump", "high heat retention" ->
    # "High Heat Retention Storage", "condensing boiler" -> "Boiler Upgrade".
    # Anything else keeps its type. na=False keeps missing descriptions from
    # counting as a match.
    description = recommendations_df["description"]
    recommendations_df["type"] = np.where(
        recommendations_df["type"] == "heating",
        np.where(
            description.str.contains("air source heat pump", na=False),
            "Air Source Heat Pump",
            np.where(
                description.str.contains("high heat retention", na=False),
                "High Heat Retention Storage",
                np.where(
                    description.str.contains("condensing boiler", na=False),
                    "Boiler Upgrade",
                    recommendations_df["type"],
                ),
            ),
        ),
        recommendations_df["type"],
    )

    rename_dict = {
        "hot_water_tank_insulation": "Hot Water Tank Insulation",
        "windows_glazing": "Windows Glazing",
        "secondary_heating": "Secondary Heating",
        "cavity_wall_insulation": "Cavity Wall Insulation",
        "flat_roof_insulation": "Flat Roof Insulation",
        "mechanical_ventilation": "Mechanical Ventilation",
        "loft_insulation": "Loft Insulation",
        "cylinder_thermostat": "Cylinder Thermostat",
        "room_roof_insulation": "Room Roof Insulation",
        "low_energy_lighting": "Low Energy Lighting",
        "external_wall_insulation": "External Wall Insulation",
        "solar_pv": "Solar PV",
        "heating_control": "Heating Control",
        "solid_floor_insulation": "Solid Floor Insulation",
        "suspended_floor_insulation": "Suspended Floor Insulation",
        "internal_wall_insulation": "Internal Wall Insulation",
    }

    property_scenario_impact = []
    for scenario_id in tqdm(scenario_ids):
        # Get the default recommendations for the scenario
        scenario_recommendations = recommendations_df[
            (recommendations_df["Scenario ID"] == scenario_id)
            & (recommendations_df["default"] == True)  # noqa: E712
        ].copy()

        # Split the savings by measure type: lighting and solar get their own
        # columns and are zeroed out of the heating demand savings.
        rec_type = scenario_recommendations["type"]
        kwh_savings = scenario_recommendations["kwh_savings"]
        scenario_recommendations["Estimated Lighting kWh Savings"] = kwh_savings.where(
            rec_type == "low_energy_lighting", 0
        )
        scenario_recommendations["Estimated Solar kWh Savings"] = kwh_savings.where(
            rec_type == "solar_pv", 0
        )
        scenario_recommendations["Estimated Heating Demand kWh Savings"] = (
            kwh_savings.where(~rec_type.isin(["low_energy_lighting", "solar_pv"]), 0)
        )

        scenario_grouped_data = (
            scenario_recommendations.groupby(["property_id"])
            .agg(
                {
                    "Estimated Heating Demand kWh Savings": "sum",
                    "Estimated Lighting kWh Savings": "sum",
                    "Estimated Solar kWh Savings": "sum",
                    "estimated_cost": "sum",
                }
            )
            .reset_index()
        )
        comparison = properties_df.drop_duplicates()[
            ["uprn", "property_id", "current_energy_demand_heating_hotwater"]
        ].merge(scenario_grouped_data, on=["property_id"], how="left")
        # Properties without recommendations in this scenario save and cost nothing.
        for column in [
            "Estimated Heating Demand kWh Savings",
            "Estimated Lighting kWh Savings",
            "Estimated Solar kWh Savings",
            "estimated_cost",
        ]:
            comparison[column] = comparison[column].fillna(0)
        comparison["post_scenario_heating_hotwater_kwh"] = (
            comparison["current_energy_demand_heating_hotwater"]
            - comparison["Estimated Heating Demand kWh Savings"]
        )

        # For each scenario, we create a measure matrix: one row per property,
        # one boolean column per recommendation type.
        measure_matrix = scenario_recommendations.pivot_table(
            index="property_id",
            columns="type",
            values="id",  # Using 'id' just as a placeholder for the pivot
            aggfunc=lambda x: True,  # If an ID exists for a given type, mark as True
            fill_value=False,  # Fill other entries as False
        ).reset_index()
        # Only keep measure flags for properties with some non-zero saving.
        non_zero_heat_demand_impact = comparison[
            (comparison["Estimated Heating Demand kWh Savings"] > 0)
            | (comparison["Estimated Lighting kWh Savings"] > 0)
            | (comparison["Estimated Solar kWh Savings"] > 0)
        ]
        measure_matrix = measure_matrix[
            measure_matrix["property_id"].isin(
                non_zero_heat_demand_impact["property_id"].values
            )
        ]
        measure_matrix = measure_matrix.rename(columns=rename_dict)
        comparison = comparison.merge(measure_matrix, on="property_id", how="left")
        comparison["scenario_id"] = scenario_id
        property_scenario_impact.append(comparison)

    property_scenario_impact = pd.concat(property_scenario_impact)

    measure_columns = list(rename_dict.values()) + [
        "Air Source Heat Pump",
        "High Heat Retention Storage",
        "Boiler Upgrade",
    ]
    for measure in measure_columns:
        if measure in property_scenario_impact.columns:
            # Fill NaNs with False
            property_scenario_impact[measure] = property_scenario_impact[
                measure
            ].fillna(False)
        else:
            # A measure that is never recommended in any scenario has no column
            # yet, but the export below selects every measure column.
            property_scenario_impact[measure] = False

    # Scale
    property_scenario_impact["post_scenario_heating_hotwater_kwh_scaled"] = (
        property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor
    )
    grouped_data = grouped_data.merge(property_scenario_impact, how="left", on="uprn")
    # Aggregate the data
    grouped_data = (
        grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"])
        .agg(
            {
                "estimated_heating_hotwater_kwh": "mean",
                "estimated_heating_hotwater_kwh_scaled": "mean",
                "estimated_cost": "mean",
                "post_scenario_heating_hotwater_kwh": "mean",
                "post_scenario_heating_hotwater_kwh_scaled": "mean",
            }
        )
        .reset_index()
    )
    scenario_names = pd.DataFrame(
        [
            {
                "scenario_id": 47,
                "scenario": "Demand Reduction cavity & roof insulation",
            },
            {
                "scenario_id": 48,
                "scenario": "Demand reduction no solid wall, floors or heating/renewables",
            },
            {"scenario_id": 49, "scenario": "Demand reduction no decant"},
            {
                "scenario_id": 50,
                "scenario": "Demand reduction no decant + heating & solar",
            },
            {"scenario_id": 51, "scenario": "Whole house retrofit"},
        ]
    )
    grouped_data = grouped_data.merge(scenario_names, how="left", on="scenario_id")

    # Sanity checks: a scenario must never increase the average demand.
    if not grouped_data[
        grouped_data["estimated_heating_hotwater_kwh"]
        < grouped_data["post_scenario_heating_hotwater_kwh"]
    ].empty:
        raise Exception("someting went wrong")
    if not grouped_data[
        grouped_data["estimated_heating_hotwater_kwh_scaled"]
        < grouped_data["post_scenario_heating_hotwater_kwh_scaled"]
    ].empty:
        raise Exception("someting went wrong")

    # Reorder the columns
    grouped_data = grouped_data[
        [
            "property_type",
            "property_sub_type",
            "scenario",
            "estimated_heating_hotwater_kwh",
            "post_scenario_heating_hotwater_kwh",
            "estimated_heating_hotwater_kwh_scaled",
            "post_scenario_heating_hotwater_kwh_scaled",
            "estimated_cost",
        ]
    ]
    grouped_data = grouped_data.rename(
        columns={
            "property_type": "Property Type",
            "property_sub_type": "Property Sub Type",
            "scenario": "Scenario",
            "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
            "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
            "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
            "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
            "estimated_cost": "Estimated Cost or Retrofit",
        }
    )
    # grouped_data.to_excel(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
    #     "Type.xlsx",
    #     index=False
    # )

    property_scenario_impact = property_scenario_impact.merge(
        scenario_names, how="left", on="scenario_id"
    )
    lewes_data = next_gen_dataset.merge(property_scenario_impact, how="left", on="uprn")
    lewes_data = lewes_data.sort_values(
        ["postcode", "uprn", "scenario_id"], ascending=True
    )
    # Rearrange, rename columns and drop what we don't need
    # TODO - remap the heating type
    lewes_data = lewes_data[
        [
            "uprn",
            "address",
            "postcode",
            "property_type",
            "built_form",
            # 'estimated_heating_hotwater_kwh',
            "primary_fuel_type",
            "gross_floor_area",
            "floor_height",
            "number_of_floors",
            "ashp_suitable",
            "ashp_size_kw",
            "ashp_cost",
            "solar_suitable",
            "solar_size_kwp",
            "solar_cost",
            "scenario",
            "estimated_heating_hotwater_kwh_scaled",
            "post_scenario_heating_hotwater_kwh_scaled",
            # 'property_id', - dropped
            # 'current_energy_demand_heating_hotwater',
            "Estimated Heating Demand kWh Savings",
            "Estimated Lighting kWh Savings",
            "Estimated Solar kWh Savings",
            "estimated_cost",
            "post_scenario_heating_hotwater_kwh",
            "Cavity Wall Insulation",
            "Cylinder Thermostat",
            "Flat Roof Insulation",
            "Hot Water Tank Insulation",
            "Loft Insulation",
            "Mechanical Ventilation",
            "Room Roof Insulation",
            # 'scenario_id', - dropped
            "Low Energy Lighting",
            "Secondary Heating",
            "Windows Glazing",
            "External Wall Insulation",
            "Heating Control",
            "Solar PV",
            "Air Source Heat Pump",
            "Boiler Upgrade",
            "High Heat Retention Storage",
            "Internal Wall Insulation",
            "Solid Floor Insulation",
            "Suspended Floor Insulation",
        ]
    ].rename(
        columns={
            "primary_fuel_type": "Primary Fuel Type",
            "gross_floor_area": "Gross Floor Area",
            "floor_height": "Floor Height",
            "number_of_floors": "Number of Floors",
            "ashp_suitable": "Is an ASHP Suitable?",
            "ashp_size_kw": "ASHP Size (kW)",
            "ashp_cost": "ASHP Cost",
            "solar_suitable": "Is Solar PV Suitable?",
            "solar_size_kwp": "Solar PV Size (kWp)",
            "solar_cost": "Solar PV Cost",
            # "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
            "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh",
            "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh",
            "estimated_cost": "Estimated Cost of Scenario",
        }
    )
    # We save this dataset, which will be shared with Lewes Council
    lewes_data.to_csv(output_dir / "Lewes property data.csv", index=False)

    # One column per (metric, scenario) so each property sits on a single row.
    df_pivot = property_scenario_impact.pivot_table(
        index="uprn",
        columns="scenario",
        values=[
            "post_scenario_heating_hotwater_kwh",
            "post_scenario_heating_hotwater_kwh_scaled",
        ],
    )
    # Flattening multi-index columns
    df_pivot.columns = [f"{col[0]}_{col[1]}" for col in df_pivot.columns]
    # Reset the index to have a clean dataframe
    df_pivot.reset_index(inplace=True)
    next_gen_dataset = next_gen_dataset.merge(df_pivot, how="left", on="uprn")
    next_gen_dataset.to_csv(output_dir / "next_gen_dataset.csv", index=False)