mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
352 lines
16 KiB
Python
352 lines
16 KiB
Python
"""
|
|
This script contains the code to generate the data required to populate the slides.
We connect to the database and extract the data for the portfolio needed, so it is recommended to use
an environment akin to the backend to run this script.
|
|
"""
|
|
import pandas as pd
|
|
import numpy as np
|
|
from backend.app.db.connection import db_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
from etl.customers.slide_utils import (
|
|
plot_epc_distribution,
|
|
get_property_details_by_portfolio_id,
|
|
get_plan_by_portfolio_id,
|
|
get_properties_with_default_recommendations,
|
|
create_powerpoint,
|
|
create_recommendations_summary
|
|
)
|
|
|
|
# Primary portfolio: its properties, property details and plans are fetched first in app().
PORTFOLIO_ID = 66
# Portfolio fetched for the second scenario that is reported on slide 3.
SECOND_SCENARIO_PORTFOLIO_ID = 65
# EPC band that the "expected_epc_rating" of each recommendation summary row is compared against.
EPC_TARGET = "C"
# SAP score threshold: properties with current_sap_points below it are treated as needing work,
# and it is also passed to create_recommendations_summary.
SAP_TARGET = 69
# Customer key handed to the EPC plot and used to build the output path of the generated .pptx.
CUSTOMER_KEY = "urban_splash"
|
|
|
|
|
|
def _with_uprn(frame, properties_df):
    """Inner-join ``frame`` with the ``uprn`` of each property, matching on ``property_id``."""
    uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
    return frame.merge(uprn_lookup, on="property_id")


def _load_portfolio_frames(session, portfolio_id):
    """Fetch properties, property details (with uprn attached) and plans for one portfolio as DataFrames."""
    properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
    property_details_df = _with_uprn(
        pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id)), properties_df
    )
    plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))
    return properties_df, property_details_df, plans_df


def _recommendations_frame(properties_df):
    """Flatten the per-property ``recommendations`` lists into one DataFrame, skipping null entries."""
    exploded = properties_df["recommendations"].explode().tolist()
    return pd.DataFrame([r for r in exploded if not pd.isnull(r)])


def app():
    """
    Build the commentary for the three customer slides and write the PowerPoint file.

    The data for ``PORTFOLIO_ID`` and ``SECOND_SCENARIO_PORTFOLIO_ID`` is read from the database, summary
    statistics are computed with pandas, and the resulting titles/texts (plus the EPC distribution image path)
    are handed to ``create_powerpoint``, which is given the path
    ``etl/customers/<CUSTOMER_KEY>/<CUSTOMER_KEY>_tech_slides.pptx``.

    Takes no arguments and returns ``None``.
    """
    # Connect to database
    session = sessionmaker(bind=db_engine)()

    ########################################################################
    # Get the data we need
    ########################################################################
    try:
        properties_df, property_details_df, plans_df = _load_portfolio_frames(session, PORTFOLIO_ID)
        (
            properties_second_scenario_df,
            property_details_second_scenario_df,
            plans_second_scenario_df,
        ) = _load_portfolio_frames(session, SECOND_SCENARIO_PORTFOLIO_ID)
    finally:
        # Everything needed is now held in DataFrames, so release the connection even if a query failed.
        session.close()

    # Unnest the recommendations. Each recommendation is a list of dictionaries
    recommendations_df = _recommendations_frame(properties_df)
    recommendations_summary = create_recommendations_summary(recommendations_df, properties_df, SAP_TARGET)

    # Merge on uprn so we can compare properties across portfolios
    plans_second_scenario_df = _with_uprn(plans_second_scenario_df, properties_second_scenario_df)

    recommendations_second_scenario_df = _recommendations_frame(properties_second_scenario_df)
    recommendations_summary_second_scenario = create_recommendations_summary(
        recommendations_second_scenario_df, properties_second_scenario_df, SAP_TARGET
    )

    # Combine the data for both scenarios
    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
    full_properties = pd.concat([properties_df, properties_second_scenario_df])

    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100

    ########################################################################
    # We pull out the data for the slides
    ########################################################################

    ############
    # Slide 1:
    ############
    # visual - only the path of the saved figure is needed for the slide
    _, figure_path = plot_epc_distribution(
        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
    )

    # Take just the properties whose current SAP score is below the target
    properties_needing_work = full_properties[full_properties["current_sap_points"] < SAP_TARGET]
    property_details_needing_work = full_property_details[
        full_property_details["uprn"].isin(properties_needing_work["uprn"])
    ]

    # Floor area - upper and lower bounds (computed over all properties, not only those needing work)
    floor_area = full_property_details["total_floor_area"]
    min_area, max_area, average_area = floor_area.min(), floor_area.max(), floor_area.mean()

    # Annual energy consumption - upper and lower bounds
    energy_consumption = property_details_needing_work["adjusted_energy_consumption"]
    min_energy_consumption, max_energy_consumption, average_consumption = (
        energy_consumption.min(),
        energy_consumption.max(),
        energy_consumption.mean(),
    )

    # Co2 emissions - upper and lower bounds
    co2_emissions = property_details_needing_work["co2_emissions"]
    min_co2, max_co2, average_co2 = co2_emissions.min(), co2_emissions.max(), co2_emissions.mean()

    # Valuation: upper and lower bounds and average - take positive values in case we have just a sample.
    # The figure reported as the "average" is the median.
    valuation_df = properties_df[properties_df["current_valuation"] > 0]
    min_valuation, max_valuation, average_valuation = (
        valuation_df["current_valuation"].min(),
        valuation_df["current_valuation"].max(),
        valuation_df["current_valuation"].median(),
    )

    slide_1_commentary = (
        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
        f"meters. \n"
        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
        f"average of {average_consumption} kWh. \n"
        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. \n"
        f"Valuations range from £{min_valuation} to £{max_valuation} £, with an average of £"
        f"{average_valuation}.\n"
    )

    ############
    # Slide 2:
    ############
    # What it would take to hit the EPC target

    # We calculate the number of units that will make it to the target rating
    units_hitting_target = recommendations_summary[recommendations_summary["expected_epc_rating"] == EPC_TARGET]
    n_units_to_target = units_hitting_target.shape[0]

    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"

    # Costs
    expected_cost_per_unit_lower, expected_cost_per_unit_upper, expected_project_cost = (
        units_hitting_target["total_cost"].min(),
        units_hitting_target["total_cost"].max(),
        units_hitting_target["total_cost"].sum(),
    )

    # Valuation impact per property - take positive entries just in case we have a sample.
    # All three figures are medians of the corresponding plan columns.
    valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
    valuation_impact_df = valuation_impact_df[valuation_impact_df["valuation_increase_lower_bound"] > 0]
    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
        valuation_impact_df["valuation_increase_lower_bound"].median(),
        valuation_impact_df["valuation_increase_upper_bound"].median(),
        valuation_impact_df["valuation_increase_average"].median(),
    )

    # Bill savings per property
    min_bill_savings, max_bill_savings, average_bill_savings = (
        units_hitting_target["total_bill_savings"].min(),
        units_hitting_target["total_bill_savings"].max(),
        units_hitting_target["total_bill_savings"].mean(),
    )

    # Total CO2 reduction of portfolio
    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
        units_hitting_target["total_carbon"].min(),
        units_hitting_target["total_carbon"].max(),
        units_hitting_target["total_carbon"].mean(),
        units_hitting_target["total_carbon"].sum(),
    )

    slide_2_commentary = (
        f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
        f"Expected cost: {expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
        f"{expected_project_cost}\n"
        f"Measures include: {measures}\n"
        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
        f"{average_valuation_impact}\n"
        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}\n"
        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction}\n"
        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes\n"
    )

    ############
    # Slide 3:
    ############
    units_missed_target = recommendations_summary_second_scenario.copy()
    n_units_missed_target = units_missed_target.shape[0]

    # How close were the properties that missed the target. These two values are interpolated into the first
    # sentence of slide 3; without this assignment building the text raises NameError.
    min_difference, max_difference = (
        np.ceil(units_missed_target["sap_difference"].min()),
        np.ceil(units_missed_target["sap_difference"].max()),
    )

    second_scenario_measures = (
        "Electrical heating system upgrades & heating controls, Hot water system improvements "
        "and internal wall insulation"
    )

    # Just take all of the units in the second scenario, since they're borderline
    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
        recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values)
    ]

    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
    ].shape[0]

    # Impact on second scenario
    # Costs
    (
        expected_cost_per_unit_lower_second_scenario,
        expected_cost_per_unit_upper_second_scenario,
        expected_project_cost_second_scenario,
    ) = (
        recommendations_summary_second_scenario["total_cost"].min(),
        recommendations_summary_second_scenario["total_cost"].max(),
        recommendations_summary_second_scenario["total_cost"].sum(),
    )

    valuation_impact_df_second_scenario = plans_second_scenario_df[
        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
    ]
    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
    ]
    (
        min_valuation_impact_second_scenario,
        max_valuation_impact_second_scenario,
        average_valuation_impact_second_scenario,
    ) = (
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_average"].median(),
    )

    # Bill savings per property
    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
        units_hitting_target_second_scenario["total_bill_savings"].min(),
        units_hitting_target_second_scenario["total_bill_savings"].max(),
        units_hitting_target_second_scenario["total_bill_savings"].mean(),
    )

    # Total CO2 reduction of portfolio
    (
        min_co2_reduction_second_scenario,
        max_co2_reduction_second_scenario,
        average_co2_reduction_second_scenario,
        total_co2_reduction_second_scenario,
    ) = (
        units_hitting_target_second_scenario["total_carbon"].min(),
        units_hitting_target_second_scenario["total_carbon"].max(),
        units_hitting_target_second_scenario["total_carbon"].mean(),
        units_hitting_target_second_scenario["total_carbon"].sum(),
    )

    # Values for the leftovers
    units_missing_second_scenario = recommendations_summary_second_scenario[
        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET)
        & (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
    ]

    # NOTE(review): the closing sentence of slide 3 says "on average" but interpolates this minimum -
    # confirm whether the mean of sap_difference was intended.
    min_difference_second_scenario = np.ceil(units_missing_second_scenario["sap_difference"].min())

    # NOTE(review): "a further ... are expected to achieve" reports the size of
    # units_hitting_target_second_scenario, which holds every second-scenario unit; the number that actually
    # reach the target is n_units_hitting_second_scenario - confirm which count belongs in that sentence.
    slide_3_text = (
        f"{n_units_missed_target} units look like they would miss the EPC {EPC_TARGET} by {min_difference}-"
        f"{max_difference} points \n"
        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
        f"{EPC_TARGET}.\n"
        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
        f"Of the {n_units_missed_target} properties, a further {units_hitting_target_second_scenario.shape[0]} are "
        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
        f"Expected cost: {expected_cost_per_unit_lower_second_scenario} - "
        f"{expected_cost_per_unit_upper_second_scenario}, "
        f"total project: £"
        f"{expected_project_cost_second_scenario}\n"
        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
        f"{max_valuation_impact_second_scenario}, average: £"
        f"{average_valuation_impact_second_scenario}\n"
        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
        f"average: £{average_bill_savings_second_scenario}\n"
        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
        f"average: "
        f"{average_co2_reduction_second_scenario}\n"
        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
        f"tonnes\n"
        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
        f"to miss EPC {EPC_TARGET} by {min_difference_second_scenario} point on average - they should be visited by "
        f"an assessor"
    )

    # NOTE(review): the third key is "slide 3" (with a space) while the others use underscores; it is kept
    # unchanged here - confirm how create_powerpoint reads these keys before normalising it.
    slide_data = {
        'slide_1': {
            "title": "EPC Rating Distribution",
            'image_path': figure_path,  # Pass the path to the saved image
            "text": slide_1_commentary,
        },
        "slide_2": {
            "title": f"Properties that achieve EPC {EPC_TARGET}",
            "text": slide_2_commentary,
        },
        "slide 3": {
            "title": f"Properties that miss EPC {EPC_TARGET}",
            "text": slide_3_text,
        },
    }

    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
    create_powerpoint(slide_data, save_location)
|