working on slides code and added valuations for urban splash

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-21 16:47:14 +00:00
parent 0b8da8d8be
commit c0b1acef98
3 changed files with 242 additions and 25 deletions

View file

@ -24,6 +24,29 @@ class PropertyValuation:
100120703802: 277000, # Based on Zoopla
10014469685: 286000, # Based on Zoopla
10001328782: 196000, # Based on Zoopla
# Urban Splash - valuations from The Move Market
# (keys look like property UPRNs — presumably matched against the "uprn" column; confirm)
10023345430: 74_000,
10023345435: 99_000,
10023345436: 62_000,
10023345441: 62_000,
10094183503: 2_988_000,  # NOTE(review): far higher than the other entries in this group — confirm this is not a typo
10094183499: 123_000,
10070056824: 70_000,
110070056242: 100_000,  # NOTE(review): extra leading digit vs neighbouring 10070056241/10070056243 — possible typo for 10070056242; confirm
10070056243: 130_000,
10070056817: 130_000,
10094183501: 185_000,
10070056250: 71_000,
10094183500: 185_000,
10070056843: 67_000,
10070056844: 67_000,
10070056241: 76_000,
10070056834: 63_000,
10023345439: 62_000,
10070056815: 101_000,
10070056816: 101_000,
10094183498: 101_000,
10070056840: 673_000,  # NOTE(review): also well above the rest of the group — confirm
}
# We base our valuation uplifts on a number of sources

View file

@ -1,5 +1,9 @@
import os
from pptx import Presentation
from pptx.util import Inches, Pt
import matplotlib.pyplot as plt
from sqlalchemy.orm import Session
from sqlalchemy.sql import true
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
@ -18,28 +22,30 @@ EPC_COLOURS = {
def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
    Fetch properties for a given portfolio_id along with their default recommendations,
    ensuring that all properties are retrieved even if they don't have recommendations
    where default is True.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
    :return: A list of dictionaries, where each dictionary represents a property (as produced by
        ``row2dict``) with an extra ``'recommendations'`` key holding a list of its default
        recommendations. The list is empty when the property has none.
    """
    # The default flag is part of the join condition rather than the WHERE clause: filtering on
    # Recommendation.default after an outer join would drop every property without a default
    # recommendation, which is exactly what this function must avoid.
    default_recommendation_join = (
        (Recommendation.property_id == PropertyModel.id)
        & (Recommendation.default == true())
    )
    rows = (
        session.query(PropertyModel, Recommendation)
        .outerjoin(Recommendation, default_recommendation_join)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
    )

    # One output entry per property, keyed by property id; a property appears in several rows
    # when it has several default recommendations.
    properties = {}
    for property_row, recommendation in rows:
        if property_row.id not in properties:
            property_dict = row2dict(property_row)
            property_dict['recommendations'] = []
            properties[property_row.id] = property_dict

        # The outer join yields None in place of a recommendation for properties without one.
        # The default check repeats the join condition and is kept as a defensive guard.
        if recommendation is not None and recommendation.default:
            properties[property_row.id]['recommendations'].append(row2dict(recommendation))

    return list(properties.values())
@ -63,7 +69,7 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
return property_details_dict
def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
"""
Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -117,3 +123,67 @@ def plot_epc_distribution(df, title='Your Units', background_color='white', bar_
plt.tight_layout() # Adjust layout
plt.show()
# Save the figure as an image
figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
fig.savefig(figure_path, bbox_inches='tight')
plt.close(fig) # Close the figure to free memory
return fig, figure_path
def save_plot_to_image(figure, path='plot.png'):
    """
    Saves a matplotlib figure to an image file for insertion into PowerPoint.

    The figure is closed afterwards, whether or not the save succeeded.

    :param figure: The matplotlib figure to save.
    :param path: The file path the image is written to.
    """
    try:
        figure.savefig(path, bbox_inches='tight')
    finally:
        # Close in a finally block so a failed save does not leave the figure open.
        plt.close(figure)
def save_figure_as_image(figure, filename='temp_plot.png'):
    """
    Saves a matplotlib figure to an image file at 300 dpi.

    The figure is closed afterwards, whether or not the save succeeded, which also prevents it
    from displaying in notebooks or Python environments.

    :param figure: The matplotlib figure to save.
    :param filename: The file path the image is written to.
    """
    try:
        figure.savefig(filename, dpi=300)
    finally:
        # Close in a finally block so a failed save does not leave the figure open.
        plt.close(figure)
def add_slide_with_image(prs, title, img_path=None, commentary=None):
    """
    Adds a slide with a title, an optional image and optional commentary.

    :param prs: The presentation the new slide is appended to.
    :param title: The text for the slide's title placeholder.
    :param img_path: Optional path to an image; when given it is placed below the title
        in an 8in x 4.5in box.
    :param commentary: Optional text; when given it is placed in a text box below the image area.
    """
    # NOTE: in python-pptx's default template, layout index 5 is "Title Only" (not
    # "Title and Content"), which leaves the slide body free for the picture and text box.
    # If prs was built from a custom template, confirm the index still refers to a titled layout.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = title

    # Add the image
    if img_path:
        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))

    # Add commentary if provided
    if commentary:
        text_box = slide.shapes.add_textbox(Inches(1), Inches(6), Inches(8), Inches(1))
        text_frame = text_box.text_frame
        # Wrap inside the box width; otherwise a long commentary runs off the edge of the slide.
        text_frame.word_wrap = True
        # A new text frame already contains one empty paragraph. Reuse it rather than calling
        # add_paragraph(), which would leave a blank line above the commentary.
        paragraph = text_frame.paragraphs[0]
        paragraph.text = commentary
        paragraph.font.size = Pt(14)  # Adjust font size as needed
def create_powerpoint(data, save_location):
    """
    Creates a PowerPoint presentation based on provided data and optional commentaries.

    One slide is added per entry of ``data``, in the dictionary's iteration order.

    :param data: A dictionary mapping a slide key to a dictionary of slide content. The keys
        read from each slide dictionary are ``'title'`` (defaults to an empty string),
        ``'image_path'`` and ``'text'`` (both optional).
    :param save_location: The file path where the PowerPoint presentation will be saved.
    """
    prs = Presentation()

    # The slide keys only label the entries; the content dictionaries are all that is needed.
    for slide_data in data.values():
        add_slide_with_image(
            prs,
            slide_data.get('title', ""),
            slide_data.get('image_path'),
            slide_data.get('text'),
        )

    # Save the presentation
    prs.save(save_location)

View file

@ -4,15 +4,21 @@ We connect to the database and extract the data for the portfolio needed so it i
an environment akin to the backend to run this script
"""
import pandas as pd
import numpy as np
from backend.app.db.connection import db_engine
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from etl.customers.slide_utils import (
plot_epc_distribution,
get_property_details_by_portfolio_id,
get_properties_with_default_recommendations
get_properties_with_default_recommendations,
create_powerpoint
)
PORTFOLIO_ID = 66
EPC_TARGET = "C"
SAP_TARGET = 69
CUSTOMER_KEY = "urban_splash"
def app():
@ -25,18 +31,45 @@ def app():
# Get the properties for the portfolio
properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
# The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
# EPC band
properties_df = pd.DataFrame(properties)
epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
# We now pull the data for the property details
property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
property_details_df = pd.DataFrame(property_details)
# Unnest the recommendations. Each recommendation is a list of dictionaries
recommendations_exploded = properties_df["recommendations"].explode().tolist()
recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
# Aggregate the impact of the recommendations
# We want:
# Total number of sap points
# total valuation impact
# total bill savings
# total cost
# Total Co2 impact
recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
total_sap_points=("sap_points", "sum"),
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum")
).reset_index()
# Merge on current sap points
recommendations_summary = recommendations_summary.merge(
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
how="left"
)
recommendations_summary["expected_sap_points"] = (
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
)
recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
lambda x: sap_to_epc(x)
)
recommendations_summary["sap_difference"] = SAP_TARGET - recommendations_summary["expected_sap_points"]
epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
########################################################################
# We pull out the data for the slides
########################################################################
@ -45,39 +78,130 @@ def app():
# Slide 1:
############
# visual
plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
epc_plot, figure_path = plot_epc_distribution(
epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
)
# floor area - upper and lower bounds
min_area, max_area, average_area = (
property_details_df["total_floor_area"].min(),
property_details_df["total_floor_area"].max(),
property_details_df["total_floor_area"].median()
property_details_df["total_floor_area"].mean()
)
# Annual energy consumption - upper and lower bounds
min_energy_consumption, max_energy_consumption, average_consumption = (
property_details_df["adjusted_energy_consumption"].min(),
property_details_df["adjusted_energy_consumption"].max(),
property_details_df["adjusted_energy_consumption"].median()
property_details_df["adjusted_energy_consumption"].mean()
)
# Co2 emissions - upper and lower bounds
min_co2, max_co2, average_co2 = (
property_details_df["co2_emissions"].min(),
property_details_df["co2_emissions"].max(),
property_details_df["co2_emissions"].median()
property_details_df["co2_emissions"].mean()
)
# Valuation: upper and lower bounds - TODO!
min_valuation, max_valuation, average_valuation = 0, 0, 0
slide_1_commentary = (
f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
f"meters. "
f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
f"average of {average_consumption} kWh. "
f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. "
f"Valuations range from £{min_valuation} to £{max_valuation} £, with an average of £"
f"{average_valuation}."
)
############
# Slide 2:
############
# What it would take to hit EPC C
# This is the number of properties that are below a C
n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
# We calculate the number of units that will make it to an EPC C
n_units_to_epc_c = ()
units_hitting_target = recommendations_summary[
recommendations_summary["expected_epc_rating"] == EPC_TARGET
]
n_units_to_target = units_hitting_target.shape[0]
measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
# Per property
min_valuation_impact, max_valuation_impact, average_valuation_impact = (
units_hitting_target["total_valuation_impact"].min(),
units_hitting_target["total_valuation_impact"].max(),
units_hitting_target["total_valuation_impact"].mean()
)
# Bill savings per property
min_bill_savings, max_bill_savings, average_bill_savings = (
units_hitting_target["total_bill_savings"].min(),
units_hitting_target["total_bill_savings"].max(),
units_hitting_target["total_bill_savings"].mean()
)
# Total CO2 reduction of portfolio
min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
units_hitting_target["total_carbon"].min(),
units_hitting_target["total_carbon"].max(),
units_hitting_target["total_carbon"].mean(),
units_hitting_target["total_carbon"].sum()
)
slide_2_commentary = (
f"{n_units_to_target} expected to achieve EPC {EPC_TARGET} "
f"Measures include: {measures}"
f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
f"{average_valuation_impact}"
f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}"
f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction}"
f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes"
)
############
# Slide 3:
############
units_missed_target = recommendations_summary[
recommendations_summary["expected_epc_rating"] != EPC_TARGET
]
n_units_missed_target = units_missed_target.shape[0]
# How close were the properties that missed the target
# We calculate the difference between the expected sap points and the lower bound sap points for the target
min_difference, max_difference, average_difference = (
np.ceil(units_missed_target["sap_difference"].min()),
np.ceil(units_missed_target["sap_difference"].max()),
np.ceil(units_missed_target["sap_difference"].mean())
)
slide_3_text = (
"Many of the properties upgrade considerably and may be able to achieve an EPC C with further measures, "
"however we could need a survey to confirm the heating and hot water systems to identify further "
"potential measures.")
slide_data = {
'slide_1': {
"title": "EPC Rating Distribution",
'image_path': figure_path, # Pass the path to the saved image
"text": slide_1_commentary
},
"slide_2": {
"title": f"Properties that achieve EPC {EPC_TARGET}",
"text": slide_2_commentary,
},
"slide 3": {
"title": f"Properties that miss EPC {EPC_TARGET}",
"text": slide_3_text
}
}
save_location = f"etl/customers/{CUSTOMER_KEY}/powerpoint.pptx"
create_powerpoint(slide_data, save_location)