diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 83c20bb4..dd5322e3 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -24,6 +24,29 @@ class PropertyValuation:
         100120703802: 277000,  # Based on Zoopla
         10014469685: 286000,  # Based on Zoopla
         10001328782: 196000,  # Based on Zoopla
+        # Urban Splash - valuations from The Move Market
+        10023345430: 74_000,
+        10023345435: 99_000,
+        10023345436: 62_000,
+        10023345441: 62_000,
+        10094183503: 2_988_000,  # NOTE(review): far above the other entries in this batch - confirm value
+        10094183499: 123_000,
+        10070056824: 70_000,
+        110070056242: 100_000,  # NOTE(review): 12 digits, unlike the neighbouring 1007005624x keys - confirm UPRN
+        10070056243: 130_000,
+        10070056817: 130_000,
+        10094183501: 185_000,
+        10070056250: 71_000,
+        10094183500: 185_000,
+        10070056843: 67_000,
+        10070056844: 67_000,
+        10070056241: 76_000,
+        10070056834: 63_000,
+        10023345439: 62_000,
+        10070056815: 101_000,
+        10070056816: 101_000,
+        10094183498: 101_000,
+        10070056840: 673_000,  # NOTE(review): ~10x the neighbouring 1007005684x values - confirm value
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index d41f4f3b..1a2a894e 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -1,5 +1,9 @@
+import os
+from pptx import Presentation
+from pptx.util import Inches, Pt
 import matplotlib.pyplot as plt
 from sqlalchemy.orm import Session
+from sqlalchemy.sql import true
 from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
@@ -18,28 +22,30 @@ EPC_COLOURS = {
 def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
     """
     Fetch properties for a given portfolio_id along with their default recommendations,
-    minimizing database queries.
+    ensuring that all properties are retrieved even if they don't have recommendations
+    where default is True.
 
     :param session: The SQLAlchemy session used to execute the query.
     :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
     :return: A list of dictionaries, where each dictionary represents a property including
-    its associated default recommendations.
+    its associated default recommendations if any.
     """
-    # Perform a query that joins PropertyModel and Recommendation, filtering by portfolio_id and default=True
-    query = session.query(PropertyModel, Recommendation).join(Recommendation, isouter=True) \
-        .filter(PropertyModel.portfolio_id == portfolio_id, Recommendation.default == True) \
+    # The default filter lives in the join condition so properties without a default recommendation are kept
+    query = session.query(PropertyModel, Recommendation).outerjoin(
+        Recommendation,
+        (Recommendation.property_id == PropertyModel.id) & (Recommendation.default == true())) \
+        .filter(PropertyModel.portfolio_id == portfolio_id) \
         .all()
 
-    # Process the query results
     properties = {}
     for property, recommendation in query:
-        # Check if the property is already added to the dictionary
+        # Ensure the property is added once with an empty list of recommendations initially
         if property.id not in properties:
             properties[property.id] = row2dict(property)
             properties[property.id]['recommendations'] = []
 
-        # Add recommendation to the property if it exists
-        if recommendation:
+        # Append recommendations if they exist and meet the criteria (already filtered by the query)
+        if recommendation and recommendation.default:
             properties[property.id]['recommendations'].append(row2dict(recommendation))
 
     return list(properties.values())
@@ -63,7 +69,7 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
     return property_details_dict
 
 
-def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
+def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
     """
     Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
     Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@@ -117,3 +123,83 @@ def plot_epc_distribution(df, title='Your Units', background_color='white', bar_
 
     plt.tight_layout()  # Adjust layout
+
+    # Save before plt.show(): on some interactive backends the figure is torn down once the
+    # window closes, and a later savefig() would then write an empty image.
+    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
+    os.makedirs(os.path.dirname(figure_path), exist_ok=True)  # savefig does not create directories
+    fig.savefig(figure_path, bbox_inches='tight')
+
     plt.show()
+    plt.close(fig)  # Close the figure to free memory
+
+    return fig, figure_path
+
+
+def save_plot_to_image(figure, path='plot.png'):
+    """
+    Saves a matplotlib figure to an image file for insertion into PowerPoint.
+    """
+    figure.savefig(path, bbox_inches='tight')
+    plt.close(figure)
+
+
+def save_figure_as_image(figure, filename='temp_plot.png'):
+    """
+    Saves a matplotlib figure to an image file.
+    """
+    figure.savefig(filename, dpi=300)
+    plt.close(figure)  # Close the figure to prevent it from displaying in notebooks or Python environments
+
+
+def add_slide_with_image(prs, title, img_path=None, commentary=None):
+    """
+    Adds a slide with a title, an optional image and optional commentary.
+
+    :param prs: The python-pptx Presentation the slide is appended to.
+    :param title: Text written into the slide's title placeholder.
+    :param img_path: Optional path of an image file to place on the slide.
+    :param commentary: Optional text placed in a text box on the slide.
+    :return: The newly created slide.
+    """
+    slide_layout = prs.slide_layouts[5]  # "Title Only" in the default python-pptx template
+    slide = prs.slides.add_slide(slide_layout)
+    slide.shapes.title.text = title
+
+    # Add the image
+    if img_path:
+        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
+
+    # Add commentary if provided
+    if commentary:
+        text_box = slide.shapes.add_textbox(Inches(1), Inches(6), Inches(8), Inches(1))
+        text_frame = text_box.text_frame
+        text_frame.word_wrap = True  # wrap long commentary at the box edge instead of running off the slide
+        # A text frame always starts with one empty paragraph; reuse it so the
+        # commentary does not begin on a second line.
+        paragraph = text_frame.paragraphs[0]
+        paragraph.text = commentary
+        paragraph.font.size = Pt(14)  # Adjust font size as needed
+
+    return slide
+
+
+def create_powerpoint(data, save_location):
+    """
+    Creates a PowerPoint presentation based on provided data and optional commentaries.
+
+    :param data: A dictionary keyed by slide name. Each value is a dictionary with optional
+        'title', 'image_path' and 'text' entries; one slide is added per entry, in dictionary order.
+    :param save_location: The file path where the PowerPoint presentation will be saved.
+    """
+    prs = Presentation()
+
+    for slide_data in data.values():
+        add_slide_with_image(
+            prs,
+            slide_data.get('title', ""),
+            slide_data.get('image_path'),
+            slide_data.get('text'),
+        )
+
+    # Save the presentation
+    prs.save(save_location)
diff --git a/etl/customers/urban_splash/slides.py b/etl/customers/urban_splash/slides.py
index f036fa5a..616939b2 100644
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@@ -4,15 +4,21 @@ We connect to the database amd extract the data for the portfolio needed so it i
 a environment akin to the backend to run this script
 """
 import pandas as pd
+import numpy as np
 from backend.app.db.connection import db_engine
+from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from etl.customers.slide_utils import (
     plot_epc_distribution,
     get_property_details_by_portfolio_id,
-    get_properties_with_default_recommendations
+    get_properties_with_default_recommendations,
+    create_powerpoint
 )
 
 PORTFOLIO_ID = 66
+EPC_TARGET = "C"
+SAP_TARGET = 69
+CUSTOMER_KEY = "urban_splash"
 
 
 def app():
@@ -25,18 +31,44 @@ def app():
 
     # Get the properties for the portfolio
     properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
-
-    # The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
-    # EPC band
     properties_df = pd.DataFrame(properties)
-    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
-    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
 
     # We now pull the data for the property details
     property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
     property_details_df = pd.DataFrame(property_details)
 
+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Aggregate the impact of the recommendations
+    # We want:
+    # Total number of sap points
+    # total valuation impact
+    # total bill savings
+    # total cost
+    # Total Co2 impact
+    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
+        total_sap_points=("sap_points", "sum"),
+        total_valuation_impact=("property_valuation_increase", "sum"),
+        total_bill_savings=("energy_cost_savings", "sum"),
+        total_cost=("estimated_cost", "sum"),
+        total_carbon=("co2_equivalent_savings", "sum")
+    ).reset_index()
+    # Merge on current sap points
+    recommendations_summary = recommendations_summary.merge(
+        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
+        how="left"
+    )
+    recommendations_summary["expected_sap_points"] = (
+        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+    )
+    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(sap_to_epc)
+    recommendations_summary["sap_difference"] = SAP_TARGET - recommendations_summary["expected_sap_points"]
+
+    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
+    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
+
 
     ########################################################################
     # We pull out the data for the slides
     ########################################################################
@@ -45,39 +77,130 @@ def app():
     # Slide 1:
     ############
     # visual
-    plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
+    epc_plot, figure_path = plot_epc_distribution(
+        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
+    )
 
     # floor area - upper and lower bounds
     min_area, max_area, average_area = (
         property_details_df["total_floor_area"].min(),
         property_details_df["total_floor_area"].max(),
-        property_details_df["total_floor_area"].median()
+        property_details_df["total_floor_area"].mean()
     )
 
     # Annual energy consumption - upper and lower bounds
     min_energy_consumption, max_energy_consumption, average_consumption = (
         property_details_df["adjusted_energy_consumption"].min(),
         property_details_df["adjusted_energy_consumption"].max(),
-        property_details_df["adjusted_energy_consumption"].median()
+        property_details_df["adjusted_energy_consumption"].mean()
     )
 
     # Co2 emissions - upper and lower bounds
     min_co2, max_co2, average_co2 = (
         property_details_df["co2_emissions"].min(),
         property_details_df["co2_emissions"].max(),
-        property_details_df["co2_emissions"].median()
+        property_details_df["co2_emissions"].mean()
     )
 
     # Valuation: upper and lower bounds - TODO!
     min_valuation, max_valuation, average_valuation = 0, 0, 0
 
+    slide_1_commentary = (
+        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
+        f"meters. "
+        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
+        f"average of {average_consumption} kWh. "
+        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. "
+        f"Valuations range from £{min_valuation} to £{max_valuation}, with an average of £"
+        f"{average_valuation}."
+    )
+
     ############
     # Slide 2:
     ############
     # What it would take to hit EPC C
-    # This is the number of properties that are below a C
-    n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
-    # We calculate the number of units that will make it to an EPC C
-    n_units_to_epc_c = ()
+
+    # Bands at or above the target count as a hit: matching the target letter only would
+    # class units that overshoot into a better band (A or B for a C target) as misses.
+    bands_meeting_target = list("ABCDEFG"[:"ABCDEFG".index(EPC_TARGET) + 1])
+    hits_target = recommendations_summary["expected_epc_rating"].isin(bands_meeting_target)
+    units_hitting_target = recommendations_summary[hits_target]
+
+    n_units_to_target = units_hitting_target.shape[0]
+
+    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
+
+    # Per property
+    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
+        units_hitting_target["total_valuation_impact"].min(),
+        units_hitting_target["total_valuation_impact"].max(),
+        units_hitting_target["total_valuation_impact"].mean()
+    )
+
+    # Bill savings per property
+    min_bill_savings, max_bill_savings, average_bill_savings = (
+        units_hitting_target["total_bill_savings"].min(),
+        units_hitting_target["total_bill_savings"].max(),
+        units_hitting_target["total_bill_savings"].mean()
+    )
+
+    # Total CO2 reduction of portfolio
+    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
+        units_hitting_target["total_carbon"].min(),
+        units_hitting_target["total_carbon"].max(),
+        units_hitting_target["total_carbon"].mean(),
+        units_hitting_target["total_carbon"].sum()
+    )
+
+    slide_2_commentary = (
+        f"{n_units_to_target} expected to achieve EPC {EPC_TARGET}. "
+        f"Measures include: {measures}. "
+        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
+        f"{average_valuation_impact}. "
+        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}. "
+        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction} "
+        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes."
+    )
+
+    ############
+    # Slide 3:
+    ############
+
+    units_missed_target = recommendations_summary[~hits_target]
+
+    n_units_missed_target = units_missed_target.shape[0]
+
+    # How close were the properties that missed the target
+    # We calculate the difference between the expected sap points and the lower bound sap points for the target
+
+    min_difference, max_difference, average_difference = (
+        np.ceil(units_missed_target["sap_difference"].min()),
+        np.ceil(units_missed_target["sap_difference"].max()),
+        np.ceil(units_missed_target["sap_difference"].mean())
+    )
+
+    slide_3_text = (
+        "Many of the properties upgrade considerably and may be able to achieve an EPC C with further measures, "
+        "however we could need a survey to confirm the heating and hot water systems to identify further "
+        "potential measures.")
+
+    slide_data = {
+        'slide_1': {
+            "title": "EPC Rating Distribution",
+            'image_path': figure_path,  # Pass the path to the saved image
+            "text": slide_1_commentary
+        },
+        "slide_2": {
+            "title": f"Properties that achieve EPC {EPC_TARGET}",
+            "text": slide_2_commentary,
+        },
+        "slide_3": {
+            "title": f"Properties that miss EPC {EPC_TARGET}",
+            "text": slide_3_text
+        }
+    }
+
+    save_location = f"etl/customers/{CUSTOMER_KEY}/powerpoint.pptx"
+    create_powerpoint(slide_data, save_location)
 
 