working on slides code and added valuations for urban splash

2026-07-27 23:35:01 +00:00 · 2024-02-21 16:47:14 +00:00 · 2024-02-21 16:47:14 +00:00 · c0b1acef98
commit c0b1acef98
parent 0b8da8d8be
3 changed files with 242 additions and 25 deletions
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -24,6 +24,29 @@ class PropertyValuation:
        100120703802: 277000,  # Based on Zoopla
        10014469685: 286000,  # Based on Zoopla
        10001328782: 196000,  # Based on Zoopla
+        # Urban Splash - valuations from The Move Market
+        10023345430: 74_000,
+        10023345435: 99_000,
+        10023345436: 62_000,
+        10023345441: 62_000,
+        10094183503: 2_988_000,  # NOTE(review): far above the other Urban Splash entries - confirm this figure
+        10094183499: 123_000,
+        10070056824: 70_000,
+        110070056242: 100_000,  # NOTE(review): 12 digits; neighbouring keys are 10070056241/10070056243 - possible typo, confirm
+        10070056243: 130_000,
+        10070056817: 130_000,
+        10094183501: 185_000,
+        10070056250: 71_000,
+        10094183500: 185_000,
+        10070056843: 67_000,
+        10070056844: 67_000,
+        10070056241: 76_000,
+        10070056834: 63_000,
+        10023345439: 62_000,
+        10070056815: 101_000,
+        10070056816: 101_000,
+        10094183498: 101_000,
+        10070056840: 673_000,  # NOTE(review): well above most entries in this group - confirm this figure
    }

    # We base our valuation uplifts on a number of sources
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@ -1,5 +1,9 @@
+import os
+from pptx import Presentation
+from pptx.util import Inches, Pt
 import matplotlib.pyplot as plt
 from sqlalchemy.orm import Session
+from sqlalchemy.sql import true
 from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
@ -18,28 +22,30 @@ EPC_COLOURS = {
 def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
    Fetch properties for a given portfolio_id along with their default recommendations,
-    minimizing database queries.
+    ensuring that all properties are retrieved even if they don't have recommendations
+    where default is True.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
    :return: A list of dictionaries, where each dictionary represents a property including
-             its associated default recommendations.
+             its associated default recommendations if any.
    """
-    # Perform a query that joins PropertyModel and Recommendation, filtering by portfolio_id and default=True
-    query = session.query(PropertyModel, Recommendation).join(Recommendation, isouter=True) \
-        .filter(PropertyModel.portfolio_id == portfolio_id, Recommendation.default == True) \
+    # Keep the default=True condition in the outer join's ON clause so properties without a default recommendation are kept
+    query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
+                                                                   (Recommendation.property_id == PropertyModel.id) & (
+                                                                       Recommendation.default == true())) \
+        .filter(PropertyModel.portfolio_id == portfolio_id) \
        .all()

-    # Process the query results
    properties = {}
    for property, recommendation in query:
-        # Check if the property is already added to the dictionary
+        # Ensure the property is added once with an empty list of recommendations initially
        if property.id not in properties:
            properties[property.id] = row2dict(property)
            properties[property.id]['recommendations'] = []

-        # Add recommendation to the property if it exists
-        if recommendation:
+        # recommendation is None when the outer join found no match; the default check mirrors the join condition
+        if recommendation and recommendation.default:
            properties[property.id]['recommendations'].append(row2dict(recommendation))

    return list(properties.values())
@ -63,7 +69,7 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    return property_details_dict


-def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
+def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
    """
    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
    Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -117,3 +123,67 @@ def plot_epc_distribution(df, title='Your Units', background_color='white', bar_

    plt.tight_layout()  # Adjust layout
    plt.show()
+
+    # Save the figure as an image (relative path; NOTE(review): plt.show() above blocks on interactive backends first)
+    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
+    fig.savefig(figure_path, bbox_inches='tight')
+    plt.close(fig)  # Close the figure to free memory
+
+    return fig, figure_path
+
+
+def save_plot_to_image(figure, path='plot.png'):
+    """
+    Saves a matplotlib figure to `path` with a tight bounding box (for insertion into PowerPoint), then closes it.
+    """
+    figure.savefig(path, bbox_inches='tight')
+    plt.close(figure)
+
+
+def save_figure_as_image(figure, filename='temp_plot.png'):
+    """
+    Saves a matplotlib figure to `filename` at 300 dpi, then closes the figure.
+    """
+    figure.savefig(filename, dpi=300)
+    plt.close(figure)  # Close the figure to prevent it from displaying in notebooks or Python environments
+
+
+def add_slide_with_image(prs, title, img_path=None, commentary=None):
+    """
+    Adds a titled slide to `prs` with an optional image (`img_path`) and optional `commentary` text below it.
+    """
+    slide_layout = prs.slide_layouts[5]  # "Title Only" in python-pptx's default template (index 1 is Title and Content)
+    slide = prs.slides.add_slide(slide_layout)
+    title_placeholder = slide.shapes.title
+    title_placeholder.text = title
+
+    # Add the image
+    if img_path:
+        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
+
+    # Add commentary if provided
+    if commentary:
+        txBox = slide.shapes.add_textbox(Inches(1), Inches(6), Inches(8), Inches(1))
+        tf = txBox.text_frame
+        p = tf.paragraphs[0]  # Reuse the frame's initial paragraph; add_paragraph() would leave an empty one above the text
+        p.text = commentary
+        p.font.size = Pt(14)  # Adjust font size as needed
+
+
+def create_powerpoint(data, save_location):
+    """
+    Creates a PowerPoint presentation based on provided data and optional commentaries.
+
+    :param data: A dictionary mapping a slide key to a dict with optional 'title', 'image_path' and 'text' entries.
+    :param save_location: The file path where the PowerPoint presentation will be saved.
+    """
+    prs = Presentation()
+
+    for slide_data in data.values():  # One slide per entry, in the dictionary's insertion order
+        slide_figure_path = slide_data.get('image_path')
+        text = slide_data.get('text')
+        title = slide_data.get('title', "")
+        add_slide_with_image(prs, title, slide_figure_path, text)
+
+    # Save the presentation
+    prs.save(save_location)
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@ -4,15 +4,21 @@ We connect to the database amd extract the data for the portfolio needed so it i
 a environment akin to the backend to run this script
 """
 import pandas as pd
+import numpy as np
 from backend.app.db.connection import db_engine
+from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from etl.customers.slide_utils import (
    plot_epc_distribution,
    get_property_details_by_portfolio_id,
-    get_properties_with_default_recommendations
+    get_properties_with_default_recommendations,
+    create_powerpoint
 )

 PORTFOLIO_ID = 66
+EPC_TARGET = "C"
+SAP_TARGET = 69
+CUSTOMER_KEY = "urban_splash"


 def app():
@ -25,18 +31,45 @@ def app():

    # Get the properties for the portfolio
    properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
-
-    # The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
-    # EPC band
-
    properties_df = pd.DataFrame(properties)
-    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
-    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100

    # We now pull the data for the property details
    property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
    property_details_df = pd.DataFrame(property_details)

+    # Unnest the recommendations. Each recommendation is a list of dictionaries
+    recommendations_exploded = properties_df["recommendations"].explode().tolist()
+    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+    # Aggregate the impact of the recommendations
+    # We want:
+    # Total number of sap points
+    # total valuation impact
+    # total bill savings
+    # total cost
+    # Total Co2 impact
+    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
+        total_sap_points=("sap_points", "sum"),
+        total_valuation_impact=("property_valuation_increase", "sum"),
+        total_bill_savings=("energy_cost_savings", "sum"),
+        total_cost=("estimated_cost", "sum"),
+        total_carbon=("co2_equivalent_savings", "sum")
+    ).reset_index()
+    # Merge on current sap points
+    recommendations_summary = recommendations_summary.merge(
+        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
+        how="left"
+    )
+    recommendations_summary["expected_sap_points"] = (
+        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+    )
+    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
+        lambda x: sap_to_epc(x)
+    )
+    recommendations_summary["sap_difference"] = SAP_TARGET - recommendations_summary["expected_sap_points"]
+
+    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
+    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
+
    ########################################################################
    # We pull out the data for the slides
    ########################################################################
@ -45,39 +78,130 @@ def app():
    # Slide 1:
    ############
    # visual
-    plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
+    epc_plot, figure_path = plot_epc_distribution(
+        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
+    )

    # floor area - upper and lower bounds
    min_area, max_area, average_area = (
        property_details_df["total_floor_area"].min(),
        property_details_df["total_floor_area"].max(),
-        property_details_df["total_floor_area"].median()
+        property_details_df["total_floor_area"].mean()
    )

    # Annual energy consumption - upper and lower bounds
    min_energy_consumption, max_energy_consumption, average_consumption = (
        property_details_df["adjusted_energy_consumption"].min(),
        property_details_df["adjusted_energy_consumption"].max(),
-        property_details_df["adjusted_energy_consumption"].median()
+        property_details_df["adjusted_energy_consumption"].mean()
    )

    # Co2 emissions - upper and lower bounds
    min_co2, max_co2, average_co2 = (
        property_details_df["co2_emissions"].min(),
        property_details_df["co2_emissions"].max(),
-        property_details_df["co2_emissions"].median()
+        property_details_df["co2_emissions"].mean()
    )

    # Valuation: upper and lower bounds - TODO!
    min_valuation, max_valuation, average_valuation = 0, 0, 0

+    slide_1_commentary = (  # Commentary text for slide 1, built from the min/max/average figures computed above
+        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
+        f"meters. "
+        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
+        f"average of {average_consumption} kWh. "
+        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. "
+        f"Valuations range from £{min_valuation} to £{max_valuation}, with an average of £"
+        f"{average_valuation}."
+    )
+
    ############
    # Slide 2:
    ############
    # What it would take to hit EPC C

-    # This is the number of properties that are below a C
-    n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
-
    # We calculate the number of units that will make it to an EPC C
-    n_units_to_epc_c = ()
+
+    units_hitting_target = recommendations_summary[  # bands A..EPC_TARGET count as hitting (assumes A-G letter ratings)
+        recommendations_summary["expected_epc_rating"].isin(list("ABCDEFG"[:"ABCDEFG".index(EPC_TARGET) + 1]))
+        ]
+
+    n_units_to_target = units_hitting_target.shape[0]
+
+    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
+
+    # Valuation impact per property
+    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
+        units_hitting_target["total_valuation_impact"].min(),
+        units_hitting_target["total_valuation_impact"].max(),
+        units_hitting_target["total_valuation_impact"].mean()
+    )
+
+    # Bill savings per property
+    min_bill_savings, max_bill_savings, average_bill_savings = (
+        units_hitting_target["total_bill_savings"].min(),
+        units_hitting_target["total_bill_savings"].max(),
+        units_hitting_target["total_bill_savings"].mean()
+    )
+
+    # CO2 reduction per property, plus the total across the properties hitting the target
+    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
+        units_hitting_target["total_carbon"].min(),
+        units_hitting_target["total_carbon"].max(),
+        units_hitting_target["total_carbon"].mean(),
+        units_hitting_target["total_carbon"].sum()
+    )
+
+    slide_2_commentary = (
+        f"{n_units_to_target} expected to achieve EPC {EPC_TARGET}. "
+        f"Measures include: {measures}. "
+        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
+        f"{average_valuation_impact}. "
+        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}. "
+        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction} "
+        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes"
+    )
+
+    ############
+    # Slide 3:
+    ############
+
+    units_missed_target = recommendations_summary[  # exact complement of units_hitting_target
+        ~recommendations_summary.index.isin(units_hitting_target.index)
+        ]
+
+    n_units_missed_target = units_missed_target.shape[0]
+
+    # How close were the properties that missed the target
+    # We calculate the difference between the expected sap points and the lower bound sap points for the target
+
+    min_difference, max_difference, average_difference = (
+        np.ceil(units_missed_target["sap_difference"].min()),
+        np.ceil(units_missed_target["sap_difference"].max()),
+        np.ceil(units_missed_target["sap_difference"].mean())
+    )
+
+    slide_3_text = (
+        "Many of the properties upgrade considerably and may be able to achieve an EPC C with further measures, "
+        "however we could need a survey to confirm the heating and hot water systems to identify further "
+        "potential measures.")
+
+    slide_data = {  # One entry per slide; each entry may carry "title", "image_path" and "text"
+        'slide_1': {
+            "title": "EPC Rating Distribution",
+            'image_path': figure_path,  # Pass the path to the saved image
+            "text": slide_1_commentary
+        },
+        "slide_2": {
+            "title": f"Properties that achieve EPC {EPC_TARGET}",
+            "text": slide_2_commentary,
+        },
+        "slide_3": {
+            "title": f"Properties that miss EPC {EPC_TARGET}",
+            "text": slide_3_text
+        }
+    }
+
+    save_location = f"etl/customers/{CUSTOMER_KEY}/powerpoint.pptx"
+    create_powerpoint(slide_data, save_location)