slide script wip

2026-07-27 23:35:01 +00:00 · 2024-02-21 13:55:28 +00:00 · 2024-02-21 13:55:28 +00:00 · 0b8da8d8be
commit 0b8da8d8be
parent 80a542f02e
2 changed files with 149 additions and 38 deletions
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@ -1,65 +1,119 @@
 import matplotlib.pyplot as plt
 from sqlalchemy.orm import Session
 from backend.app.db.utils import row2dict
-from backend.app.db.models.portfolio import PropertyModel
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import Recommendation

 EPC_COLOURS = {
-    "A": "#008054",
-    "B": "#1ab559",
-    "C": "#8ccf45",
-    "D": "#ffd600",
-    "E": "#fcab66",
-    "F": "#f08024",
-    "G": "#e8143b"
+    "A": "#028051",
+    "B": "#14b759",
+    "C": "#8ecd46",
+    "D": "#fdd401",
+    "E": "#fdab67",
+    "F": "#ee8023",
+    "G": "#e71437"
 }


-def get_properties_by_portfolio_id(session: Session, portfolio_id: int):
+def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
-    This function retrieves all properties associated with a given portfolio_id.
+    Fetch properties for a given portfolio_id along with their default recommendations,
+    minimizing database queries.

    :param session: The SQLAlchemy session used to execute the query.
-    :param portfolio_id: The ID of the portfolio for which to retrieve properties.
-    :return: A list of dictionaries, where each dictionary represents a property.
-             Returns an empty list if no properties are found.
+    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
+    :return: A list of dictionaries, where each dictionary represents a property including
+             its associated default recommendations.
    """
-    properties = session.query(PropertyModel).filter(PropertyModel.portfolio_id == portfolio_id).all()
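+    # Outer-join PropertyModel to Recommendation, filtering by portfolio_id and default=True. NOTE(review): the default filter is in WHERE, so properties with no default recommendation appear to be dropped despite isouter=True - confirm this is intended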
+    query = session.query(PropertyModel, Recommendation).join(Recommendation, isouter=True) \
+        .filter(PropertyModel.portfolio_id == portfolio_id, Recommendation.default == True) \
+        .all()
+
+    # Process the query results
+    properties = {}
+    for property, recommendation in query:
+        # Check if the property is already added to the dictionary
+        if property.id not in properties:
+            properties[property.id] = row2dict(property)
+            properties[property.id]['recommendations'] = []
+
+        # Add recommendation to the property if it exists
+        if recommendation:
+            properties[property.id]['recommendations'].append(row2dict(recommendation))
+
+    return list(properties.values())
+
+
+def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
+    """
+    This function retrieves all property details associated with a given portfolio_id.
+
+    :param session: The SQLAlchemy session used to execute the query.
+    :param portfolio_id: The ID of the portfolio for which to retrieve property details.
+    :return: A list of dictionaries, where each dictionary represents a property's details.
+             Returns an empty list if no property details are found.
+    """
+    property_details = session.query(PropertyDetailsEpcModel).filter(
+        PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()

    # Convert the SQLAlchemy objects to dictionaries
-    properties_dict = [row2dict(p) for p in properties] if properties else []
+    property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []

-    return properties_dict
+    return property_details_dict


-def plot_epc_distribution(df, title='Your units', figsize=(10, 6)):
+def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
    """
-    Plots a horizontal bar chart of EPC rating distribution with percentages annotated on the bars.
+    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
+    Allows setting the plot background color and dynamically adjusts text size and bar spacing.

    :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']
-    :param title: Title of the plot (default is 'EPC Rating Distribution by Percentage')
-    :param figsize: Figure size as a tuple (default is (10, 6))
+    :param title: Title of the plot
+    :param background_color: Background color of the plot
+    :param bar_height: Thickness of the bars (default 0.4)
+    :param font_size: Base font size for text annotations (default 15)
    """
-    # Sort the DataFrame for a consistent plotting order
+    # Size the square figure from the number of rows in df, with a minimum side length of 6
+    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
+    fig, ax = plt.subplots(figsize=(square_size, square_size))
+    fig.patch.set_facecolor(background_color)  # Set figure background color
+    ax.set_facecolor(background_color)  # Set axes background color
+
+    df['percentage'] = df['percentage'].round(1)  # Round the percentage values to 1 decimal place
    df_sorted = df.sort_values('percentage', ascending=True)

-    colors = df_sorted['current_epc_rating'].map(EPC_COLOURS)  # Map the EPC ratings to colors
+    # Plot bars with specified height for adjustable thickness
+    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
+                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)

-    # Create the horizontal bar chart
-    plt.figure(figsize=figsize)
-    bars = plt.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], color=colors)
+    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
+    count_percentage_font_size = font_size  # Count (percentage) font size as base font size

-    # Annotate the bars with percentage values
-    for bar in bars:
+    # Annotate bars with EPC ratings inside and count with percentage values outside
+    for index, bar in enumerate(bars):
        width = bar.get_width()
-        label_x_pos = width + 1  # Adjust the offset for the label if necessary
-        plt.text(label_x_pos, bar.get_y() + bar.get_height() / 2, f'{width}%', va='center')
+        epc_rating = df_sorted.iloc[index]['current_epc_rating']
+        count = df_sorted.iloc[index]['count']
+        percentage = df_sorted.iloc[index]['percentage']

-    # Customize the plot aesthetics for better readability and presentation
-    plt.xlabel('Percentage')
-    plt.ylabel('EPC Rating')
-    plt.title(title)
-    plt.tight_layout()  # Adjust layout to not cut off labels
-    plt.grid(axis='x', linestyle='--')  # Add a light grid for better readability
+        # EPC rating inside the bar with increased font size
+        ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
+                f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)

-    # Show the plot
+        # Count and percentage outside the bar, at the base font size
+        ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
+                f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
+
+    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
+    ax.tick_params(axis='x', which='both', bottom=False, top=False,
+                   labelbottom=False)  # Remove x-axis tick marks and values
+    ax.tick_params(axis='y', which='both', left=False, right=False,
+                   labelleft=False)  # Remove y-axis tick marks and labels
+    ax.spines['top'].set_visible(False)  # Remove top spine
+    ax.spines['right'].set_visible(False)  # Remove right spine
+    ax.spines['left'].set_visible(False)  # Remove left spine
+    ax.spines['bottom'].set_visible(False)  # Remove bottom spine
+
+    plt.tight_layout()  # Adjust layout
    plt.show()
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@ -6,7 +6,11 @@ a environment akin to the backend to run this script
 import pandas as pd
 from backend.app.db.connection import db_engine
 from sqlalchemy.orm import sessionmaker
-from etl.customers.slide_utils import get_properties_by_portfolio_id, plot_epc_distribution
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_properties_with_default_recommendations
+)

 PORTFOLIO_ID = 66

@ -15,8 +19,12 @@ def app():
    # Connect to database
    session = sessionmaker(bind=db_engine)()

+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
    # Get the properties for the portfolio
-    properties = get_properties_by_portfolio_id(session, PORTFOLIO_ID)
+    properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)

    # The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
    # EPC band
@ -24,3 +32,52 @@ def app():
    properties_df = pd.DataFrame(properties)
    epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
+    property_details_df = pd.DataFrame(property_details)
+
+    ########################################################################
+    # We pull out the data for the slides
+    ########################################################################
+
+    ############
+    # Slide 1:
+    ############
+    # visual
+    plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
+
+    # Floor area - lower bound, upper bound and median (the median is stored as average_area)
+    min_area, max_area, average_area = (
+        property_details_df["total_floor_area"].min(),
+        property_details_df["total_floor_area"].max(),
+        property_details_df["total_floor_area"].median()
+    )
+
+    # Annual energy consumption - lower bound, upper bound and median (the median is stored as average_consumption)
+    min_energy_consumption, max_energy_consumption, average_consumption = (
+        property_details_df["adjusted_energy_consumption"].min(),
+        property_details_df["adjusted_energy_consumption"].max(),
+        property_details_df["adjusted_energy_consumption"].median()
+    )
+
+    # CO2 emissions - lower bound, upper bound and median (the median is stored as average_co2)
+    min_co2, max_co2, average_co2 = (
+        property_details_df["co2_emissions"].min(),
+        property_details_df["co2_emissions"].max(),
+        property_details_df["co2_emissions"].median()
+    )
+
+    # Valuation: upper and lower bounds - TODO!
+    min_valuation, max_valuation, average_valuation = 0, 0, 0
+
+    ############
+    # Slide 2:
+    ############
+    # What it would take to hit EPC C
+
+    # This is the number of properties that are below a C
+    n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
+
+    # TODO: calculate the number of units that will make it to an EPC C (empty-tuple placeholder for now)
+    n_units_to_epc_c = ()