diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index 55e7659a..d41f4f3b 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -1,65 +1,119 @@
 import matplotlib.pyplot as plt
 from sqlalchemy.orm import Session
 from backend.app.db.utils import row2dict
-from backend.app.db.models.portfolio import PropertyModel
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import Recommendation
 
 
 EPC_COLOURS = {
-    "A": "#008054",
-    "B": "#1ab559",
-    "C": "#8ccf45",
-    "D": "#ffd600",
-    "E": "#fcab66",
-    "F": "#f08024",
-    "G": "#e8143b"
+    "A": "#028051",
+    "B": "#14b759",
+    "C": "#8ecd46",
+    "D": "#fdd401",
+    "E": "#fdab67",
+    "F": "#ee8023",
+    "G": "#e71437"
 }
 
 
-def get_properties_by_portfolio_id(session: Session, portfolio_id: int):
+def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
     """
-    This function retrieves all properties associated with a given portfolio_id.
+    Fetch properties for a given portfolio_id along with their default recommendations,
+    minimizing database queries.
 
     :param session: The SQLAlchemy session used to execute the query.
-    :param portfolio_id: The ID of the portfolio for which to retrieve properties.
-    :return: A list of dictionaries, where each dictionary represents a property.
-    Returns an empty list if no properties are found.
+    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
+    :return: A list of dictionaries, where each dictionary represents a property including
+    its default recommendations under 'recommendations' (an empty list when it has none).
     """
-    properties = session.query(PropertyModel).filter(PropertyModel.portfolio_id == portfolio_id).all()
+    # Outer-join recommendations. The default flag is checked in Python below: filtering on it in WHERE would
+    # discard the NULL rows of the outer join and silently drop properties without a default recommendation.
+    rows = session.query(PropertyModel, Recommendation).join(Recommendation, isouter=True) \
+        .filter(PropertyModel.portfolio_id == portfolio_id).all()
+
+    # Process the query results
+    properties = {}
+    for prop, recommendation in rows:
+        # Check if the property is already added to the dictionary
+        if prop.id not in properties:
+            properties[prop.id] = row2dict(prop)
+            properties[prop.id]['recommendations'] = []
+
+        # Add the recommendation only if one was joined and it is flagged as the default
+        if recommendation is not None and recommendation.default:
+            properties[prop.id]['recommendations'].append(row2dict(recommendation))
+
+    return list(properties.values())
+
+
+def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
+    """
+    This function retrieves all property details associated with a given portfolio_id.
+
+    :param session: The SQLAlchemy session used to execute the query.
+    :param portfolio_id: The ID of the portfolio for which to retrieve property details.
+    :return: A list of dictionaries, where each dictionary represents a property's details.
+    Returns an empty list if no property details are found.
+    """
+    property_details = session.query(PropertyDetailsEpcModel).filter(
+        PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
 
     # Convert the SQLAlchemy objects to dictionaries
-    properties_dict = [row2dict(p) for p in properties] if properties else []
+    property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
 
-    return properties_dict
+    return property_details_dict
 
 
-def plot_epc_distribution(df, title='Your units', figsize=(10, 6)):
+def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
     """
-    Plots a horizontal bar chart of EPC rating distribution with percentages annotated on the bars.
+    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
+    Allows setting the plot background color and dynamically adjusts text size and bar spacing.
 
     :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']
-    :param title: Title of the plot (default is 'EPC Rating Distribution by Percentage')
-    :param figsize: Figure size as a tuple (default is (10, 6))
+    :param title: Title of the plot
+    :param background_color: Background color of the plot
+    :param bar_height: Thickness of the bars (default 0.4)
+    :param font_size: Base font size for text annotations (default 15)
     """
-    # Sort the DataFrame for a consistent plotting order
+    # Calculate dynamic figure size or adjust based on preferences
+    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
+    fig, ax = plt.subplots(figsize=(square_size, square_size))
+    fig.patch.set_facecolor(background_color)  # Set figure background color
+    ax.set_facecolor(background_color)  # Set axes background color
+
+    df = df.assign(percentage=df['percentage'].round(1))  # Round to 1 d.p. on a copy; the caller's df is not mutated
     df_sorted = df.sort_values('percentage', ascending=True)
-    colors = df_sorted['current_epc_rating'].map(EPC_COLOURS)  # Map the EPC ratings to colors
+    # Plot bars with specified height for adjustable thickness
+    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
+                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
 
-    # Create the horizontal bar chart
-    plt.figure(figsize=figsize)
-    bars = plt.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], color=colors)
+    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
+    count_percentage_font_size = font_size  # Count (percentage) font size as base font size
 
-    # Annotate the bars with percentage values
-    for bar in bars:
+    # Annotate bars with EPC ratings inside and count with percentage values outside
+    for index, bar in enumerate(bars):
         width = bar.get_width()
-        label_x_pos = width + 1  # Adjust the offset for the label if necessary
-        plt.text(label_x_pos, bar.get_y() + bar.get_height() / 2, f'{width}%', va='center')
+        epc_rating = df_sorted.iloc[index]['current_epc_rating']
+        count = df_sorted.iloc[index]['count']
+        percentage = df_sorted.iloc[index]['percentage']
 
-    # Customize the plot aesthetics for better readability and presentation
-    plt.xlabel('Percentage')
-    plt.ylabel('EPC Rating')
-    plt.title(title)
-    plt.tight_layout()  # Adjust layout to not cut off labels
-    plt.grid(axis='x', linestyle='--')  # Add a light grid for better readability
+        # EPC rating inside the bar with increased font size
+        ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
+                f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
 
-    # Show the plot
+        # Count and percentage outside the bar, original font size
+        ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
+                f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
+
+    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
+    ax.tick_params(axis='x', which='both', bottom=False, top=False,
+                   labelbottom=False)  # Remove x-axis tick marks and values
+    ax.tick_params(axis='y', which='both', left=False, right=False,
+                   labelleft=False)  # Remove y-axis tick marks and labels
+    ax.spines['top'].set_visible(False)  # Remove top spine
+    ax.spines['right'].set_visible(False)  # Remove right spine
+    ax.spines['left'].set_visible(False)  # Remove left spine
+    ax.spines['bottom'].set_visible(False)  # Remove bottom spine
+
+    plt.tight_layout()  # Adjust layout
     plt.show()
diff --git a/etl/customers/urban_splash/slides.py b/etl/customers/urban_splash/slides.py
index fa0df3c2..f036fa5a 100644
--- a/etl/customers/urban_splash/slides.py
+++ b/etl/customers/urban_splash/slides.py
@@ -6,7 +6,11 @@ a environment akin to the backend to run this script
 import pandas as pd
 from backend.app.db.connection import db_engine
 from sqlalchemy.orm import sessionmaker
-from etl.customers.slide_utils import get_properties_by_portfolio_id, plot_epc_distribution
+from etl.customers.slide_utils import (
+    plot_epc_distribution,
+    get_property_details_by_portfolio_id,
+    get_properties_with_default_recommendations
+)
 
 PORTFOLIO_ID = 66
 
@@ -15,8 +19,12 @@ def app():
     # Connect to database
     session = sessionmaker(bind=db_engine)()
 
+    ########################################################################
+    # Get the data we need
+    ########################################################################
+
     # Get the properties for the portfolio
-    properties = get_properties_by_portfolio_id(session, PORTFOLIO_ID)
+    properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
 
     # The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
     # EPC band
@@ -24,3 +32,52 @@ def app():
     properties_df = pd.DataFrame(properties)
     epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
     epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
+
+    # We now pull the data for the property details
+    property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
+    property_details_df = pd.DataFrame(property_details)
+
+    ########################################################################
+    # We pull out the data for the slides
+    ########################################################################
+
+    ############
+    # Slide 1:
+    ############
+    # visual
+    plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
+
+    # floor area - upper and lower bounds
+    min_area, max_area, average_area = (
+        property_details_df["total_floor_area"].min(),
+        property_details_df["total_floor_area"].max(),
+        property_details_df["total_floor_area"].median()
+    )
+
+    # Annual energy consumption - upper and lower bounds
+    min_energy_consumption, max_energy_consumption, average_consumption = (
+        property_details_df["adjusted_energy_consumption"].min(),
+        property_details_df["adjusted_energy_consumption"].max(),
+        property_details_df["adjusted_energy_consumption"].median()
+    )
+
+    # Co2 emissions - upper and lower bounds
+    min_co2, max_co2, average_co2 = (
+        property_details_df["co2_emissions"].min(),
+        property_details_df["co2_emissions"].max(),
+        property_details_df["co2_emissions"].median()
+    )
+
+    # Valuation: upper and lower bounds - TODO!
+    min_valuation, max_valuation, average_valuation = 0, 0, 0
+
+    ############
+    # Slide 2:
+    ############
+    # What it would take to hit EPC C
+
+    # This is the number of properties that are below a C
+    n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
+
+    # We calculate the number of units that will make it to an EPC C
+    n_units_to_epc_c = ()