slide script wip

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-21 13:55:28 +00:00
parent 80a542f02e
commit 0b8da8d8be
2 changed files with 149 additions and 38 deletions

View file

@ -1,65 +1,119 @@
import matplotlib.pyplot as plt
from sqlalchemy.orm import Session
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
# Hex colour used to draw each EPC rating band, keyed by the band letter.
# Each band appears exactly once; plot_epc_distribution maps ratings through
# this table to colour its bars.
EPC_COLOURS = {
    "A": "#028051",
    "B": "#14b759",
    "C": "#8ecd46",
    "D": "#fdd401",
    "E": "#fdab67",
    "F": "#ee8023",
    "G": "#e71437",
}
def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
    Fetch properties for a given portfolio_id along with their default recommendations.

    Exactly two queries are issued regardless of how many properties the portfolio holds:
    one for the properties and one for their default recommendations.

    :param session: The SQLAlchemy session used to execute the queries.
    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
    :return: A list of dictionaries, where each dictionary represents a property and carries a
        'recommendations' key holding a list of its default recommendations (as dictionaries).
        The list is empty for a property that has no default recommendation.
        Returns an empty list if the portfolio has no properties.
    """
    portfolio_properties = (
        session.query(PropertyModel)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
    )

    # Seed every property up front. Filtering an outer join on Recommendation.default in the
    # WHERE clause discards the NULL-extended rows, which would silently drop any property
    # that has no default recommendation.
    properties = {}
    for property_row in portfolio_properties:
        property_dict = row2dict(property_row)
        property_dict['recommendations'] = []
        properties[property_row.id] = property_dict

    if not properties:
        return []

    # Only matching (property, default recommendation) pairs are needed now, so a plain
    # join is sufficient; the join condition is inferred from the models' relationship.
    default_pairs = (
        session.query(PropertyModel, Recommendation)
        .join(Recommendation)
        .filter(PropertyModel.portfolio_id == portfolio_id, Recommendation.default == True)  # noqa: E712
        .all()
    )

    for property_row, recommendation in default_pairs:
        properties[property_row.id]['recommendations'].append(row2dict(recommendation))

    return list(properties.values())
def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    """
    Retrieve every property-details record stored against a portfolio.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio whose property details are wanted.
    :return: A list of dictionaries, one per PropertyDetailsEpcModel row, produced by row2dict.
        Returns an empty list if no property details are found.
    """
    details_query = session.query(PropertyDetailsEpcModel).filter(
        PropertyDetailsEpcModel.portfolio_id == portfolio_id
    )
    detail_rows = details_query.all()

    # Nothing stored for this portfolio
    if not detail_rows:
        return []

    # Convert the SQLAlchemy objects to plain dictionaries
    return [row2dict(detail_row) for detail_row in detail_rows]
def plot_epc_distribution(df, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
    """
    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.

    Each bar is coloured via EPC_COLOURS, carries its EPC rating inside the bar and a
    "count (percentage%)" label just past its end. Axis ticks, tick labels and spines are
    hidden. The figure is displayed with plt.show(); nothing is returned.

    The input DataFrame is not modified: rounding and sorting are applied to a copy.

    :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']
    :param title: Title of the plot
    :param background_color: Background color of the figure and axes
    :param bar_height: Thickness of the bars (default 0.4)
    :param font_size: Base font size for text annotations (default 15); the EPC rating label
        uses twice this size and the title 1.2 times this size
    """
    # Round to 1 decimal place on a copy (assign returns a new frame) so the caller's
    # DataFrame keeps its unrounded percentages, then order bars by percentage.
    df_sorted = (
        df.assign(percentage=df['percentage'].round(1))
        .sort_values('percentage', ascending=True)
    )

    # Square figure: at least 6 inches, growing with the number of entries
    square_size = max(6, len(df) * 0.6)
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Set figure background color
    ax.set_facecolor(background_color)  # Set axes background color

    # Plot bars with specified height for adjustable thickness
    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)

    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
    count_percentage_font_size = font_size  # Count (percentage) font size as base font size

    # Bars are created in the same order as the rows of df_sorted, so they can be paired
    # with the row values directly instead of looking each row up by position.
    labelled_bars = zip(bars, df_sorted['current_epc_rating'], df_sorted['count'], df_sorted['percentage'])
    for bar, epc_rating, count, percentage in labelled_bars:
        width = bar.get_width()
        bar_centre_y = bar.get_y() + bar.get_height() / 2

        # EPC rating inside the bar, right-aligned 5% in from the bar's end
        ax.text(width - (width * 0.05), bar_centre_y,
                f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)

        # Count and percentage outside the bar, base font size
        ax.text(width + 1, bar_centre_y,
                f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)

    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)  # Remove x-axis tick marks and values
    ax.tick_params(axis='y', which='both', left=False, right=False,
                   labelleft=False)  # Remove y-axis tick marks and labels

    # Remove all four spines so only the bars and their labels remain
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    plt.tight_layout()  # Adjust layout
    plt.show()

View file

@ -6,7 +6,11 @@ an environment akin to the backend to run this script
import pandas as pd
from backend.app.db.connection import db_engine
from sqlalchemy.orm import sessionmaker
from etl.customers.slide_utils import get_properties_by_portfolio_id, plot_epc_distribution
from etl.customers.slide_utils import (
plot_epc_distribution,
get_property_details_by_portfolio_id,
get_properties_with_default_recommendations
)
PORTFOLIO_ID = 66
@ -15,8 +19,12 @@ def app():
# Connect to database
session = sessionmaker(bind=db_engine)()
########################################################################
# Get the data we need
########################################################################
# Get the properties for the portfolio
properties = get_properties_by_portfolio_id(session, PORTFOLIO_ID)
properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
# The first visual we want to produce is a horizontal bar chart showing the number of properties at each current
# EPC band
@ -24,3 +32,52 @@ def app():
properties_df = pd.DataFrame(properties)
epc_rating_summary = properties_df.groupby("current_epc_rating").size().reset_index(name="count")
epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
# We now pull the data for the property details
property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
property_details_df = pd.DataFrame(property_details)
########################################################################
# We pull out the data for the slides
########################################################################
############
# Slide 1:
############
# visual
plot_epc_distribution(epc_rating_summary, title="", background_color="white", bar_height=0.75, font_size=15)
# floor area - upper and lower bounds
min_area, max_area, average_area = (
property_details_df["total_floor_area"].min(),
property_details_df["total_floor_area"].max(),
property_details_df["total_floor_area"].median()
)
# Annual energy consumption - upper and lower bounds
min_energy_consumption, max_energy_consumption, average_consumption = (
property_details_df["adjusted_energy_consumption"].min(),
property_details_df["adjusted_energy_consumption"].max(),
property_details_df["adjusted_energy_consumption"].median()
)
# Co2 emissions - upper and lower bounds
min_co2, max_co2, average_co2 = (
property_details_df["co2_emissions"].min(),
property_details_df["co2_emissions"].max(),
property_details_df["co2_emissions"].median()
)
# Valuation: upper and lower bounds - TODO!
min_valuation, max_valuation, average_valuation = 0, 0, 0
############
# Slide 2:
############
# What it would take to hit EPC C
# This is the number of properties that are below a C
n_units = properties_df[properties_df["current_epc_rating"].isin(["D", "E", "F", "G"])].shape[0]
# We calculate the number of units that will make it to an EPC C
n_units_to_epc_c = ()