# Model/etl/customers/l_and_g/ic_slides.py
#
# 299 lines
# 9.4 KiB
# Python

import pandas as pd
from backend.app.utils import sap_to_epc
# Load the Basildon property export used for the age / type breakdowns.
data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/property_202501170837.csv"
)

# Breakdown by construction-age band. Counts recorded from an earlier run:
data["year_built"].value_counts()
# 1950-1966 26
# 1967-1975 37
# 1976-1982 37
# 1983-1990 33
# 1991-1995 139
# 1996-2002 42
# 2003-2006 50

# Combined "<property type>: <built form>" label, e.g. "House: Mid-Terrace".
data["full_property_type"] = data["property_type"].add(": ").add(data["built_form"])

# Built-form breakdown restricted to houses and bungalows.
houses = data.loc[data["property_type"].isin(["House", "Bungalow"])]
houses["built_form"].value_counts()

# Breakdowns over the whole data set, by type and by combined label.
data["property_type"].value_counts()
data["full_property_type"].value_counts()
# House: Mid-Terrace 136
# House: End-Terrace 83
# House: Semi-Detached 55
# Flat: Semi-Detached 24
# Flat: End-Terrace 19
# House: Detached 10
# Flat: Mid-Terrace 9
# Maisonette: Mid-Terrace 9
# Maisonette: Semi-Detached 8
# Maisonette: End-Terrace 6
# Flat: Detached 4
# Bungalow: Detached 1
def _floor_area_bracket(area):
    """Return the floor-area band label for ``area`` (m2), or None if it is missing.

    Bands are "<73", "73-98" (73 up to but excluding 99), "99-200" (99 up to
    but excluding 200) and "200+". A missing area is returned as None rather
    than falling through every comparison and being labelled "200+".
    """
    if pd.isna(area):
        return None
    if area < 73:
        return "<73"
    if area < 99:
        return "73-98"
    if area < 200:
        return "99-200"
    return "200+"


# Load the Basildon EPC export.
epc_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/basildon EPC Data.csv"
)

# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
    _floor_area_bracket
)
# Counts recorded from an earlier run:
# 73-98 185
# <73 156
# 99-200 23

# The wall type is the text before the first comma of the "walls" description.
epc_data["wall_type"] = epc_data["walls"].str.split(",").str[0]
epc_data["wall_type"].value_counts()
# Cavity wall 343
# Timber frame 15
# System built 6
# We pull some additional data from the database.
# We want:
# 1) The list of properties included in the portfolio, with UPRN
# 2) The recommendations against each property with costs, and whether or not each recommendation is a default one
# 3) The properties without recommendations, and why
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def _columns_as_dict(model, instance):
    """Return ``{column name: value}`` for every table column of ``model`` read off ``instance``."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """Fetch property, plan and default-recommendation rows as lists of plain dicts.

    Args:
        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
        scenario_ids: Collection of ids matched against ``PlanModel.scenario_id``.

    Returns:
        A tuple ``(properties_data, plans_data, recommendations_data)``:

        * ``properties_data`` - one dict per (PropertyModel, PropertyDetailsEpcModel)
          pair in the portfolio, holding every column of both tables.
        * ``plans_data`` - one dict per PlanModel row in the given scenarios.
        * ``recommendations_data`` - one dict per default Recommendation attached
          (via PlanRecommendations) to one of those plans, with every
          Recommendation column plus a ``"Scenario ID"`` key.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Properties of the portfolio, inner-joined to their EPC details.
        properties_query = (
            session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )
        # The two column dicts are merged with the EPC-details dict last, so
        # when both tables have a column of the same name the details value wins.
        properties_data = [
            {
                **_columns_as_dict(PropertyModel, prop.PropertyModel),
                **_columns_as_dict(PropertyDetailsEpcModel, prop.PropertyDetailsEpcModel),
            }
            for prop in properties_query
        ]

        # Plans are selected by scenario only; they are NOT filtered by the
        # properties fetched above.
        plans_query = (
            session.query(PlanModel)
            .filter(PlanModel.scenario_id.in_(scenario_ids))
            .all()
        )
        plans_data = [_columns_as_dict(PlanModel, plan) for plan in plans_query]

        # Plan ids used to filter recommendations through PlanRecommendations.
        plan_ids = [plan["id"] for plan in plans_data]

        # Default recommendations attached to those plans, together with the
        # scenario id of the plan each one belongs to.
        recommendations_query = (
            session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(
                PlanModel,
                PlanModel.id == PlanRecommendations.plan_id,
            )
            .filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                Recommendation.default.is_(True),
            )
            .all()
        )
        recommendations_data = [
            {
                **_columns_as_dict(Recommendation, rec.Recommendation),
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]
    finally:
        # Always release the connection, including when a query raises.
        session.close()

    return properties_data, plans_data, recommendations_data
# Pull the raw rows for portfolio 124 / scenario 205, then wrap each list of
# dicts in its own DataFrame.
properties_data, plans_data, recommendations_data = get_data(124, [205])
properties_df, plans_df, recommendations_df = (
    pd.DataFrame(rows)
    for rows in (properties_data, plans_data, recommendations_data)
)
# Boolean mask selecting the recommendations flagged as default.
default_mask = recommendations_df["default"]

# Cost of each default measure, one row per (property, measure).
recommended_measures_df = recommendations_df.loc[
    default_mask, ["property_id", "measure_type", "estimated_cost"]
]

# Total SAP points of the default measures, summed per property.
post_install_sap = (
    recommendations_df.loc[default_mask, ["property_id", "sap_points"]]
    .groupby("property_id", as_index=False)[["sap_points"]]
    .sum()
)
# Raw ``measure_type`` value -> label used in the "Cost: ..." and
# "Recommendation: ..." export column names.
_MEASURE_LABELS = {
    "air_source_heat_pump": "Air Source Heat Pump",
    "cavity_wall_insulation": "Cavity Wall Insulation",
    "double_glazing": "Double Glazing",
    "loft_insulation": "Loft Insulation",
    "mechanical_ventilation": "Ventilation",
    "solar_pv": "Solar PV",
}

# One row per property, one column per measure type, values = estimated cost.
# NOTE: DataFrame.pivot raises ValueError if the same (property_id,
# measure_type) pair occurs more than once.
recommendations_measures_pivot = recommended_measures_df.pivot(
    index="property_id", columns="measure_type", values="estimated_cost"
).reset_index()
recommendations_measures_pivot = recommendations_measures_pivot.rename(
    columns={raw: f"Cost: {label}" for raw, label in _MEASURE_LABELS.items()}
)

# A measure type that never occurs in the data produces no pivot column; add
# it as an all-zero cost so the flag columns below (and later consumers of
# these columns) do not fail with a KeyError.
for label in _MEASURE_LABELS.values():
    if f"Cost: {label}" not in recommendations_measures_pivot.columns:
        recommendations_measures_pivot[f"Cost: {label}"] = 0

# A property without a given measure has no cost for it: treat that as 0.
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)

# A measure counts as recommended for a property when its cost is positive.
for label in _MEASURE_LABELS.values():
    recommendations_measures_pivot[f"Recommendation: {label}"] = (
        recommendations_measures_pivot[f"Cost: {label}"] > 0
    )
# Property attributes carried into the export; property_id is only the join key.
df = properties_df[
    [
        "property_id",
        "uprn",
        "address",
        "postcode",
        "property_type",
        "walls",
        "roof",
        "heating",
        "windows",
        "current_epc_rating",
        "current_sap_points",
        "total_floor_area",
        "number_of_rooms",
    ]
]

# Left joins keep every property, including those with no recommendations.
df = df.merge(recommendations_measures_pivot, how="left", on="property_id")
df = df.merge(post_install_sap, how="left", on="property_id")
df = df.drop(columns=["property_id"])

# Properties with no recommendations gain no SAP points.
df["sap_points"] = df["sap_points"].fillna(0)

# Human-readable headers for the export.
# NOTE(review): "property_type" is not in this mapping and keeps its original
# name, and "floor_height" is not among the selected property columns, so that
# entry does not rename any of them - confirm both are intended.
df = df.rename(
    columns=dict(
        [
            ("uprn", "UPRN"),
            ("address", "Address"),
            ("postcode", "Postcode"),
            ("walls", "Walls"),
            ("roof", "Roof"),
            ("heating", "Heating"),
            ("windows", "Windows"),
            ("current_epc_rating", "Current EPC Rating"),
            ("current_sap_points", "Current SAP Points"),
            ("total_floor_area", "Total Floor Area"),
            ("number_of_rooms", "Number of Habitable Rooms"),
            ("floor_height", "Floor Height"),
        ]
    )
)
# A property has recommendations when the left merge found a matching row for
# it, i.e. its merged-in cost column is not NaN. This must run before the cost
# columns are filled below.
df["Has Recommendations"] = df["Cost: Air Source Heat Pump"].notna()

# We fill missings:
# Unmatched properties have NaN flags after the merge; they have no
# recommendations, so the flag is False. The explicit astype(bool) pins the
# dtype instead of relying on pandas silently downcasting the object column.
for col in [
    "Recommendation: Air Source Heat Pump",
    "Recommendation: Cavity Wall Insulation",
    "Recommendation: Double Glazing",
    "Recommendation: Loft Insulation",
    "Recommendation: Ventilation",
    "Recommendation: Solar PV",
]:
    df[col] = df[col].fillna(False).astype(bool)
for col in [
    "Cost: Air Source Heat Pump",
    "Cost: Cavity Wall Insulation",
    "Cost: Double Glazing",
    "Cost: Loft Insulation",
    "Cost: Ventilation",
    "Cost: Solar PV",
]:
    df[col] = df[col].fillna(0)

# Calculate post SAP: current score plus the summed points of the default
# measures, rounded to a whole number.
df["Predicted Post Works SAP"] = (df["Current SAP Points"] + df["sap_points"]).round()
# NOTE(review): a missing current SAP score stays NaN here and is passed to
# sap_to_epc as-is - confirm that helper handles NaN.
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(sap_to_epc)

# Quick totals for interactive inspection (no effect when run as a script).
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()

df.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
    index=False,
)