Model/etl/customers/l_and_g/ic_slides.py
2025-02-12 15:20:55 +00:00

246 lines
8.8 KiB
Python

import pandas as pd
from backend.app.utils import sap_to_epc
data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/property_202501170837.csv"
)
data["year_built"].value_counts()
# 1950-1966 26
# 1967-1975 37
# 1976-1982 37
# 1983-1990 33
# 1991-1995 139
# 1996-2002 42
# 2003-2006 50
data["full_property_type"] = data["property_type"] + ": " + data["built_form"]
houses = data[data["property_type"].isin(["House", "Bungalow"])]
houses["built_form"].value_counts()
data["property_type"].value_counts()
data["full_property_type"].value_counts()
# House: Mid-Terrace 136
# House: End-Terrace 83
# House: Semi-Detached 55
# Flat: Semi-Detached 24
# Flat: End-Terrace 19
# House: Detached 10
# Flat: Mid-Terrace 9
# Maisonette: Mid-Terrace 9
# Maisonette: Semi-Detached 8
# Maisonette: End-Terrace 6
# Flat: Detached 4
# Bungalow: Detached 1
epc_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/basildon EPC Data.csv"
)
# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
# 73-98 185
# <73 156
# 99-200 23
epc_data["wall_type"] = epc_data["walls"].str.split(",").str[0]
epc_data["wall_type"].value_counts()
# Cavity wall 343
# Timber frame 15
# System built 6
# we pull some additional data
# We want:
# 1) The list of properties included in the portfolio, with uprn
# 2) The recommendations against each property with costs, and whether or not the recommendation was defaulted
# 3) The properties without recommendations and why
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def get_data(portfolio_id, scenario_ids):
session = sessionmaker(bind=db_engine)()
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans linked to the fetched properties
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
for rec in recommendations_query
]
session.close()
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)
recommended_measures_df = recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
recommendations_measures_pivot = recommendations_measures_pivot.rename(
columns={
"air_source_heat_pump": "Cost: Air Source Heat Pump",
"cavity_wall_insulation": "Cost: Cavity Wall Insulation",
"double_glazing": "Cost: Double Glazing",
"loft_insulation": "Cost: Loft Insulation",
"mechanical_ventilation": "Cost: Ventilation",
"solar_pv": "Cost: Solar PV"
}
)
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
recommendations_measures_pivot["Recommendation: Air Source Heat Pump"] = (
recommendations_measures_pivot["Cost: Air Source Heat Pump"] > 0
)
recommendations_measures_pivot["Recommendation: Cavity Wall Insulation"] = (
recommendations_measures_pivot["Cost: Cavity Wall Insulation"] > 0
)
recommendations_measures_pivot["Recommendation: Double Glazing"] = (
recommendations_measures_pivot["Cost: Double Glazing"] > 0
)
recommendations_measures_pivot["Recommendation: Loft Insulation"] = (
recommendations_measures_pivot["Cost: Loft Insulation"] > 0
)
recommendations_measures_pivot["Recommendation: Ventilation"] = (
recommendations_measures_pivot["Cost: Ventilation"] > 0
)
recommendations_measures_pivot["Recommendation: Solar PV"] = (
recommendations_measures_pivot["Cost: Solar PV"] > 0
)
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
)
df = df.drop(columns=["property_id"])
df["sap_points"] = df["sap_points"].fillna(0)
df = df.rename(
columns={
"uprn": "UPRN",
"address": "Address",
"postcode": "Postcode",
"walls": "Walls",
"roof": "Roof",
"heating": "Heating",
"windows": "Windows",
"current_epc_rating": "Current EPC Rating",
"current_sap_points": "Current SAP Points",
"total_floor_area": "Total Floor Area",
"number_of_rooms": "Number of Habitable Rooms",
"floor_height": "Floor Height",
}
)
df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])
# We fill missings:
for col in [
"Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
"Recommendation: Solar PV"
]:
df[col] = df[col].fillna(False)
for col in [
"Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
"Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
"Cost: Solar PV"
]:
df[col] = df[col].fillna(0)
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()
df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)