# Mirror of https://github.com/Hestia-Homes/Model.git
# Synced 2026-06-08 11:17:27 +00:00
# 299 lines, 9.4 KiB, Python
import pandas as pd
|
|
from backend.app.utils import sap_to_epc
|
|
|
|
# --- Property extract: construction-age bands and property types -------------
# Load the property export from an absolute path on the author's machine.
data = pd.read_csv(
    filepath_or_buffer="/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/property_202501170837.csv"
)

# Bare expressions below discard their result when run as a script; they are
# only useful when the file is stepped through interactively.
data["year_built"].value_counts()
# Counts recorded alongside the original code:
# 1950-1966 26
# 1967-1975 37
# 1976-1982 37
# 1983-1990 33
# 1991-1995 139
# 1996-2002 42
# 2003-2006 50

# Combined "<property_type>: <built_form>" label, e.g. "House: Mid-Terrace".
data["full_property_type"] = data["property_type"].add(": ").add(data["built_form"])

# Houses and bungalows only, to inspect their built forms separately.
_is_house_or_bungalow = data["property_type"].isin(["House", "Bungalow"])
houses = data[_is_house_or_bungalow]
houses["built_form"].value_counts()

data["property_type"].value_counts()
data["full_property_type"].value_counts()
# Counts recorded alongside the original code:
# House: Mid-Terrace 136
# House: End-Terrace 83
# House: Semi-Detached 55
# Flat: Semi-Detached 24
# Flat: End-Terrace 19
# House: Detached 10
# Flat: Mid-Terrace 9
# Maisonette: Mid-Terrace 9
# Maisonette: Semi-Detached 8
# Maisonette: End-Terrace 6
# Flat: Detached 4
# Bungalow: Detached 1
|
|
|
|
# --- EPC extract: floor-area brackets and wall construction ------------------
# Load the EPC export from an absolute path on the author's machine.
epc_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/basildon EPC Data.csv"
)


def _floor_area_bracket(area):
    """Return the floor-area bracket label for ``area``, or None when it is missing.

    The thresholds are half-open on the right: below 73 -> "<73",
    73 up to (not including) 99 -> "73-98", 99 up to (not including) 200 ->
    "99-200", and 200 or more -> "200+".
    """
    if pd.isna(area):
        # Every comparison against NaN is False, so without this guard a
        # missing floor area would fall through to the "200+" label.
        return None
    if area < 73:
        return "<73"
    if area < 99:
        return "73-98"
    if area < 200:
        return "99-200"
    return "200+"


# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(_floor_area_bracket)

# Counts recorded alongside the original code:
# 73-98 185
# <73 156
# 99-200 23

# Wall type is the text before the first comma of the "walls" description.
epc_data["wall_type"] = epc_data["walls"].str.split(",").str[0]
# Bare expression: the result is discarded when run as a script.
epc_data["wall_type"].value_counts()

# Counts recorded alongside the original code:
# Cavity wall 343
# Timber frame 15
# System built 6
|
|
|
|
# We pull some additional data from the database.
# We want:
# 1) The list of properties included in the portfolio, with UPRN
# 2) The recommendations against each property, with costs, and whether or not each recommendation was defaulted
# 3) The properties without recommendations, and why
|
|
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
import numpy as np
|
|
from sqlalchemy.orm import sessionmaker
|
|
from backend.app.db.connection import db_engine
|
|
from backend.app.db.models.recommendations import (
|
|
Recommendation,
|
|
PlanModel,
|
|
PlanRecommendations,
|
|
)
|
|
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
|
|
|
|
|
def _row_as_dict(instance, model):
    """Return ``{column name: value}`` for every column of ``model``'s table."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """Fetch properties, plans and default recommendations as lists of dicts.

    Args:
        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
        scenario_ids: Collection of values matched against
            ``PlanModel.scenario_id``.

    Returns:
        A ``(properties_data, plans_data, recommendations_data)`` tuple:

        * ``properties_data`` - one dict per joined property / EPC-details row,
          holding every column of both tables. Where both tables have a column
          of the same name, the ``PropertyDetailsEpcModel`` value wins because
          it is unpacked last.
        * ``plans_data`` - one dict per plan in the requested scenarios. Plans
          are filtered by scenario only, not by ``portfolio_id``.
        * ``recommendations_data`` - one dict per default recommendation
          attached to those plans, with an extra ``"Scenario ID"`` key.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Properties of the portfolio joined to their EPC details.
        property_rows = (
            session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )
        properties_data = [
            {
                **_row_as_dict(row.PropertyModel, PropertyModel),
                **_row_as_dict(row.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for row in property_rows
        ]

        # Plans belonging to the requested scenarios.
        plan_rows = (
            session.query(PlanModel)
            .filter(PlanModel.scenario_id.in_(scenario_ids))
            .all()
        )
        plans_data = [_row_as_dict(plan, PlanModel) for plan in plan_rows]

        # Plan IDs used to reach recommendations through PlanRecommendations.
        plan_ids = [plan["id"] for plan in plans_data]

        # Default recommendations on those plans, paired with the scenario of
        # the plan they belong to.
        recommendation_rows = (
            session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(
                PlanModel,
                PlanModel.id == PlanRecommendations.plan_id,
            )
            .filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                # "== True" builds a SQL expression here; "is True" would not.
                Recommendation.default == True,  # noqa: E712
            )
            .all()
        )
        recommendations_data = [
            {
                **_row_as_dict(
                    row.Recommendation if hasattr(row, "Recommendation") else row,
                    Recommendation,
                ),
                "Scenario ID": row.scenario_id,
            }
            for row in recommendation_rows
        ]
    finally:
        # Release the connection even when one of the queries raises.
        session.close()

    return properties_data, plans_data, recommendations_data
|
|
|
|
|
|
# Fetch portfolio 124 / scenario 205 and wrap each result set in a DataFrame.
properties_data, plans_data, recommendations_data = get_data(
    scenario_ids=[205],
    portfolio_id=124,
)
properties_df, plans_df, recommendations_df = (
    pd.DataFrame(rows)
    for rows in (properties_data, plans_data, recommendations_data)
)

# Row mask for recommendations flagged as default.
_is_default = recommendations_df["default"]

# Cost of each default measure per property.
recommended_measures_df = recommendations_df.loc[
    _is_default, ["property_id", "measure_type", "estimated_cost"]
]

# Sum up the sap points by property id
post_install_sap = (
    recommendations_df.loc[_is_default, ["property_id", "sap_points"]]
    .groupby("property_id")[["sap_points"]]
    .sum()
    .reset_index()
)
|
|
|
|
# Raw measure_type value -> label used in the exported column names.
_MEASURE_LABELS = {
    "air_source_heat_pump": "Air Source Heat Pump",
    "cavity_wall_insulation": "Cavity Wall Insulation",
    "double_glazing": "Double Glazing",
    "loft_insulation": "Loft Insulation",
    "mechanical_ventilation": "Ventilation",
    "solar_pv": "Solar PV",
}

# One row per property, one column per measure type holding its estimated cost.
# NOTE: DataFrame.pivot raises ValueError if a (property_id, measure_type)
# pair occurs more than once.
recommendations_measures_pivot = (
    recommended_measures_df.pivot(
        index="property_id", columns="measure_type", values="estimated_cost"
    )
    .reset_index()
    .rename(
        columns={
            measure: f"Cost: {label}" for measure, label in _MEASURE_LABELS.items()
        }
    )
    # A measure that was not recommended for a property costs nothing.
    .fillna(0)
)

# A measure type that never appears in the data produces no pivot column; add
# it with zero cost so the lookups below (and later in the file) cannot fail.
for _label in _MEASURE_LABELS.values():
    if f"Cost: {_label}" not in recommendations_measures_pivot.columns:
        recommendations_measures_pivot[f"Cost: {_label}"] = 0

# A measure counts as recommended when it carries a positive cost.
for _label in _MEASURE_LABELS.values():
    recommendations_measures_pivot[f"Recommendation: {_label}"] = (
        recommendations_measures_pivot[f"Cost: {_label}"] > 0
    )
|
|
|
|
# Property columns carried into the export ("property_id" is only the join key).
_property_columns = [
    "property_id",
    "uprn",
    "address",
    "postcode",
    "property_type",
    "walls",
    "roof",
    "heating",
    "windows",
    "current_epc_rating",
    "current_sap_points",
    "total_floor_area",
    "number_of_rooms",
]

# Export headings. Columns without an entry (e.g. "property_type") keep their
# name, and entries with no matching column are ignored by rename().
_export_headings = {
    "uprn": "UPRN",
    "address": "Address",
    "postcode": "Postcode",
    "walls": "Walls",
    "roof": "Roof",
    "heating": "Heating",
    "windows": "Windows",
    "current_epc_rating": "Current EPC Rating",
    "current_sap_points": "Current SAP Points",
    "total_floor_area": "Total Floor Area",
    "number_of_rooms": "Number of Habitable Rooms",
    "floor_height": "Floor Height",
}

# Left joins keep every property, including those with no recommendations;
# such properties get NaN in the joined columns, and their SAP uplift is
# then set to 0.
df = (
    properties_df[_property_columns]
    .merge(recommendations_measures_pivot, how="left", on="property_id")
    .merge(post_install_sap, how="left", on="property_id")
    .drop(columns=["property_id"])
    .assign(sap_points=lambda frame: frame["sap_points"].fillna(0))
    .rename(columns=_export_headings)
)
|
|
|
|
# Labels of the measures that have "Cost: ..." / "Recommendation: ..." columns.
_measure_labels = (
    "Air Source Heat Pump",
    "Cavity Wall Insulation",
    "Double Glazing",
    "Loft Insulation",
    "Ventilation",
    "Solar PV",
)
_cost_columns = [f"Cost: {_label}" for _label in _measure_labels]

# Properties with no recommendations have NaN in every cost column after the
# left join, so this must be computed BEFORE the costs are filled below.
df["Has Recommendations"] = df[_cost_columns].notna().any(axis=1)

# We fill missings:
df[_cost_columns] = df[_cost_columns].fillna(0)

# The recommendation flags are defined earlier in the file as "cost > 0".
# Re-deriving them from the filled costs gives the same values with a real
# bool dtype, instead of relying on fillna(False) downcasting an object column
# (deprecated since pandas 2.2).
for _label in _measure_labels:
    df[f"Recommendation: {_label}"] = df[f"Cost: {_label}"] > 0
|
|
|
|
# Predicted post-works SAP: current SAP plus the summed uplift of the default
# recommendations, rounded with Series.round().
_post_works_sap = (df["Current SAP Points"] + df["sap_points"]).round()
df["Predicted Post Works SAP"] = _post_works_sap
# Map each predicted SAP score to an EPC value with the project helper.
df["Predicted Post Works EPC"] = _post_works_sap.apply(sap_to_epc)

# Bare expressions: results are discarded when run as a script.
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()

# Write the export without the DataFrame index.
df.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
    index=False,
)
|