mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
224 lines
7.8 KiB
Python
224 lines
7.8 KiB
Python
"""
|
|
This script prepares the data for the financial model
|
|
"""
|
|
|
|
import pandas as pd
|
|
from backend.app.utils import sap_to_epc
|
|
from sqlalchemy.orm import sessionmaker
|
|
from backend.app.db.connection import db_engine
|
|
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
|
|
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
|
|
|
PORTFOLIO_ID = 206
|
|
SCENARIOS = [389]
|
|
|
|
|
|
def _columns_to_dict(instance, model):
    """Return a ``{column name: value}`` dict for every table column of ``model``, read from ``instance``."""
    return {column.name: getattr(instance, column.name) for column in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """
    Fetch the properties, plans and default recommendations used by the financial model.

    Plans are selected by scenario only; they are not restricted to the properties of the portfolio.

    :param portfolio_id: value matched against ``PropertyModel.portfolio_id``
    :param scenario_ids: collection of values matched against ``Plan.scenario_id``
    :return: tuple ``(properties_data, plans_data, recommendations_data)``, each a list of dicts keyed by
        column name. A property dict merges the PropertyModel and PropertyDetailsEpcModel columns (the
        latter wins when both tables share a column name). A recommendation dict also carries the
        scenario of its plan under the key ``"Scenario ID"``.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        property_rows = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Transform properties data to include all fields dynamically
        properties_data = [
            {
                **_columns_to_dict(row.PropertyModel, PropertyModel),
                **_columns_to_dict(row.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for row in property_rows
        ]

        # Get the plans that belong to the requested scenarios
        plans = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()

        # Transform plans data to include all fields dynamically
        plans_data = [_columns_to_dict(plan, Plan) for plan in plans]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default.
        # With no plans there is nothing to match, so the query is skipped.
        recommendation_rows = []
        if plan_ids:
            recommendation_rows = session.query(
                Recommendation,
                Plan.scenario_id
            ).join(
                PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
            ).join(
                Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
            ).filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                Recommendation.default == True  # noqa: E712 - SQL expression, not a Python comparison
            ).all()

        # Transform recommendations data to include all fields dynamically and include scenario_id.
        # Every row exposes both the Recommendation entity and the scenario_id column.
        recommendations_data = [
            {**_columns_to_dict(row.Recommendation, Recommendation), "Scenario ID": row.scenario_id}
            for row in recommendation_rows
        ]
    finally:
        # Release the connection even when one of the queries fails
        session.close()

    return properties_data, plans_data, recommendations_data
|
|
|
|
|
|
# Load the raw rows for the configured portfolio and scenarios
properties_data, plans_data, recommendations_data = get_data(
    portfolio_id=PORTFOLIO_ID,
    scenario_ids=SCENARIOS,
)

properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)

# Cost of every default recommendation, one row per recommended measure
recommended_measures_df = recommendations_df.loc[
    recommendations_df["default"],
    ["property_id", "measure_type", "estimated_cost"],
]

# Sum up the sap points of the default recommendations by property id
post_install_sap = (
    recommendations_df.loc[recommendations_df["default"], ["property_id", "sap_points"]]
    .groupby("property_id")[["sap_points"]]
    .sum()
    .reset_index()
)

# One row per property, one cost column per measure type.
# NOTE(review): pivot raises when a property carries the same measure_type twice - confirm this
# cannot happen when SCENARIOS holds more than one scenario.
recommendations_measures_pivot = recommended_measures_df.pivot(
    index='property_id', columns='measure_type', values='estimated_cost'
).reset_index()

# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = (
    recommendations_measures_pivot.drop(columns="property_id").sum(axis=1)
)

# Attach the measure costs and the summed sap points to the selected property fields
df = pd.merge(
    properties_df[
        [
            "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
            "current_epc_rating",
            "current_sap_points", "total_floor_area", "number_of_rooms",
        ]
    ],
    recommendations_measures_pivot,
    how="left",
    on="property_id",
)
df = pd.merge(df, post_install_sap, how="left", on="property_id")
df = df.drop(columns=["property_id"])

# Properties without default recommendations gain no sap points
df["sap_points"] = df["sap_points"].fillna(0)

df["predicted_post_works_sap"] = (df["current_sap_points"] + df["sap_points"]).round()
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)
|
|
|
|
# We merge this back to the main dataframe, which will contain the bathrooms
from utils.s3 import read_csv_from_s3

asset_list = pd.DataFrame(
    read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
)

# The merge key is cast to str - presumably the asset list stores uprn as text; TODO confirm
df["uprn"] = df["uprn"].astype(str)
asset_list = pd.merge(
    asset_list,
    df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
    how="left",
    on="uprn",
)

# Read the price table, then remove unnamed columns and reset index
condition_costs = (
    pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
        sheet_name="Prices - Khalim",
        header=35,
    )
    .loc[:, lambda sheet: ~sheet.columns.str.contains('^Unnamed')]
    .reset_index(drop=True)
)
|
|
|
|
|
|
# We now estimate condition cost
|
|
def simulate_condition(asset_list, condition_costs):
    """
    This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
    costing array looks like.

    For every condition score the matching row of ``condition_costs`` is multiplied by the floor area of the
    property, its ``Bathroom`` entry is additionally multiplied by the number of bathrooms, and the entries
    are summed into one total.

    :param asset_list: DataFrame with the columns ``uprn``, ``total_floor_area`` and either ``bathrooms`` or
        ``n_bathrooms`` (``bathrooms`` is used when both exist); numeric values may be given as strings
    :param condition_costs: DataFrame with a ``Condition`` column holding the scores 1 to 10, a ``Bathroom``
        column and any further cost columns
    :return: ``asset_list`` left-merged on ``uprn`` with the columns ``Condition 10`` down to ``Condition 1``
    :raises IndexError: if ``asset_list`` has rows and a score from 1 to 10 is missing in ``condition_costs``
    """
    condition_scores = list(reversed(range(1, 11)))
    labels = ["Condition " + str(score) for score in condition_scores]

    # Nothing to simulate: hand back the same columns the merge below would produce
    if asset_list.empty:
        simulated = asset_list.copy()
        for label in labels:
            simulated[label] = pd.Series(dtype="float64")
        return simulated

    # Accept both spellings of the bathroom count, keeping the historical one first
    bathroom_column = "bathrooms" if "bathrooms" in asset_list.columns else "n_bathrooms"

    # The cost row of a score does not depend on the property, so look each one up once
    unit_costs = {
        score: condition_costs[
            condition_costs["Condition"] == score
        ].drop(columns=["Condition"]).iloc[0]
        for score in condition_scores
    }

    records = []
    for _, row in asset_list.iterrows():
        floor_area = float(row["total_floor_area"])
        n_bathrooms = float(row[bathroom_column])

        record = {"uprn": row["uprn"]}
        for score, label in zip(condition_scores, labels):
            # Each cost is scaled by floor area; the multiplication yields a new Series,
            # so the cached cost row is never modified
            scaled_cost = unit_costs[score] * floor_area
            scaled_cost["Bathroom"] = scaled_cost["Bathroom"] * n_bathrooms
            record[label] = scaled_cost.sum()
        records.append(record)

    return asset_list.merge(
        pd.DataFrame(records),
        how="left",
        on="uprn"
    )
|
|
|
|
|
|
# asset_list = simulate_condition(asset_list, condition_costs)
|
|
|
|
# We calculate the condition cost based on the condition
# Create the output column up front so it exists even when every row is skipped
if "domna_condition_cost" not in asset_list.columns:
    asset_list["domna_condition_cost"] = float("nan")

for row_label, row in asset_list.iterrows():

    condition = row["condition_score"]
    # Rows without a usable score (None, NaN or blank text) get no cost
    if pd.isna(condition) or str(condition).strip() == "":
        continue
    condition = int(float(condition))

    condition_cost = condition_costs[
        condition_costs["Condition"] == condition
    ].drop(columns=["Condition"]).iloc[0]

    # Each cost is scaled by floor area
    condition_cost = condition_cost * float(row["total_floor_area"])
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)

    total_condition_cost = condition_cost.sum()
    # Write by row label instead of matching on uprn: asset_list comes out of a merge, so its labels
    # are unique, and rows with a missing or repeated uprn still receive their own cost
    asset_list.loc[row_label, "domna_condition_cost"] = total_condition_cost
|
|
|
|
# Store output
asset_list.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
    index=False,
)

# Side-by-side view of the score, the decoration sums and the calculated cost.
# NOTE(review): "decoration_sum_min " ends with a space - confirm it matches the asset list header.
condition_cost_comparison = asset_list.loc[
    :,
    ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"],
]
|