rename Plan and Scenario to PlanModel and ScenarioModel

This commit is contained in:
Daniel Roth 2026-02-12 12:01:39 +00:00
parent 73607a5117
commit b3fa7c3051
18 changed files with 1892 additions and 1230 deletions

View file

@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
class Outputs:
@ -42,7 +46,7 @@ class Outputs:
"flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
"room_in_roof_insulation": "RIR (POA - Prov sum only)",
"ev_charging": "EV Charging",
"battery": "Battery"
"battery": "Battery",
}
def __init__(self, format, portfolio_id):
@ -67,28 +71,38 @@ class Outputs:
# Download cleaned data
self.cleaned_epc_lookup = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)
def get_properties_from_db(self):
# Get properties and their details for a specific portfolio
properties_query = self.session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID
).all()
properties_query = (
self.session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(
PropertyModel.portfolio_id
== self.portfolio_id # Filter by portfolio ID
)
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
@ -96,10 +110,14 @@ class Outputs:
def get_plans_from_db(self):
plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
plans_query = (
self.session.query(PlanModel)
.filter(PlanModel.portfolio_id == self.portfolio_id)
.all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -107,28 +125,38 @@ class Outputs:
def get_recommendations_from_db(self, plan_ids):
# Get the default recommendations attached to those plans via PlanRecommendations
recommendations_query = self.session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
self.session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{
**{
col.name: getattr(rec.Recommendation, col.name) if
hasattr(rec, 'Recommendation') else getattr(rec, col.name)
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id
} for rec in recommendations_query
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
return recommendations_data
@ -148,7 +176,9 @@ class Outputs:
measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)
# If the property_id already exists in the collected rows, update it
existing_row = next((item for item in rows if item["property_id"] == property_id), None)
existing_row = next(
(item for item in rows if item["property_id"] == property_id), None
)
if existing_row is None:
# Create a new row if the property_id doesn't exist
new_row = {measure: None for measure in all_measures}
@ -196,7 +226,7 @@ class Outputs:
properties_data = self.get_properties_from_db()
plans_data = self.get_plans_from_db()
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
recommendations_data = self.get_recommendations_from_db(plan_ids)
self.session.close()
@ -209,50 +239,54 @@ class Outputs:
scenario_ids = plans_df["scenario_id"].unique()
# We start to create the MDS sheet
mds = properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
mds = (
properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].copy().rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
.copy()
.rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
)
)
mds["Estimated bill (£ per year)"] = (
mds["heating_cost_current"] +
mds["hot_water_cost_current"] +
mds["lighting_cost_current"] +
mds["gas_standing_charge"] +
mds["electricity_standing_charge"]
mds["heating_cost_current"]
+ mds["hot_water_cost_current"]
+ mds["lighting_cost_current"]
+ mds["gas_standing_charge"]
+ mds["electricity_standing_charge"]
)
mds = mds.drop(
@ -261,65 +295,84 @@ class Outputs:
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
"electricity_standing_charge",
]
)
# Formatting - Pre EPC is an enum
mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
mds["Wall Type (Mandatory field)"] = (
mds["Wall Type (Mandatory field)"].str.split(",").str[0]
)
# Remove average thermal transmittance field
mds["Wall Type (Mandatory field)"] = np.where(
mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
mds["Wall Type (Mandatory field)"].str.contains(
"Average thermal transmittance"
),
"",
mds["Wall Type (Mandatory field)"]
mds["Wall Type (Mandatory field)"],
)
mds = mds.merge(
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
["clean_description", "fuel_type"]
],
left_on="mainfuel",
right_on="clean_description",
how="left"
how="left",
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
columns=["clean_description", "mainfuel"]
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])
mds["Existing Fuel Type"].value_counts()
mds_output_by_scenario = {}
for scenario_id in scenario_ids:
scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
scenario_recommendations = recommendations_df[
recommendations_df["Scenario ID"] == scenario_id
]
# For each measure, we create the measure matrix
scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
scenario_measure_matrix = self.make_mds_measure_matrix(
scenario_recommendations
)
# Calculate the predicted impact on: SAP, heat demand, bills, kwh
recommendation_impacts = scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
].sum().reset_index()
recommendation_impacts = (
scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
]
.sum()
.reset_index()
)
scenario_mds = mds.merge(
scenario_measure_matrix, how="left", on="property_id"
).merge(
recommendation_impacts, how="left", on="property_id"
)
).merge(recommendation_impacts, how="left", on="property_id")
# If we have no recommendations, sap_points, heat_demand, kwh_savings and energy_cost_savings will be NaN
to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
for col in to_clean:
scenario_mds[col].fillna(0, inplace=True)
scenario_mds.fillna(0, inplace=True)
scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
scenario_mds["Post SAP"] = (
scenario_mds["EPC Source"] + scenario_mds["sap_points"]
)
# Round Post SAP down to the nearest integer
scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
lambda x: sap_to_epc(x)
)
scenario_mds["Heating Demand Kwh/m2/y"] = (
scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
scenario_mds["Existing Heating Demand Kwh/m2/y"]
- scenario_mds["heat_demand"]
)
scenario_mds = scenario_mds.rename(
columns={
"sap_points": "Predicted SAP Points",
"kwh_savings": "Energy Saving (Kwh)",
"energy_cost_savings": "Bill Reduction (£ per yr)"
"energy_cost_savings": "Bill Reduction (£ per yr)",
}
)
@ -330,7 +383,7 @@ class Outputs:
save_excel_to_s3(
df=scenario_mds,
file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
def export(self):

View file

@ -1,5 +1,10 @@
from sqlalchemy import func
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
from backend.app.db.models.recommendations import (
PlanModel,
PlanRecommendations,
Recommendation,
ScenarioModel,
)
def aggregate_portfolio_recommendations(
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
scenario_id: int,
total_valuation_increase: float,
labour_days: float,
aggregated_data: dict
aggregated_data: dict,
):
# Aggregate multiple fields
aggregates = (
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
func.sum(Recommendation.kwh_savings).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.co2_equivalent_savings).label(
"co2_equivalent_savings"
),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.join(
PlanRecommendations,
PlanRecommendations.recommendation_id == Recommendation.id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
Plan.portfolio_id == portfolio_id,
Plan.scenario_id == scenario_id,
Recommendation.default == True
PlanModel.portfolio_id == portfolio_id,
PlanModel.scenario_id == scenario_id,
Recommendation.default == True,
)
.one()
)
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
**aggregated_data
**aggregated_data,
}
# Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()
# Update the data
for key, value in aggregates_dict.items():

View file

@ -4,11 +4,11 @@ from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from backend.app.db.models.recommendations import (
Plan,
PlanModel,
Recommendation,
RecommendationMaterials,
PlanRecommendations,
Scenario,
ScenarioModel,
)
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
@ -138,7 +138,7 @@ def create_plan(session: Session, plan):
:param plan: dictionary of data representing a plan to be created
"""
try:
new_plan = Plan(**plan)
new_plan = PlanModel(**plan)
session.add(new_plan)
session.flush()
session.commit()
@ -160,7 +160,9 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
for p in plans_to_create
]
stmt = insert(Plan).values(payload).returning(Plan.id, Plan.property_id)
stmt = (
insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
)
result = session.execute(stmt).all()
@ -170,12 +172,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
def create_scenario(session: Session, scenario: dict) -> int:
existing_scenario = (
session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first()
session.query(ScenarioModel)
.filter_by(portfolio_id=scenario["portfolio_id"])
.first()
)
scenario["is_default"] = not bool(existing_scenario)
new_scenario = Scenario(**scenario)
new_scenario = ScenarioModel(**scenario)
session.add(new_scenario)
session.flush() # ensures ID is populated
@ -578,7 +582,9 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):
return
with db_session() as session:
session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
session.execute(
delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
)
print("Deleted scenarios for empty portfolio")
@ -611,11 +617,11 @@ def clear_portfolio_in_batches(
print("Portfolio cleared in batches.")
def get_plans_by_portfolio_id(portfolio_id: int) -> List[Plan]:
def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]:
raise NotImplementedError
def get_scenario(scenario_id: int) -> List[Scenario]:
def get_scenario(scenario_id: int) -> List[ScenarioModel]:
raise NotImplementedError

View file

@ -1,9 +1,18 @@
import enum
from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
from sqlalchemy import (
Column,
Integer,
String,
Float,
Enum,
TIMESTAMP,
BigInteger,
ForeignKey,
)
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.app.db.models.materials import MaterialType, Material
Base = declarative_base()
@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum):
class FundingPackage(Base):
__tablename__ = 'funding_package'
__tablename__ = "funding_package"
id = Column(Integer, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
scheme = Column(
Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
Enum(
SchemeEnum,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
project_funding = Column(Float)
@ -34,15 +47,23 @@ class FundingPackage(Base):
class FundingPackageMeasures(Base):
__tablename__ = 'funding_package_measures'
__tablename__ = "funding_package_measures"
id = Column(Integer, primary_key=True, autoincrement=True)
funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
measure = Column(
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
funding_package_id = Column(
BigInteger, ForeignKey(FundingPackage.id), nullable=False
)
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists
measure = Column(
Enum(
MaterialType,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
material_id = Column(
BigInteger, ForeignKey(Material.id), nullable=False
) # Assuming material table exists
innovation_uplift = Column(Float)
partial_project_score = Column(Float)
uplift_project_score = Column(Float)

View file

@ -74,7 +74,7 @@ class PlanTypeEnum(enum.Enum):
EXTRACTION_ECO = "extraction_eco"
class Plan(Base):
class PlanModel(Base):
__tablename__ = "plan"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
@ -139,7 +139,7 @@ class PlanRecommendations(Base):
)
class Scenario(Base):
class ScenarioModel(Base):
__tablename__ = "scenario"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)

View file

@ -1,12 +1,12 @@
from typing import List
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
class CategorisationLogic:
@staticmethod
def get_compliant_plans(plans: List[Plan]) -> List[Plan]:
def get_compliant_plans(plans: List[PlanModel]) -> List[PlanModel]:
raise NotImplementedError
@staticmethod
def get_cheapest_plan(plans: List[Plan]) -> Plan:
def get_cheapest_plan(plans: List[PlanModel]) -> PlanModel:
raise NotImplementedError

View file

@ -5,24 +5,24 @@ from backend.app.db.functions.recommendations_functions import (
get_property_ids,
set_plan_default,
)
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.categorisation.categorisation_logic import CategorisationLogic
def process_portfolio(portfolio_id: int) -> None:
# Get all plans (including scenarios) for all properties in the portfolio
plans: List[Plan] = get_plans_by_portfolio_id(portfolio_id)
plans: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id)
# For each property, get all compliant plans
property_ids: List[int] = get_property_ids(portfolio_id)
# For each property, find the cheapest compliant plan
for id in property_ids:
plans_for_property: List[Plan] = [
plans_for_property: List[PlanModel] = [
plan for plan in plans if plan.property_id == id
]
compliant_plans_for_property: List[Plan] = (
compliant_plans_for_property: List[PlanModel] = (
CategorisationLogic.get_compliant_plans(plans_for_property)
)

View file

@ -41,7 +41,10 @@ epc_data = pd.read_csv(
# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
lambda x: (
"<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
)
)
# 73-98 185
# <73 156
@ -65,7 +68,11 @@ import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans belonging to the requested scenarios
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get the default recommendations attached to those plans via PlanRecommendations
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=124, scenario_ids=[205]
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
"double_glazing": "Cost: Double Glazing",
"loft_insulation": "Cost: Loft Insulation",
"mechanical_ventilation": "Cost: Ventilation",
"solar_pv": "Cost: Solar PV"
"solar_pv": "Cost: Solar PV",
}
)
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
recommendations_measures_pivot["Cost: Solar PV"] > 0
)
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
df = df.drop(columns=["property_id"])
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])
# We fill missings:
for col in [
"Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
"Recommendation: Solar PV"
"Recommendation: Air Source Heat Pump",
"Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing",
"Recommendation: Loft Insulation",
"Recommendation: Ventilation",
"Recommendation: Solar PV",
]:
df[col] = df[col].fillna(False)
for col in [
"Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
"Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
"Cost: Solar PV"
"Cost: Air Source Heat Pump",
"Cost: Cavity Wall Insulation",
"Cost: Double Glazing",
"Cost: Loft Insulation",
"Cost: Ventilation",
"Cost: Solar PV",
]:
df[col] = df[col].fillna(0)
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()
df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
df.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
index=False,
)

View file

@ -4,7 +4,11 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans belonging to the requested scenarios
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get the default recommendations attached to those plans via PlanRecommendations
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
else getattr(rec, col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -94,16 +121,34 @@ def app():
)
property_asset_data = properties_df.merge(
mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
mod_property_data.drop(columns=["address", "postcode", "tenure"]),
how="left",
on="uprn",
)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
"pitched", case=False
)
property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
property_asset_data["is_insulated"] = (
property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
["filled cavity", "with external insulation", "filled cavity and external insulation"]
) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
property_asset_data["wall_type"] = (
property_asset_data["walls"].str.split(" ").str[0].str.strip()
)
property_asset_data["is_insulated"] = property_asset_data["walls"].str.split(
","
).str[1].str.strip().isin(
[
"filled cavity",
"with external insulation",
"filled cavity and external insulation",
]
) | property_asset_data[
"walls"
].str.split(
","
).str[
2
].str.strip().isin(
["insulated"]
)
property_asset_data["is_insulated"] = np.where(
property_asset_data["is_insulated"], "Insulated", "Uninsulated"
@ -115,18 +160,26 @@ def app():
property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
)
archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
archetype_variables = [
"property_type",
"wall_type",
"is_insulated",
"is_pitched",
"pre_1970",
]
assigned_archetypes = (
property_asset_data.groupby(
archetype_variables
).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
property_asset_data.groupby(archetype_variables)
.size()
.reset_index()
.rename(columns={0: "n_properties"})
.sort_values("n_properties", ascending=False)
)
# Make the archetype ID a concatenation of the variables
assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
lambda x: "_".join(x.astype(str)), axis=1
)
assigned_archetypes["archetype_id"] = assigned_archetypes[
archetype_variables
].apply(lambda x: "_".join(x.astype(str)), axis=1)
# Most prominent archetypes
prominent_archetypes = assigned_archetypes.head(6)
@ -136,7 +189,7 @@ def app():
property_asset_data = property_asset_data.merge(
assigned_archetypes[archetype_variables + ["archetype_id"]],
how="left",
on=archetype_variables
on=archetype_variables,
)
# Create age bands:
@ -148,7 +201,7 @@ def app():
property_asset_data["age_band"] = pd.cut(
property_asset_data["BUILD_YEAR"],
bins=[1959, 1969, 1979, 1989, 1999, 2022],
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
)
# Create floor area bands
@ -159,47 +212,59 @@ def app():
property_asset_data["floor_area_band"] = pd.cut(
property_asset_data["total_floor_area"],
bins=[0, 73, 97, 199, 10000],
labels=["0-73", "74-97", "98-199", "200+"]
labels=["0-73", "74-97", "98-199", "200+"],
)
property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
property_asset_data["archetype_group"] = np.where(
property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
property_asset_data["archetype_id"].isin(
other_archetypes["archetype_id"].values
),
"other",
property_asset_data["archetype_group"]
property_asset_data["archetype_group"],
)
# For colour
wall_types = (
property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
columns={"wall_type": "Wall Type"}
)
property_asset_data[["wall_type"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"wall_type": "Wall Type"})
)
# Group into age bands
ages = (
property_asset_data[["age_band"]].value_counts()
property_asset_data[["age_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("age_band", ascending=True)
.reset_index()
.sort_values("age_band", ascending=True)
.rename(columns={"age_band": "Age Band"})
)
floor_area_bands = (
property_asset_data[["floor_area_band"]].value_counts()
property_asset_data[["floor_area_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("floor_area_band", ascending=True)
.reset_index()
.sort_values("floor_area_band", ascending=True)
.rename(columns={"floor_area_band": "Floor Area Band"})
)
archetype_counts = (
property_asset_data[["archetype_group"]].
value_counts().
to_frame().
reset_index()
property_asset_data[["archetype_group"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"archetype_group": "Archetype"})
)
property_types = (
(property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
value_counts().
to_frame().
reset_index()
(
property_asset_data["property_type"]
+ ": "
+ property_asset_data["built_form"]
)
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"index": "Property Type", 0: "Count"})
)
@ -217,18 +282,24 @@ def app():
totals = property_asset_data[
[
"Total_household_members",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
].copy()
totals["total_cost"] = (
totals["heating_cost_current"] +
totals["hot_water_cost_current"] +
totals["lighting_cost_current"] +
totals["appliances_cost_current"] +
totals["gas_standing_charge"] +
totals["electricity_standing_charge"]
totals["heating_cost_current"]
+ totals["hot_water_cost_current"]
+ totals["lighting_cost_current"]
+ totals["appliances_cost_current"]
+ totals["gas_standing_charge"]
+ totals["electricity_standing_charge"]
)
print(
totals[
@ -259,38 +330,59 @@ def app():
scenario_recommendations_df = recommendations_df[
recommendations_df["Scenario ID"] == scenario
].copy()
].copy()
scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
scenario_recommendations_df["contingency"] = (
contingency * scenario_recommendations_df["estimated_cost"]
)
scenario_recommendations_df["total_cost"] = (
scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
scenario_recommendations_df["estimated_cost"]
+ scenario_recommendations_df["contingency"]
)
recommended_measures_df = scenario_recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
# Metrics by property ID
aggregated_metrics = scenario_recommendations_df[
[
"property_id", "type", "default", "sap_points",
"energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
"total_cost"
"property_id",
"type",
"default",
"sap_points",
"energy_cost_savings",
"kwh_savings",
"co2_equivalent_savings",
"estimated_cost",
"contingency",
"total_cost",
]
]
aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
aggregated_metrics = aggregated_metrics.groupby("property_id")[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].sum().reset_index()
aggregated_metrics = (
aggregated_metrics.groupby("property_id")[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.sum()
.reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -299,30 +391,58 @@ def app():
for c in recommendations_measures_pivot.columns:
if c == "property_id":
continue
recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
recommendations_measures_pivot["Recommendation: " + c] = (
recommendations_measures_pivot[c] > 0
)
# We now create a final output
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
aggregated_metrics, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(aggregated_metrics, how="left", on="property_id")
)
df["bills_total_cost"] = (
df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
df["heating_cost_current"]
+ df["hot_water_cost_current"]
+ df["lighting_cost_current"]
+ df["appliances_cost_current"]
+ df["gas_standing_charge"]
+ df["electricity_standing_charge"]
)
df = df.drop(columns=["property_id"])
for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
for c in [
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
]:
df[c] = df[c].fillna(0)
df = df.rename(
@ -345,16 +465,23 @@ def app():
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
# Calculate the relative savings on carbon, kwh, and bills
df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
df["relative_carbon_savings"] = (
df["co2_equivalent_savings"] / df["co2_emissions"]
)
df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
# Add on the archetype
df = df.merge(
property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
property_asset_data[["uprn", "archetype_group"]],
how="left",
left_on="UPRN",
right_on="uprn",
)
# For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
@ -387,7 +514,9 @@ def app():
printing_scenario_id = scenario_ids[0]
# EPC breakdown
print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
print(
scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
)
# Cost
# Total cost
print(scenario_data[printing_scenario_id]["total_cost"].sum())
@ -408,16 +537,24 @@ def app():
measure_details = {}
for scenario in scenario_ids:
measure_details[scenario] = {}
recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
recommendation_cols = [
c for c in scenario_data[scenario].columns if "Recommendation:" in c
]
measure_details[scenario]["count"] = (
scenario_data[scenario][recommendation_cols].sum().to_dict()
)
# Get average cost per measure
measure_columns = [
c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
c.split("Recommendation: ")[1]
for c in scenario_data[scenario].columns
if "Recommendation:" in c
]
# Take the mean, drop zero columns
measure_costs = {}
for m in measure_columns:
measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
measure_costs[m] = float(
scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
)
measure_details[scenario]["cost_per_measure"] = measure_costs
pprint(measure_details[scenario_ids[0]]["count"])
@ -452,12 +589,27 @@ def app():
for scenario in scenario_ids:
df = scenario_data[scenario].copy()
avg_savings = df[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].mean().to_dict()
avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
avg_savings = (
df[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.mean()
.to_dict()
)
avg_savings["cost_per_sap_point"] = (
avg_savings["total_cost"] / avg_savings["sap_points"]
)
avg_savings["cost_per_carbon"] = (
avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
)
scenario_metrics[scenario] = avg_savings
pprint(scenario_metrics[scenario_ids[0]])
@ -465,11 +617,11 @@ def app():
scenario_data[scenario_ids[0]]["loft_insulation"][
scenario_data[scenario_ids[0]]["loft_insulation"] > 0
].mean()
].mean()
scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
].mean()
].mean()
# Testing: checking flood risk
@ -477,11 +629,7 @@ def app():
def get_flood_risk(lat, lon, radius_km=1):
url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
params = {
'lat': lat,
'long': lon,
'dist': radius_km # search radius in km
}
params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km
response = requests.get(url, params=params)
response.raise_for_status()
@ -495,20 +643,19 @@ def app():
print(f"{len(flood_warnings)} warning(s) found near the location:")
for warning in flood_warnings:
print(f"- Area: {warning.get('description')}")
print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
print(
f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
)
print(f" Message changed at: {warning.get('timeMessageChanged')}")
print()
return flood_warnings
from shapely.geometry import shape, Point
def get_flood_areas_near_point(lat, lon, radius_km=2):
url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
params = {
'lat': lat,
'long': lon,
'dist': radius_km
}
params = {"lat": lat, "long": lon, "dist": radius_km}
response = requests.get(url, params=params)
response.raise_for_status()
@ -531,7 +678,7 @@ def app():
if not features:
continue
flood_polygon = shape(features[0]['geometry'])
flood_polygon = shape(features[0]["geometry"])
try:
is_inside = flood_polygon.contains(point)
@ -539,12 +686,17 @@ def app():
is_inside = False
if is_inside:
print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
print(
f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
)
return area
from tqdm import tqdm
floor_warnings_data = []
for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
for _, property in tqdm(
property_asset_data.iterrows(), total=len(property_asset_data)
):
# warnings = floor_warnings_data.extend(
# get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
# )
@ -556,7 +708,7 @@ def app():
"uprn": property["uprn"],
"address": property["address"],
"postcode": property["postcode"],
"area": resp
"area": resp,
}
)
continue
@ -570,7 +722,7 @@ def app():
"House_Cavity_Uninsulated_Pitched roof_Post 1970",
"other",
"House_System_Uninsulated_Pitched roof_Pre 1970",
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
]
values = [62, 36, 21, 16, 16, 4, 2]
@ -582,36 +734,39 @@ def app():
"Cavity wall insulation, ventilation",
"Bespoke retrofit measures",
"External wall insulation, roof insulation",
"Flat roof insulation, internal wall insulation"
"Flat roof insulation, internal wall insulation",
]
fig = go.Figure(go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4),
colors=values,
colorscale="Blues"
fig = go.Figure(
go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4), colors=values, colorscale="Blues"
),
)
))
)
fig.update_layout(
margin=dict(t=10, l=10, r=10, b=10),
plot_bgcolor="white",
paper_bgcolor="white"
margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
)
fig.show()
# Get the recommended measures by scenario id
recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
recommendation_cols
].sum().reset_index()
recommendation_cols = [
c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
]
measure_counts_by_scenario = (
scenario_data[scenario_ids[1]]
.groupby("archetype_group")[recommendation_cols]
.sum()
.reset_index()
)
measure_counts_by_scenario.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
@ -630,15 +785,13 @@ def app():
to_append = {"uprn": uprn}
for _id in scenario_ids:
scenario = scenario_data[_id][
scenario_data[_id]["uprn"] == uprn
].squeeze()
scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()
val = PropertyValuation.estimate_valuation_improvement(
current_value=x["valuation"],
current_epc=scenario["Current EPC Rating"].value,
target_epc=scenario["Predicted Post Works EPC"],
total_cost=None
total_cost=None,
)
to_append[_id] = val["average_increase"]

File diff suppressed because it is too large Load diff

View file

@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them
Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
or recommendations in case something went wrong
"""
import pandas as pd
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
return [
uprn
for (uprn,) in
session.query(PropertyModel.uprn)
for (uprn,) in session.query(PropertyModel.uprn)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
if uprn is not None
@ -34,7 +34,7 @@ with db_session() as session:
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
@ -44,7 +44,7 @@ missed_properties.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_failed_properties_to_restart_20260102.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
@ -52,14 +52,14 @@ scenario_id = None
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    """Return how many plans are stored for ``scenario_id``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        scenario_id: Scenario whose plans are counted.

    Returns:
        The row count produced by a single ``SELECT count(*)`` over
        ``PlanModel`` filtered on ``scenario_id``.
    """
    # Only the renamed model is referenced; the pre-rename ``Plan`` chain that
    # was left next to it has been dropped.
    return session.execute(
        select(func.count())
        .select_from(PlanModel)
        .where(PlanModel.scenario_id == scenario_id)
    ).scalar_one()
@ -69,8 +69,7 @@ with db_session() as session:
def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    """Return the id of every plan that belongs to ``scenario_id``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        scenario_id: Scenario whose plan ids are wanted.

    Returns:
        A list with one plan id per matching ``PlanModel`` row.
    """
    # A single SELECT on the renamed model; the leftover ``select(Plan.id)``
    # expression that preceded it made the call syntactically invalid.
    result = session.execute(
        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    return [row.id for row in result]
@ -84,7 +83,7 @@ from sqlalchemy.orm import Session
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sliceable sequence that supports ``len()`` (list, tuple,
            str, ...).
        size: Maximum number of items per slice; the last slice may be shorter.

    Yields:
        ``iterable[i : i + size]`` for ``i = 0, size, 2 * size, ...``.
    """
    for i in range(0, len(iterable), size):
        # Exactly one yield per offset - a second, duplicated yield would hand
        # every batch to the caller twice.
        yield iterable[i : i + size]
from sqlalchemy import text
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -5,6 +5,7 @@ This includes:
# EPC C, there should be a plan
2) If the plan is fabric first, make sure they are actually fabric first
"""
import pandas as pd
scenario_names = {
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
)
# find properties that are below the scenario sap target, but have no recommended measures
df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
df["below_scenario_target"] = (
df["current_sap_points"] < scenario_sap_targets[scenario_id]
)
df["no_recommended_measures"] = df["sap_points"] == 0
df["zero_cost"] = df["total_retrofit_cost"] == 0
df["sap_points_above_zero"] = df["sap_points"] > 0
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
].copy()
if scenario_sap_targets[scenario_id] == 81:
problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
problematic_properties = problematic_properties[
problematic_properties["property_type"] != "Flat"
]
zero_cost_above_zero_sap = df[
(df["sap_points_above_zero"] & df["zero_cost"])
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
# pd.set_option('display.width', 1000)
# problematic_properties.head(len(problematic_properties))
print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
print(
f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
)
print(
f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
)
problems.append(problematic_properties)
problems.append(zero_cost_above_zero_sap)
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal, sal2])
@ -114,7 +123,7 @@ retry.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_problematic_properties_to_review_20260106.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Delete associated plans
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import sessionmaker
def get_property_ids_for_uprns(
    session: Session, portfolio_id: int, uprns: list[int]
) -> list[int]:
    """Return the ids of the portfolio's properties whose UPRN is in ``uprns``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        portfolio_id: Portfolio the properties must belong to.
        uprns: UPRNs to look up.

    Returns:
        The ``PropertyModel.id`` of every row matching both conditions.
    """
    # ``prop`` rather than ``property`` so the builtin is not shadowed.
    return [
        prop.id
        for prop in session.query(PropertyModel)
        .filter(
            PropertyModel.portfolio_id == portfolio_id,
            PropertyModel.uprn.in_(uprns),
        )
        .all()
    ]
@ -149,15 +159,21 @@ with db_session() as session:
# Get all and delete plans for these property IDs
def get_all_plans_for_property_ids(
    session: Session, property_ids: list[int]
) -> list[PlanModel]:
    """Return every plan attached to any of ``property_ids``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: Property ids whose plans should be loaded.

    Returns:
        The matching ``PlanModel`` instances.
    """
    # Single definition on the renamed model; the earlier copy annotated with
    # the removed ``Plan`` name has been dropped.
    return (
        session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all()
    )
def get_ids_of_plans_for_deletion(
    session: Session, property_ids: list[int]
) -> list[int]:
    """Return the ids of all plans attached to any of ``property_ids``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: Property ids whose plans are to be deleted.

    Returns:
        One plan id per matching ``PlanModel`` row.
    """
    # Only the renamed model is iterated; the stale ``for plan in
    # session.query(Plan)`` clause left beside it has been removed.
    return [
        plan.id
        for plan in session.query(PlanModel)
        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    ]
@ -168,7 +184,7 @@ with db_session() as session:
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sliceable sequence that supports ``len()`` (list, tuple,
            str, ...).
        size: Maximum number of items per slice; the last slice may be shorter.

    Yields:
        ``iterable[i : i + size]`` for ``i = 0, size, 2 * size, ...``.
    """
    for i in range(0, len(iterable), size):
        # Exactly one yield per offset - a second, duplicated yield would hand
        # every batch to the caller twice.
        yield iterable[i : i + size]
from sqlalchemy import text
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
from sqlalchemy import text, select
from backend.app.db.connection import db_read_session
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
PORTFOLIO_ID = 435
with db_read_session() as session:
    # Load the EPC detail rows for every property in portfolio PORTFOLIO_ID.
    # NOTE(review): the `estimated == True` condition is commented out below, so
    # despite the variable name this currently returns ALL EPC rows for the
    # portfolio, not only the estimated ones - confirm that is intended.
    estimated_epcs = (
        session.query(PropertyDetailsEpcModel)
        .filter(
            # PropertyDetailsEpcModel.estimated == True,
            PropertyDetailsEpcModel.property_id.in_(
                session.query(PropertyModel.id).filter(
                    PropertyModel.portfolio_id == PORTFOLIO_ID
                )
            )
        )
        .all()
    )
# Get the ids
estimated_epc_ids = [epc.property_id for epc in estimated_epcs]
# I want to get the UPRNS for these properties, from the property model
with db_read_session() as session:
    # estimated_epc_ids holds *property* ids (it is built from epc.property_id
    # and is later matched against PlanModel.property_id), so it is compared with
    # PropertyModel.id directly. Matching it against PropertyDetailsEpcModel.id
    # would compare property ids with a different column of the EPC table.
    # NOTE(review): assumes PropertyDetailsEpcModel.id is that table's own key
    # rather than a copy of property_id - confirm against the model definition.
    estimated_uprns = (
        session.query(PropertyModel.uprn)
        .filter(PropertyModel.id.in_(estimated_epc_ids))
        .all()
    )
estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]
@ -35,16 +45,16 @@ with db_read_session() as session:
sal_1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal_2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal_1, sal_2])
sal = sal.drop_duplicates(subset=['epc_os_uprn'])
sal = sal.drop_duplicates(subset=["epc_os_uprn"])
estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()
@ -55,20 +65,24 @@ SCENARIOS = [
# 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
# 859, # EPC C - no solid floor, ashp 3.0
# 885, # EPC B - fabric first, no solid floor, ashp 3.0
908, 909, 910
908,
909,
910,
]
# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
with db_read_session() as session:
result = session.execute(
select(Plan.id, Plan.property_id)
.where(Plan.property_id.in_(estimated_epc_ids))
select(PlanModel.id, PlanModel.property_id).where(
PlanModel.property_id.in_(estimated_epc_ids)
)
)
plans = [
{
"plan_id": row.id,
"property_id": row.property_id,
} for row in result
}
for row in result
]
df = pd.DataFrame(plans)
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)
# Store the SAL
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
b1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 1"
sheet_name="batch 1",
)
b2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 2"
sheet_name="batch 2",
)
b3 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 3"
sheet_name="batch 3",
)
b4 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 4"
sheet_name="batch 4",
)
b5 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 5"
sheet_name="batch 5",
)
# Batch 6 should be the remaining
total = pd.concat([b1, b2, b3, b4, b5])
remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
# Create new output
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
b5.to_excel(writer, sheet_name="batch 5", index=False)
remaining.to_excel(writer, sheet_name="batch 6", index=False)
all_together = pd.concat(
[b1, b2, b3, b4, b5, remaining]
)
all_together = pd.concat([b1, b2, b3, b4, b5, remaining])

View file

@ -110,14 +110,17 @@ import pandas as pd
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
970
]
SCENARIOS = [970]
scenario_names = {
970: "EPC C - no solid floor, ashp 3.0",
}
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Plans
# --------------------
plans_query = session.query(Plan).filter(
Plan.scenario_id.in_(scenario_ids)
).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
properties_df.to_excel(writer, sheet_name="properties", index=False)
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
# # Check tenures
# initial_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "

View file

@ -4,7 +4,7 @@ import pandas as pd
full_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
"SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------Pull in the reduced sample ------
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
reduced_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
"ownership filtered sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------ Pull in the confirmed ownership column from Peabody ------
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Properties"
sheet_name="Properties",
)
correct_sample = new_asset_data[
~new_asset_data["AH Tenure"].isin(
["Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership"]
[
"Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership",
]
)
].copy()
@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
]["UPRN"].values
sal_to_add = full_sal[
full_sal["domna_property_id"].isin(stuff_to_add)
].copy()
sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()
# ------- Stuff to remove -------
stuff_to_remove = reduced_sal[
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()

View file

@ -7,7 +7,7 @@ from sqlalchemy.sql import true
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.app.utils import sap_to_epc
EPC_COLOURS = {
@ -17,7 +17,7 @@ EPC_COLOURS = {
"D": "#fdd401",
"E": "#fdab67",
"F": "#ee8023",
"G": "#e71437"
"G": "#e71437",
}
@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
its associated default recommendations if any.
"""
# Adjust the join to correctly filter recommendations while including all properties
query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
(Recommendation.property_id == PropertyModel.id) & (
Recommendation.default == true())) \
.filter(PropertyModel.portfolio_id == portfolio_id) \
query = (
session.query(PropertyModel, Recommendation)
.outerjoin(
Recommendation,
(Recommendation.property_id == PropertyModel.id)
& (Recommendation.default == true()),
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties = {}
for property, recommendation in query:
# Ensure the property is added once with an empty list of recommendations initially
if property.id not in properties:
properties[property.id] = row2dict(property)
properties[property.id]['recommendations'] = []
properties[property.id]["recommendations"] = []
# Append recommendations if they exist and meet the criteria (already filtered by the query)
if recommendation and recommendation.default:
properties[property.id]['recommendations'].append(row2dict(recommendation))
properties[property.id]["recommendations"].append(row2dict(recommendation))
return list(properties.values())
@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a property's details.
Returns an empty list if no property details are found.
"""
property_details = session.query(PropertyDetailsEpcModel).filter(
PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
property_details = (
session.query(PropertyDetailsEpcModel)
.filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
.all()
)
# Convert the SQLAlchemy objects to dictionaries
property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
property_details_dict = (
[row2dict(pd) for pd in property_details] if property_details else []
)
return property_details_dict
@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a plan.
Returns an empty list if no plans are found.
"""
plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
plans = (
session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
)
# Convert the SQLAlchemy objects to dictionaries
plans_dict = [row2dict(plan) for plan in plans] if plans else []
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
return plans_dict
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
def plot_epc_distribution(
df,
customer_key,
title="Your Units",
background_color="white",
bar_height=0.4,
font_size=15,
):
"""
Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color
:param font_size: Base font size for text annotations (default 15)
"""
# Calculate dynamic figure size or adjust based on preferences
square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries
square_size = max(
6, len(df) * 0.6
) # Ensure minimum size and adjust based on number of entries
fig, ax = plt.subplots(figsize=(square_size, square_size))
fig.patch.set_facecolor(background_color) # Set figure background color
ax.set_facecolor(background_color) # Set axes background color
df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place
df_sorted = df.sort_values('percentage', ascending=True)
df["percentage"] = df["percentage"].round(
1
) # Round the percentage values to 1 decimal place
df_sorted = df.sort_values("percentage", ascending=True)
# Plot bars with specified height for adjustable thickness
bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
bars = ax.barh(
df_sorted["current_epc_rating"],
df_sorted["percentage"],
color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
edgecolor="none",
height=bar_height,
)
epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size
count_percentage_font_size = font_size # Count (percentage) font size as base font size
epc_rating_font_size = (
font_size * 2
) # EPC rating font size larger than base font size
count_percentage_font_size = (
font_size # Count (percentage) font size as base font size
)
# Annotate bars with EPC ratings inside and count with percentage values outside
for index, bar in enumerate(bars):
width = bar.get_width()
epc_rating = df_sorted.iloc[index]['current_epc_rating']
count = df_sorted.iloc[index]['count']
percentage = df_sorted.iloc[index]['percentage']
epc_rating = df_sorted.iloc[index]["current_epc_rating"]
count = df_sorted.iloc[index]["count"]
percentage = df_sorted.iloc[index]["percentage"]
# EPC rating inside the bar with increased font size
ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
ax.text(
width - (width * 0.05),
bar.get_y() + bar.get_height() / 2,
f"{epc_rating}",
va="center",
ha="right",
color="white",
fontsize=epc_rating_font_size,
)
# Count and percentage outside the bar, original font size
ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
ax.text(
width + 1,
bar.get_y() + bar.get_height() / 2,
f"{count} ({percentage}%)",
va="center",
color="black",
fontsize=count_percentage_font_size,
)
ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally
ax.tick_params(axis='x', which='both', bottom=False, top=False,
labelbottom=False) # Remove x-axis tick marks and values
ax.tick_params(axis='y', which='both', left=False, right=False,
labelleft=False) # Remove y-axis tick marks and labels
ax.spines['top'].set_visible(False) # Remove top spine
ax.spines['right'].set_visible(False) # Remove right spine
ax.spines['left'].set_visible(False) # Remove left spine
ax.spines['bottom'].set_visible(False) # Remove bottom spine
ax.set_title(
title, fontsize=font_size * 1.2
) # Adjust title font size proportionally
ax.tick_params(
axis="x", which="both", bottom=False, top=False, labelbottom=False
) # Remove x-axis tick marks and values
ax.tick_params(
axis="y", which="both", left=False, right=False, labelleft=False
) # Remove y-axis tick marks and labels
ax.spines["top"].set_visible(False) # Remove top spine
ax.spines["right"].set_visible(False) # Remove right spine
ax.spines["left"].set_visible(False) # Remove left spine
ax.spines["bottom"].set_visible(False) # Remove bottom spine
plt.tight_layout() # Adjust layout
plt.show()
# Save the figure as an image
figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
fig.savefig(figure_path, bbox_inches='tight')
figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
fig.savefig(figure_path, bbox_inches="tight")
plt.close(fig) # Close the figure to free memory
return fig, figure_path
def save_plot_to_image(figure, path='plot.png'):
def save_plot_to_image(figure, path="plot.png"):
"""
Saves a matplotlib figure to an image file for insertion into PowerPoint.
"""
figure.savefig(path, bbox_inches='tight')
figure.savefig(path, bbox_inches="tight")
plt.close(figure)
def save_figure_as_image(figure, filename='temp_plot.png'):
def save_figure_as_image(figure, filename="temp_plot.png"):
"""
Saves a matplotlib figure to an image file.
"""
figure.savefig(filename, dpi=300)
plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments
plt.close(
figure
) # Close the figure to prevent it from displaying in notebooks or Python environments
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
height_inches=Inches(2)):
def add_commentary_with_bullets(
slide,
commentary,
top_inches,
left_inches=Inches(1),
width_inches=Inches(8),
height_inches=Inches(2),
):
"""
Adds commentary with bullet points to a slide.
@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
:param width_inches: The width of the commentary text box.
:param height_inches: The height of the commentary text box.
"""
txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
txBox = slide.shapes.add_textbox(
left_inches, top_inches, width_inches, height_inches
)
tf = txBox.text_frame
# Configure text frame
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
for i, section in enumerate(sections):
if i > 0:
p = tf.add_paragraph() # Add a new paragraph for each section after the first
p = (
tf.add_paragraph()
) # Add a new paragraph for each section after the first
else:
p = tf.paragraphs[0] # Use the first paragraph for the first section
p.text = section
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
# Determine the position of the commentary text box based on whether an image is included
if img_path:
# Add the image
slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
slide.shapes.add_picture(
img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
)
# Position for commentary when image is present
commentary_top = Inches(6)
else:
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
prs = Presentation()
for slide, slide_data in data.items():
slide_figure_path = data[slide].get('image_path')
text = data[slide].get('text')
title = data[slide].get('title', "")
slide_figure_path = data[slide].get("image_path")
text = data[slide].get("text")
title = data[slide].get("title", "")
add_slide_with_image(prs, title, slide_figure_path, text)
# Save the presentation
prs.save(save_location)
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
def create_recommendations_summary(
recommendations_df, properties_df, property_details_df, sap_target
):
# Aggregate the impact of the recommendations
# We want:
# Total number of sap points
@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d
# total bill savings
# total cost
# Total Co2 impact
recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
total_sap_points=("sap_points", "sum"),
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum"),
adjusted_heat_demand=("adjusted_heat_demand", "sum")
).reset_index()
recommendations_summary = (
recommendations_df.groupby(["property_id"])
.agg(
total_sap_points=("sap_points", "sum"),
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum"),
adjusted_heat_demand=("adjusted_heat_demand", "sum"),
)
.reset_index()
)
# Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
recommendations_summary = recommendations_summary.merge(
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
how="left"
properties_df[["id", "uprn", "current_sap_points"]].rename(
columns={"id": "property_id"}
),
on="property_id",
how="left",
)
recommendations_summary["expected_sap_points"] = (
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
recommendations_summary["current_sap_points"]
+ recommendations_summary["total_sap_points"]
)
recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
lambda x: sap_to_epc(x)
recommendations_summary["expected_epc_rating"] = recommendations_summary[
"expected_sap_points"
].apply(lambda x: sap_to_epc(x))
recommendations_summary["sap_difference"] = (
sap_target - recommendations_summary["expected_sap_points"]
)
recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
if property_details_df is not None:
recommendations_summary = recommendations_summary.merge(
property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
property_details_df[
["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
].rename(
columns={
"id": "property_id",
"co2_emissions": "current_co2",
"adjusted_energy_consumption": "current_energy",
"energy_bill": "current_energy_bill"
"energy_bill": "current_energy_bill",
}
),
on="uprn",
how="left"
how="left",
)
return recommendations_summary

View file

@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine, db_read_session
from backend.app.db.models.recommendations import (
Recommendation,
Plan,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
@ -73,12 +73,12 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
latest_plans_subq = (
session.query(
Plan.scenario_id,
Plan.property_id,
func.max(Plan.created_at).label("latest_created_at"),
PlanModel.scenario_id,
PlanModel.property_id,
func.max(PlanModel.created_at).label("latest_created_at"),
)
.filter(Plan.scenario_id.in_(scenario_ids))
.group_by(Plan.scenario_id, Plan.property_id)
.filter(PlanModel.scenario_id.in_(scenario_ids))
.group_by(PlanModel.scenario_id, PlanModel.property_id)
.subquery()
)
@ -87,12 +87,12 @@ def get_data(portfolio_id, scenario_ids):
# ).all()
plans_query = (
session.query(Plan)
session.query(PlanModel)
.join(
latest_plans_subq,
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
& (Plan.property_id == latest_plans_subq.c.property_id)
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
(PlanModel.scenario_id == latest_plans_subq.c.scenario_id)
& (PlanModel.property_id == latest_plans_subq.c.property_id)
& (PlanModel.created_at == latest_plans_subq.c.latest_created_at),
)
.all()
)
@ -108,7 +108,7 @@ def get_data(portfolio_id, scenario_ids):
# )
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -118,12 +118,14 @@ def get_data(portfolio_id, scenario_ids):
# Recommendations (NO materials yet)
# --------------------
recommendations_query = (
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
session.query(
Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id
)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),