Merge pull request #708 from Hestia-Homes/feature/automate-categorisation-of-works

Automate categorisation of works - local runner
This commit is contained in:
Daniel Roth 2026-02-16 12:37:13 +00:00 committed by GitHub
commit a4ae2ea26a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 2967 additions and 1483 deletions

View file

@ -6,7 +6,7 @@
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
"source=${localEnv:HOME},target=/home/vscode,type=bind"
],
"customizations": {
"vscode": {
@ -22,7 +22,11 @@
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs",
"ms-python.black-formatter",
"waderyan.gitblame"
"waderyan.gitblame",
"GrapeCity.gc-excelviewer",
"jakobhoeg.vscode-pokemon",
"github.vscode-github-actions",
"me-dutour-mathieu.vscode-github-actions"
],
"settings": {
"files.defaultWorkspace": "/workspaces/model",
@ -38,3 +42,4 @@
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

10
.vscode/settings.json vendored
View file

@ -9,12 +9,14 @@
"path": "/bin/bash"
}
},
<<<<<<< HEAD
=======
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"],
"python.languageServer": "Pylance",
"python.analysis.typeCheckingMode": "strict",
"python.analysis.autoSearchPaths": true,
"python.analysis.extraPaths": ["./src"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

View file

@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
class Outputs:
@ -42,7 +46,7 @@ class Outputs:
"flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
"room_in_roof_insulation": "RIR (POA - Prov sum only)",
"ev_charging": "EV Charging",
"battery": "Battery"
"battery": "Battery",
}
def __init__(self, format, portfolio_id):
@ -67,28 +71,38 @@ class Outputs:
# Download cleaned data
self.cleaned_epc_lookup = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)
def get_properties_from_db(self):
# Get properties and their details for a specific portfolio
properties_query = self.session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID
).all()
properties_query = (
self.session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(
PropertyModel.portfolio_id
== self.portfolio_id # Filter by portfolio ID
)
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
@ -96,10 +110,14 @@ class Outputs:
def get_plans_from_db(self):
plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
plans_query = (
self.session.query(PlanModel)
.filter(PlanModel.portfolio_id == self.portfolio_id)
.all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -107,28 +125,38 @@ class Outputs:
def get_recommendations_from_db(self, plan_ids):
    """
    Fetch the default recommendations linked to the given plans.

    Every returned dict contains all columns of the Recommendation table, keyed by column name,
    plus a "Scenario ID" entry holding the scenario_id of the plan the recommendation is linked to.
    :param plan_ids: ids of the plans whose linked recommendations should be fetched
    :return: list of dicts, one per row returned by the query (empty when plan_ids is empty)
    """
    plan_ids = list(plan_ids)
    if not plan_ids:
        # Nothing can match an empty IN (...) list, so skip the round trip to the database
        return []

    # Recommendations are reached through PlanRecommendations; PlanModel is joined only to
    # expose scenario_id alongside each recommendation
    rows = (
        self.session.query(Recommendation, PlanModel.scenario_id)
        .join(
            PlanRecommendations,
            Recommendation.id == PlanRecommendations.recommendation_id,
        )
        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
        .filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            # SQL comparison built by SQLAlchemy, not a Python truth test
            Recommendation.default == True,  # noqa: E712
        )
        .all()
    )

    # Each row exposes the entity as row.Recommendation and the extra column as row.scenario_id
    column_names = [col.name for col in Recommendation.__table__.columns]
    return [
        {
            **{name: getattr(row.Recommendation, name) for name in column_names},
            "Scenario ID": row.scenario_id,
        }
        for row in rows
    ]
@ -148,7 +176,9 @@ class Outputs:
measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)
# If the property_id already exists in the collected rows, update it
existing_row = next((item for item in rows if item["property_id"] == property_id), None)
existing_row = next(
(item for item in rows if item["property_id"] == property_id), None
)
if existing_row is None:
# Create a new row if the property_id doesn't exist
new_row = {measure: None for measure in all_measures}
@ -196,7 +226,7 @@ class Outputs:
properties_data = self.get_properties_from_db()
plans_data = self.get_plans_from_db()
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
recommendations_data = self.get_recommendations_from_db(plan_ids)
self.session.close()
@ -209,50 +239,54 @@ class Outputs:
scenario_ids = plans_df["scenario_id"].unique()
# We start to create the MDS sheet
mds = properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
mds = (
properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].copy().rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
.copy()
.rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
)
)
mds["Estimated bill (£ per year)"] = (
mds["heating_cost_current"] +
mds["hot_water_cost_current"] +
mds["lighting_cost_current"] +
mds["gas_standing_charge"] +
mds["electricity_standing_charge"]
mds["heating_cost_current"]
+ mds["hot_water_cost_current"]
+ mds["lighting_cost_current"]
+ mds["gas_standing_charge"]
+ mds["electricity_standing_charge"]
)
mds = mds.drop(
@ -261,65 +295,84 @@ class Outputs:
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
"electricity_standing_charge",
]
)
# Formatting - Pre EPC is an enum
mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
mds["Wall Type (Mandatory field)"] = (
mds["Wall Type (Mandatory field)"].str.split(",").str[0]
)
# Remove average thermal transmittance field
mds["Wall Type (Mandatory field)"] = np.where(
mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
mds["Wall Type (Mandatory field)"].str.contains(
"Average thermal transmittance"
),
"",
mds["Wall Type (Mandatory field)"]
mds["Wall Type (Mandatory field)"],
)
mds = mds.merge(
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
["clean_description", "fuel_type"]
],
left_on="mainfuel",
right_on="clean_description",
how="left"
how="left",
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
columns=["clean_description", "mainfuel"]
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])
mds["Existing Fuel Type"].value_counts()
mds_output_by_scenario = {}
for scenario_id in scenario_ids:
scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
scenario_recommendations = recommendations_df[
recommendations_df["Scenario ID"] == scenario_id
]
# For each measure, we create the measure matrix
scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
scenario_measure_matrix = self.make_mds_measure_matrix(
scenario_recommendations
)
# Calculate the predicted impact on: SAP, heat demand, bills, kwh
recommendation_impacts = scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
].sum().reset_index()
recommendation_impacts = (
scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
]
.sum()
.reset_index()
)
scenario_mds = mds.merge(
scenario_measure_matrix, how="left", on="property_id"
).merge(
recommendation_impacts, how="left", on="property_id"
)
).merge(recommendation_impacts, how="left", on="property_id")
# If we have no recommendations, sap_points, kwh_savings, heat_demand will be NaN
to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
for col in to_clean:
scenario_mds[col].fillna(0, inplace=True)
scenario_mds.fillna(0, inplace=True)
scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
scenario_mds["Post SAP"] = (
scenario_mds["EPC Source"] + scenario_mds["sap_points"]
)
# Round Post SAP down to the nearest integer
scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
lambda x: sap_to_epc(x)
)
scenario_mds["Heating Demand Kwh/m2/y"] = (
scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
scenario_mds["Existing Heating Demand Kwh/m2/y"]
- scenario_mds["heat_demand"]
)
scenario_mds = scenario_mds.rename(
columns={
"sap_points": "Predicted SAP Points",
"kwh_savings": "Energy Saving (Kwh)",
"energy_cost_savings": "Bill Reduction (£ per yr)"
"energy_cost_savings": "Bill Reduction (£ per yr)",
}
)
@ -330,7 +383,7 @@ class Outputs:
save_excel_to_s3(
df=scenario_mds,
file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
def export(self):

View file

@ -1,5 +1,10 @@
from sqlalchemy import func
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
from backend.app.db.models.recommendations import (
PlanModel,
PlanRecommendations,
Recommendation,
ScenarioModel,
)
def aggregate_portfolio_recommendations(
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
scenario_id: int,
total_valuation_increase: float,
labour_days: float,
aggregated_data: dict
aggregated_data: dict,
):
# Aggregate multiple fields
aggregates = (
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
func.sum(Recommendation.kwh_savings).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.co2_equivalent_savings).label(
"co2_equivalent_savings"
),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.join(
PlanRecommendations,
PlanRecommendations.recommendation_id == Recommendation.id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
Plan.portfolio_id == portfolio_id,
Plan.scenario_id == scenario_id,
Recommendation.default == True
PlanModel.portfolio_id == portfolio_id,
PlanModel.scenario_id == scenario_id,
Recommendation.default == True,
)
.one()
)
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
**aggregated_data
**aggregated_data,
}
# Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()
# Update the data
for key, value in aggregates_dict.items():

View file

@ -1,17 +1,33 @@
from sqlalchemy import text
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from typing import Any, Dict, List, Optional
from sqlalchemy import inspect, text, insert, delete, select, update
from sqlalchemy.orm import Session, Mapper
from sqlalchemy.exc import SQLAlchemyError
from sqlmodel import Session
from backend.app.db.models.recommendations import (
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
PlanModel,
Recommendation,
RecommendationMaterials,
PlanRecommendations,
ScenarioModel,
)
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
def prepare_plan_data(
p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
p,
body,
scenario_id,
eco_packages,
valuations,
new_sap_points,
new_epc,
default_recommendations,
rebaselining_carbon=0,
rebaselining_heat_demand=0,
rebaselining_kwh=0,
rebaselining_bills=0,
):
"""
Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured
@ -32,21 +48,37 @@ def prepare_plan_data(
"""
# Plan carbon savings
co2_savings = sum(
[r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["co2_equivalent_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings
# Plan bill savings
energy_bill_savings = sum(
[r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["energy_cost_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_energy_bill = (
sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
)
post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
# energy consumption
energy_consumption_savings = sum(
[r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["kwh_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_energy_consumption = (
p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
)
post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
valuation_post_retrofit, valuation_increase = None, None
if valuations["current_value"]:
@ -54,9 +86,19 @@ def prepare_plan_data(
valuation_post_retrofit = valuations["average_increased_value"]
# plan costing data
cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)])
cost_of_works = sum(
[
r["total"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
contingency_cost = sum(
[r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)]
[
r.get("contingency", 0)
for r in default_recommendations
if not r.get("already_installed", False)
]
)
return {
@ -86,7 +128,7 @@ def prepare_plan_data(
"valuation_increase": valuation_increase,
"cost_of_works": float(cost_of_works),
"contingency_cost": float(contingency_cost),
"plan_type": eco_packages.get(p.id, (None, None, None))[2]
"plan_type": eco_packages.get(p.id, (None, None, None))[2],
}
@ -97,7 +139,7 @@ def create_plan(session: Session, plan):
:param plan: dictionary of data representing a plan to be created
"""
try:
new_plan = Plan(**plan)
new_plan = PlanModel(**plan)
session.add(new_plan)
session.flush()
session.commit()
@ -120,9 +162,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
]
stmt = (
insert(Plan)
.values(payload)
.returning(Plan.id, Plan.property_id)
insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
)
result = session.execute(stmt).all()
@ -133,14 +173,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
def create_scenario(session: Session, scenario: dict) -> int:
existing_scenario = (
session.query(Scenario)
session.query(ScenarioModel)
.filter_by(portfolio_id=scenario["portfolio_id"])
.first()
)
scenario["is_default"] = not bool(existing_scenario)
new_scenario = Scenario(**scenario)
new_scenario = ScenarioModel(**scenario)
session.add(new_scenario)
session.flush() # ensures ID is populated
@ -167,7 +207,9 @@ def create_recommendation(session: Session, recommendation):
raise e
def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
def create_recommendation_material(
session: Session, recommendation_id, material_id, depth
):
"""
This function will create a record for the recommendation_material in the database if it does not exist.
:param session: The database session
@ -177,9 +219,7 @@ def create_recommendation_material(session: Session, recommendation_id, material
"""
new_recommendation_material = RecommendationMaterials(
recommendation_id=recommendation_id,
material_id=material_id,
depth=depth
recommendation_id=recommendation_id, material_id=material_id, depth=depth
)
session.add(new_recommendation_material)
session.flush()
@ -196,13 +236,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
"""
# Prepare a list of dictionaries for bulk insert
data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids]
data = [
{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids
]
# Bulk insert using SQLAlchemy's core API
session.execute(insert(PlanRecommendations).values(data))
def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
def upload_recommendations(
session: Session, recommendations_to_upload, property_id, new_plan_id
):
try:
# Prepare data for bulk insert for Recommendation
recommendations_data = [
@ -213,8 +257,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"description": rec["description"],
"estimated_cost": float(rec["total"]),
"default": rec["default"],
"starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
"new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
"starting_u_value": (
float(rec.get("starting_u_value"))
if rec.get("starting_u_value")
else None
),
"new_u_value": (
float(rec.get("new_u_value")) if rec.get("new_u_value") else None
),
"sap_points": float(rec["sap_points"]),
"energy_savings": float(rec["heat_demand"]),
"kwh_savings": float(rec["kwh_savings"]),
@ -223,13 +273,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"energy_cost_savings": float(rec["energy_cost_savings"]),
"labour_days": float(rec["labour_days"]),
"already_installed": rec["already_installed"],
"heat_demand": float(rec["heat_demand"])
"heat_demand": float(rec["heat_demand"]),
}
for rec in recommendations_to_upload
]
# Insert the recommendations, get back the IDs
stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
stmt = (
insert(Recommendation)
.returning(Recommendation.id)
.values(recommendations_data)
)
result = session.execute(stmt)
uploaded_recommendation_ids = [row[0] for row in result]
@ -243,11 +297,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"quantity_unit": part.get("quantity_unit", None),
"estimated_cost": float(part.get("total", part.get("total_cost"))),
}
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
for rec, recommendation_id in zip(
recommendations_to_upload, uploaded_recommendation_ids
)
for part in rec["parts"]
]
session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
session.bulk_insert_mappings(
RecommendationMaterials, recommendation_materials_data
)
# flush the changes to get the newly created IDs
session.flush()
@ -283,25 +341,27 @@ def bulk_upload_recommendations_and_materials(
plan_ids_by_index = []
for rec in recommendation_payload:
recommendation_rows.append({
"property_id": rec["property_id"],
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["estimated_cost"],
"default": rec["default"],
"starting_u_value": rec["starting_u_value"],
"new_u_value": rec["new_u_value"],
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"energy_savings": rec["energy_savings"],
"energy_cost_savings": rec["energy_cost_savings"],
"total_work_hours": rec["total_work_hours"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
})
recommendation_rows.append(
{
"property_id": rec["property_id"],
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["estimated_cost"],
"default": rec["default"],
"starting_u_value": rec["starting_u_value"],
"new_u_value": rec["new_u_value"],
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"energy_savings": rec["energy_savings"],
"energy_cost_savings": rec["energy_cost_savings"],
"total_work_hours": rec["total_work_hours"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
)
parts_by_index.append(rec["parts"])
plan_ids_by_index.append(rec["plan_id"])
@ -310,9 +370,7 @@ def bulk_upload_recommendations_and_materials(
# 2. Insert recommendations and get IDs
# ---------------------------------------------------------
result = session.execute(
insert(Recommendation)
.values(recommendation_rows)
.returning(Recommendation.id)
insert(Recommendation).values(recommendation_rows).returning(Recommendation.id)
)
recommendation_ids = [row[0] for row in result]
@ -324,19 +382,19 @@ def bulk_upload_recommendations_and_materials(
for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
for part in parts:
materials_rows.append({
"recommendation_id": recommendation_id,
"material_id": part["material_id"],
"depth": part["depth"],
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
})
materials_rows.append(
{
"recommendation_id": recommendation_id,
"material_id": part["material_id"],
"depth": part["depth"],
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
}
)
if materials_rows:
session.execute(
insert(RecommendationMaterials).values(materials_rows)
)
session.execute(insert(RecommendationMaterials).values(materials_rows))
# ---------------------------------------------------------
# 4. Insert plan ↔ recommendation links
@ -346,26 +404,22 @@ def bulk_upload_recommendations_and_materials(
"plan_id": plan_id,
"recommendation_id": recommendation_id,
}
for plan_id, recommendation_id in zip(
plan_ids_by_index, recommendation_ids
)
for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids)
]
session.execute(
insert(PlanRecommendations).values(plan_recommendation_rows)
)
session.execute(insert(PlanRecommendations).values(plan_recommendation_rows))
def chunked(iterable, size=100):
    """
    Yield consecutive slices of ``iterable`` holding at most ``size`` items each.
    :param iterable: a sliceable sequence that supports len() (list, tuple, str, ...)
    :param size: maximum number of items per chunk, must be at least 1
    :raises ValueError: if ``size`` is smaller than 1 (raised when iteration starts)
    """
    # A negative step would make range() empty and silently drop every item
    if size < 1:
        raise ValueError(f"chunk size must be at least 1, got {size}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
def get_property_ids(portfolio_id: int) -> list[int]:
    """
    Return the ids of all properties that belong to a portfolio.
    :param portfolio_id: id of the portfolio to look up
    :return: list of property ids
    """
    with db_read_session() as session:
        id_rows = (
            session.query(PropertyModel.id)
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )
        # Each row is a one-element tuple holding the property id
        return [row[0] for row in id_rows]
@ -381,12 +435,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# recommendation_materials (via recommendation)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING recommendation r
WHERE rm.recommendation_id = r.id
AND r.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -394,12 +450,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# plan_recommendations (via plan)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations pr
USING plan p
WHERE pr.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -407,13 +465,15 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# funding_package_measures
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM funding_package_measures fpm
USING funding_package fp, plan p
WHERE fpm.funding_package_id = fp.id
AND fp.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -421,10 +481,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# inspections (direct)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM inspections
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -432,12 +494,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# funding_package
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM funding_package fp
USING plan p
WHERE fp.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -445,10 +509,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# recommendation (direct — CRITICAL FIX)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -456,10 +522,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# plan (direct)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -467,18 +535,22 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# property-scoped tables
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM property_details_epc
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
session.execute(
text("""
text(
"""
DELETE FROM property_targets
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -486,10 +558,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# properties LAST
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM property
WHERE id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -510,8 +584,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):
with db_session() as session:
session.execute(
delete(Scenario)
.where(Scenario.portfolio_id == portfolio_id)
delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
)
print("Deleted scenarios for empty portfolio")
@ -530,6 +603,7 @@ def clear_portfolio_in_batches(
total = (len(property_ids) + property_batch_size - 1) // property_batch_size
import time
for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
start_time = time.time()
@ -542,3 +616,61 @@ def clear_portfolio_in_batches(
delete_portfolio_scenarios_if_empty(portfolio_id)
print("Portfolio cleared in batches.")
def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]:
    """
    Return every plan stored against the given portfolio.
    :param portfolio_id: id of the portfolio whose plans are wanted
    :return: list of PlanModel instances
    """
    with db_read_session() as session:
        untyped_session: Any = session  # Typed as Any to satisfy Pylance...
        plans_query = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id)
        return untyped_session.exec(plans_query).scalars().all()
def get_scenario(scenario_id: int) -> Optional[ScenarioModel]:
    """
    Look up a single scenario by its id.
    :param scenario_id: id of the scenario to fetch
    :return: the matching ScenarioModel, or None when no scenario has that id
    """
    with db_read_session() as session:
        untyped_session: Any = session  # Typed as Any to satisfy Pylance...
        scenario_query = select(ScenarioModel).where(ScenarioModel.id == scenario_id)
        return untyped_session.exec(scenario_query).scalar_one_or_none()
def _update_mapping(model: Any, skipped_columns: set) -> Dict[str, Any]:
    """Build a bulk-update mapping for one model: its column values plus the "id" key."""
    mapping: Dict[str, Any] = {
        column.name: getattr(model, column.name)
        for column in model.__table__.columns
        if column.name != "id" and column.name not in skipped_columns
    }
    # The primary key tells bulk_update_mappings which row to update
    mapping["id"] = model.id
    return mapping


def bulk_update_plans(
    plan_models: List[PlanModel],
    scenario_models: List[ScenarioModel],
) -> int:
    """
    Write the current column values of the given plans and scenarios back to the database.

    Rows are matched on their "id". Scenario updates never touch "portfolio_id". Both updates
    run in one session and are committed together.
    :param plan_models: plans to update; when empty nothing is written, scenarios included
    :param scenario_models: scenarios to update alongside the plans
    :return: the number of plans passed in (0 when plan_models is empty)
    """
    if not plan_models:
        return 0

    plan_mappings: List[Dict[str, Any]] = [
        _update_mapping(plan, set()) for plan in plan_models
    ]
    scenario_mappings: List[Dict[str, Any]] = [
        _update_mapping(scenario, {"portfolio_id"}) for scenario in scenario_models
    ]

    # This function writes and commits, so it uses the write session rather than the read one
    with db_session() as session:
        plan_mapper: Mapper[Any] = inspect(PlanModel)
        scenario_mapper: Mapper[Any] = inspect(ScenarioModel)

        session.bulk_update_mappings(plan_mapper, plan_mappings)
        session.bulk_update_mappings(scenario_mapper, scenario_mappings)
        session.commit()

    return len(plan_models)

View file

@ -1,9 +1,18 @@
import enum
from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
from sqlalchemy import (
Column,
Integer,
String,
Float,
Enum,
TIMESTAMP,
BigInteger,
ForeignKey,
)
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.app.db.models.materials import MaterialType, Material
Base = declarative_base()
@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum):
class FundingPackage(Base):
__tablename__ = 'funding_package'
__tablename__ = "funding_package"
id = Column(Integer, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
scheme = Column(
Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
Enum(
SchemeEnum,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
project_funding = Column(Float)
@ -34,15 +47,23 @@ class FundingPackage(Base):
class FundingPackageMeasures(Base):
__tablename__ = 'funding_package_measures'
__tablename__ = "funding_package_measures"
id = Column(Integer, primary_key=True, autoincrement=True)
funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
measure = Column(
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
funding_package_id = Column(
BigInteger, ForeignKey(FundingPackage.id), nullable=False
)
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists
measure = Column(
Enum(
MaterialType,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
material_id = Column(
BigInteger, ForeignKey(Material.id), nullable=False
) # Assuming material table exists
innovation_uplift = Column(Float)
partial_project_score = Column(Float)
uplift_project_score = Column(Float)

View file

@ -1,7 +1,17 @@
import enum
import pytz
import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy import (
Column,
Integer,
Text,
Boolean,
Float,
DateTime,
Enum,
ForeignKey,
CheckConstraint,
)
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.models.users import UserModel # noqa
from backend.app.db.models.materials import MaterialType
@ -22,7 +32,7 @@ class PortfolioStatus(enum.Enum):
NEEDS_REVIEW = "needs review"
class PortfolioGoal(enum.Enum):
class PortfolioGoal(enum.Enum): # TODO: Move to domain?
VALUATION_IMPROVEMENT = "Valuation Improvement"
INCREASING_EPC = "Increasing EPC"
REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions"
@ -31,23 +41,43 @@ class PortfolioGoal(enum.Enum):
class Portfolio(Base):
__tablename__ = 'portfolio'
__tablename__ = "portfolio"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(Text, nullable=False)
budget = Column(Float)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False)
status = Column(
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
goal = Column(
Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
cost = Column(Float)
number_of_properties = Column(Integer)
co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit
energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit
energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
co2_equivalent_savings = Column(
Float
) # Unit is always tonnes so we don't need to store the unit
energy_savings = Column(
Float
) # Unit is always kWh so we don't need to store the unit
energy_cost_savings = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
property_valuation_increase = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
total_work_hours = Column(Float)
labour_days = Column(Float)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
updated_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
# Aggregations for summary
epc_breakdown_pre_retrofit = Column(Text)
epc_breakdown_post_retrofit = Column(Text)
@ -71,7 +101,7 @@ class PropertyCreationStatus(enum.Enum):
ERROR = "ERROR"
class Epc(enum.Enum):
class Epc(enum.Enum): # TODO: Move to domain?
A = "A"
B = "B"
C = "C"
@ -82,20 +112,27 @@ class Epc(enum.Enum):
class PropertyModel(Base):
__tablename__ = 'property'
__tablename__ = "property"
id = Column(Integer, primary_key=True, autoincrement=True)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
uprn = Column(Integer)
landlord_property_id = Column(Text)
building_reference_number = Column(Integer)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
status = Column(
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
address = Column(Text)
postcode = Column(Text)
has_pre_condition_report = Column(Boolean)
has_recommendations = Column(Boolean)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
updated_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
property_type = Column(Text)
built_form = Column(Text)
local_authority = Column(Text)
@ -127,7 +164,7 @@ rating_lookup = {
"Average": FeatureRating.AVERAGE,
"Poor": FeatureRating.POOR,
"Very Poor": FeatureRating.VERY_POOR,
"N/A": FeatureRating.NA
"N/A": FeatureRating.NA,
}
@ -136,32 +173,45 @@ def get_feature_rating_from_string(rating_str: str):
class PropertyDetailsEpcModel(Base):
__tablename__ = 'property_details_epc'
__tablename__ = "property_details_epc"
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
full_address = Column(Text)
lodgement_date = Column(DateTime)
is_expired = Column(Boolean)
total_floor_area = Column(Float)
walls = Column(Text)
walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
walls_rating = Column(
Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5")
)
roof = Column(Text)
roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5"))
floor = Column(Text)
floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
floor_rating = Column(
Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5")
)
windows = Column(Text)
windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
windows_rating = Column(
Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5")
)
heating = Column(Text)
heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
heating_rating = Column(
Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5")
)
heating_controls = Column(Text)
heating_controls_rating = Column(
Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
Integer,
CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"),
)
hot_water = Column(Text)
hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
hot_water_rating = Column(
Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5")
)
lighting = Column(Text)
lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
lighting_rating = Column(
Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5")
)
mainfuel = Column(Text)
ventilation = Column(Text)
solar_pv = Column(Text)
@ -219,7 +269,7 @@ class PropertyDetailsSpatial(Base):
class PropertyDetailsMeter(Base):
__tablename__ = 'property_details_meter'
__tablename__ = "property_details_meter"
id = Column(Integer, primary_key=True, autoincrement=True)
uprn = Column(Integer, nullable=False)
energy_supplier = Column(Text)
@ -230,11 +280,13 @@ class PropertyDetailsMeter(Base):
class PropertyTargetsModel(Base):
__tablename__ = 'property_targets'
__tablename__ = "property_targets"
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
epc = Column(Enum(Epc))
heat_demand = Column(Text)
@ -242,23 +294,36 @@ class PropertyTargetsModel(Base):
class PortfolioUsers(Base):
__tablename__ = "portfolioUsers"
id = Column(Integer, primary_key=True, autoincrement=True)
user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
role = Column(Text, nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
updated_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
class PropertyInstalledMeasures(Base):
"""
This model keeps a record of the installed measures for each property, at the UPRN level
"""
__tablename__ = 'property_installed_measures'
__tablename__ = "property_installed_measures"
id = Column(Integer, primary_key=True, autoincrement=True)
uprn = Column(Integer, nullable=False)
measure_type = Column(
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
Enum(
MaterialType,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
installed_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))

View file

@ -1,7 +1,19 @@
from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
from sqlalchemy.orm import declarative_base
from typing import Iterable, List, NamedTuple, Optional, Type
from sqlalchemy import (
Column,
BigInteger,
String,
Float,
Boolean,
TIMESTAMP,
ForeignKey,
Enum,
)
from sqlalchemy.orm import declarative_base, Mapped, mapped_column
from sqlalchemy.sql import func
from backend.app.db.models.portfolio import Portfolio, PropertyModel
from datetime import datetime
from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel
from backend.app.db.models.materials import Material
from backend.app.db.models.portfolio import Epc
from datatypes.enums import QuantityUnits
@ -10,8 +22,12 @@ import enum
Base = declarative_base()
def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]:
    """Collect the ``value`` of each member of *enum_cls*, in definition order.

    Used as the ``values_callable`` of the ``goal`` column's ``Enum`` type.
    """
    values: List[str] = []
    for member in enum_cls:
        values.append(member.value)
    return values
class Recommendation(Base):
__tablename__ = 'recommendation'
__tablename__ = "recommendation"
id = Column(BigInteger, primary_key=True, autoincrement=True)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
@ -37,19 +53,24 @@ class Recommendation(Base):
class RecommendationMaterials(Base):
__tablename__ = 'recommendation_materials'
__tablename__ = "recommendation_materials"
id = Column(BigInteger, primary_key=True, autoincrement=True)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
recommendation_id = Column(
BigInteger, ForeignKey("recommendation.id"), nullable=False
)
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
depth = Column(Float, nullable=False)
quantity = Column(Float, nullable=False)
quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
quantity_unit = Column(
Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
estimated_cost = Column(Float, nullable=False)
class PlanTypeEnum(enum.Enum):
class PlanTypeEnum(enum.Enum): # TODO: move this to domain?
SOLAR_ECO4 = "solar_eco4"
SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
EMPTY_CAVITY_ECO = "empty_cavity_eco"
@ -57,20 +78,36 @@ class PlanTypeEnum(enum.Enum):
EXTRACTION_ECO = "extraction_eco"
class Plan(Base):
__tablename__ = 'plan'
class PlanModel(Base):
__tablename__ = "plan"
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=True, default="")
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
is_default = Column(Boolean, nullable=False)
valuation_increase_lower_bound = Column(Float)
valuation_increase_upper_bound = Column(Float)
valuation_increase_average = Column(Float)
plan_type = Column(
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="")
portfolio_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(Portfolio.id), nullable=False
)
property_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(PropertyModel.id), nullable=False
)
scenario_id: Mapped[Optional[int]] = mapped_column(
BigInteger, ForeignKey("scenario.id")
)
created_at: Mapped[datetime] = mapped_column( # type: ignore
TIMESTAMP, nullable=False, server_default=func.now()
)
is_default: Mapped[bool] = mapped_column(Boolean, nullable=False)
valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float)
plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column(
Enum(
PlanTypeEnum,
name="plan_type",
@ -79,73 +116,90 @@ class Plan(Base):
),
nullable=True,
)
post_sap_points = Column(Float)
post_epc_rating = Column(Enum(Epc))
post_co2_emissions = Column(Float)
co2_savings = Column(Float)
post_energy_bill = Column(Float)
energy_bill_savings = Column(Float)
post_energy_consumption = Column(Float) # energy demand in kWh/year
energy_consumption_savings = Column(Float)
valuation_post_retrofit = Column(Float)
valuation_increase = Column(Float)
post_sap_points: Mapped[Optional[float]] = mapped_column(Float)
post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc))
post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float)
co2_savings: Mapped[Optional[float]] = mapped_column(Float)
post_energy_bill: Mapped[Optional[float]] = mapped_column(Float)
energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float)
post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float)
energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float)
valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
# Financial metrics, excluding funding
cost_of_works = Column(Float)
contingency_cost = Column(Float)
cost_of_works: Mapped[Optional[float]] = mapped_column(Float)
contingency_cost: Mapped[Optional[float]] = mapped_column(Float)
class PlanRecommendations(Base):
__tablename__ = 'plan_recommendations'
__tablename__ = "plan_recommendations"
id = Column(BigInteger, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False)
recommendation_id = Column(
BigInteger, ForeignKey("recommendation.id"), nullable=False
)
class Scenario(Base):
__tablename__ = 'scenario'
class ScenarioModel(Base):
__tablename__ = "scenario"
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
budget = Column(Float)
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
housing_type = Column(String, nullable=False)
goal = Column(String, nullable=False)
goal_value = Column(String, nullable=False)
trigger_file_path = Column(String, nullable=False)
already_installed_file_path = Column(String)
patches_file_path = Column(String)
non_invasive_recommendations_file_path = Column(String)
exclusions = Column(String)
multi_plan = Column(Boolean, default=False)
is_default = Column(Boolean, default=False, nullable=False)
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
name: Mapped[str] = mapped_column(String, nullable=False)
created_at: Mapped[datetime] = mapped_column(
TIMESTAMP, nullable=False, server_default=func.now()
)
budget: Mapped[Optional[float]] = mapped_column(Float)
portfolio_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(Portfolio.id), nullable=False
)
housing_type: Mapped[str] = mapped_column(String, nullable=False)
goal: Mapped[PortfolioGoal] = mapped_column(
Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"),
nullable=False,
)
goal_value: Mapped[str] = mapped_column(String, nullable=False)
trigger_file_path: Mapped[str] = mapped_column(String, nullable=False)
already_installed_file_path: Mapped[Optional[str]] = mapped_column(String)
patches_file_path: Mapped[Optional[str]] = mapped_column(String)
non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column(
String
)
exclusions: Mapped[Optional[str]] = mapped_column(String)
multi_plan: Mapped[bool] = mapped_column(Boolean, default=False)
is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
# Add in the fields we need, which were previously sitting at the portfolio level
cost = Column(Float)
contingency = Column(Float)
funding = Column(Float)
total_work_hours = Column(Float)
energy_savings = Column(Float)
co2_equivalent_savings = Column(Float)
energy_cost_savings = Column(Float)
epc_breakdown_pre_retrofit = Column(String)
epc_breakdown_post_retrofit = Column(String)
number_of_properties = Column(BigInteger)
n_units_to_retrofit = Column(BigInteger)
co2_per_unit_pre_retrofit = Column(String)
co2_per_unit_post_retrofit = Column(String)
energy_bill_per_unit_pre_retrofit = Column(String)
energy_bill_per_unit_post_retrofit = Column(String)
energy_consumption_per_unit_pre_retrofit = Column(String)
energy_consumption_per_unit_post_retrofit = Column(String)
valuation_improvement_per_unit = Column(String)
cost_per_unit = Column(String)
cost_per_co2_saved = Column(String)
cost_per_sap_point = Column(String)
valuation_return_on_investment = Column(String)
property_valuation_increase = Column(Float)
labour_days = Column(Float)
cost: Mapped[Optional[float]] = mapped_column(Float)
contingency: Mapped[Optional[float]] = mapped_column(Float)
funding: Mapped[Optional[float]] = mapped_column(Float)
total_work_hours: Mapped[Optional[float]] = mapped_column(Float)
energy_savings: Mapped[Optional[float]] = mapped_column(Float)
co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float)
energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float)
epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger)
n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger)
co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(
String
)
energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(
String
)
valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String)
cost_per_unit: Mapped[Optional[str]] = mapped_column(String)
cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String)
cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String)
valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String)
property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
labour_days: Mapped[Optional[float]] = mapped_column(Float)
class MeasureType(enum.Enum):
@ -201,3 +255,12 @@ class InstalledMeasure(Base):
heat_demand_savings = Column(Float)
source = Column(String)
is_active = Column(Boolean, nullable=False, default=True)
def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]:
    """Return the ``value`` of every member yielded by *e*, in iteration order."""
    collected = []
    for member in e:
        collected.append(member.value)
    return collected
class PlanPersistence(NamedTuple):
    """Pair of ORM rows describing one plan and the scenario it belongs to.

    This is the return type of the domain ``Plan.to_sqlalchemy`` method.
    """

    # ORM row for the plan itself.
    plan: PlanModel
    # ORM row for the scenario the plan is attached to.
    scenario: ScenarioModel

View file

@ -0,0 +1,150 @@
from __future__ import annotations
from dataclasses import replace
from typing import Optional
from backend.app.db.models.portfolio import PortfolioGoal
from backend.app.db.models.recommendations import (
PlanModel,
PlanPersistence,
ScenarioModel,
)
from backend.app.domain.classes.scenario import Scenario
from backend.app.domain.records.plan_record import PlanRecord
from backend.app.utils import sap_to_epc
class Plan:
def __init__(
self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None
):
self.id: Optional[int] = id
self.record: PlanRecord = record
self.scenario: Scenario = scenario
@classmethod
def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan:
if not scenario:
raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}")
record = PlanRecord(
property_id=plan_model.property_id,
portfolio_id=plan_model.portfolio_id,
created_at=plan_model.created_at,
is_default=plan_model.is_default,
valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound,
valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound,
valuation_increase_average=plan_model.valuation_increase_average,
plan_type=plan_model.plan_type,
post_sap_points=plan_model.post_sap_points,
post_epc_rating=plan_model.post_epc_rating,
post_co2_emissions=plan_model.post_co2_emissions,
co2_savings=plan_model.co2_savings,
post_energy_bill=plan_model.post_energy_bill,
energy_bill_savings=plan_model.energy_bill_savings,
post_energy_consumption=plan_model.post_energy_consumption,
energy_consumption_savings=plan_model.energy_consumption_savings,
valuation_post_retrofit=plan_model.valuation_post_retrofit,
valuation_increase=plan_model.valuation_increase,
cost_of_works=plan_model.cost_of_works,
contingency_cost=plan_model.contingency_cost,
)
return cls(record=record, scenario=scenario, id=plan_model.id)
@property
def is_compliant(self) -> bool:
goal: PortfolioGoal = self.scenario.record.goal
match goal:
case PortfolioGoal.INCREASING_EPC:
return self._is_compliant_epc()
case _:
raise NotImplementedError
def to_sqlalchemy(self) -> PlanPersistence:
scenario_record = self.scenario.record
scenario_model = ScenarioModel(
id=self.scenario.id,
name=scenario_record.name,
created_at=scenario_record.created_at,
housing_type=scenario_record.housing_type,
goal=scenario_record.goal,
goal_value=scenario_record.goal_value,
trigger_file_path=scenario_record.trigger_file_path,
multi_plan=scenario_record.multi_plan,
is_default=scenario_record.is_default,
budget=scenario_record.budget,
already_installed_file_path=scenario_record.already_installed_file_path,
patches_file_path=scenario_record.patches_file_path,
non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path,
exclusions=scenario_record.exclusions,
cost=scenario_record.cost,
contingency=scenario_record.contingency,
funding=scenario_record.funding,
total_work_hours=scenario_record.total_work_hours,
energy_savings=scenario_record.energy_savings,
co2_equivalent_savings=scenario_record.co2_equivalent_savings,
energy_cost_savings=scenario_record.energy_cost_savings,
epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit,
epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit,
number_of_properties=scenario_record.number_of_properties,
n_units_to_retrofit=scenario_record.n_units_to_retrofit,
co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit,
co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit,
energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit,
energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit,
energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit,
energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit,
valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit,
cost_per_unit=scenario_record.cost_per_unit,
cost_per_co2_saved=scenario_record.cost_per_co2_saved,
cost_per_sap_point=scenario_record.cost_per_sap_point,
valuation_return_on_investment=scenario_record.valuation_return_on_investment,
property_valuation_increase=scenario_record.property_valuation_increase,
labour_days=scenario_record.labour_days,
)
record = self.record
plan_model = PlanModel(
id=self.id,
property_id=record.property_id,
portfolio_id=record.portfolio_id,
scenario_id=self.scenario.id,
created_at=record.created_at,
is_default=record.is_default,
valuation_increase_lower_bound=record.valuation_increase_lower_bound,
valuation_increase_upper_bound=record.valuation_increase_upper_bound,
valuation_increase_average=record.valuation_increase_average,
plan_type=record.plan_type,
post_sap_points=record.post_sap_points,
post_epc_rating=record.post_epc_rating,
post_co2_emissions=record.post_co2_emissions,
co2_savings=record.co2_savings,
post_energy_bill=record.post_energy_bill,
energy_bill_savings=record.energy_bill_savings,
post_energy_consumption=record.post_energy_consumption,
energy_consumption_savings=record.energy_consumption_savings,
valuation_post_retrofit=record.valuation_post_retrofit,
valuation_increase=record.valuation_increase,
cost_of_works=record.cost_of_works,
contingency_cost=record.contingency_cost,
)
return PlanPersistence(plan=plan_model, scenario=scenario_model)
def set_default(self, value: bool) -> None:
self.record = replace(self.record, is_default=value)
self.scenario.record = replace(self.scenario.record, is_default=value)
def _is_compliant_epc(self) -> bool:
goal_value: str = self.scenario.record.goal_value
if self.record.post_epc_rating:
post_epc = self.record.post_epc_rating.value
elif self.record.post_sap_points:
post_epc = sap_to_epc(self.record.post_sap_points)
else:
return False
return post_epc <= goal_value

View file

@ -0,0 +1,58 @@
from __future__ import annotations
from dataclasses import replace
from typing import Optional
from backend.app.db.models.recommendations import ScenarioModel
from backend.app.domain.records.scenario_record import ScenarioRecord
class Scenario:
    """Domain wrapper pairing a frozen ``ScenarioRecord`` with its database id."""

    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
        self.id = id
        self.record = record

    @classmethod
    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
        """Create a ``Scenario`` by copying the fields of an ORM scenario row."""
        row = scenario_model
        snapshot = ScenarioRecord(
            name=row.name,
            created_at=row.created_at,
            housing_type=row.housing_type,
            goal=row.goal,
            goal_value=row.goal_value,
            trigger_file_path=row.trigger_file_path,
            multi_plan=row.multi_plan,
            is_default=row.is_default,
            budget=row.budget,
            already_installed_file_path=row.already_installed_file_path,
            patches_file_path=row.patches_file_path,
            non_invasive_recommendations_file_path=row.non_invasive_recommendations_file_path,
            exclusions=row.exclusions,
            cost=row.cost,
            contingency=row.contingency,
            funding=row.funding,
            total_work_hours=row.total_work_hours,
            energy_savings=row.energy_savings,
            co2_equivalent_savings=row.co2_equivalent_savings,
            energy_cost_savings=row.energy_cost_savings,
            epc_breakdown_pre_retrofit=row.epc_breakdown_pre_retrofit,
            epc_breakdown_post_retrofit=row.epc_breakdown_post_retrofit,
            number_of_properties=row.number_of_properties,
            n_units_to_retrofit=row.n_units_to_retrofit,
            co2_per_unit_pre_retrofit=row.co2_per_unit_pre_retrofit,
            co2_per_unit_post_retrofit=row.co2_per_unit_post_retrofit,
            energy_bill_per_unit_pre_retrofit=row.energy_bill_per_unit_pre_retrofit,
            energy_bill_per_unit_post_retrofit=row.energy_bill_per_unit_post_retrofit,
            energy_consumption_per_unit_pre_retrofit=row.energy_consumption_per_unit_pre_retrofit,
            energy_consumption_per_unit_post_retrofit=row.energy_consumption_per_unit_post_retrofit,
            valuation_improvement_per_unit=row.valuation_improvement_per_unit,
            cost_per_unit=row.cost_per_unit,
            cost_per_co2_saved=row.cost_per_co2_saved,
            cost_per_sap_point=row.cost_per_sap_point,
            valuation_return_on_investment=row.valuation_return_on_investment,
            property_valuation_increase=row.property_valuation_increase,
            labour_days=row.labour_days,
        )
        return cls(snapshot, row.id)

    def set_default(self, value: bool) -> None:
        """Swap the frozen record for a copy whose ``is_default`` is *value*."""
        updated = replace(self.record, is_default=value)
        self.record = updated

View file

@ -0,0 +1,31 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from backend.app.db.models.portfolio import Epc
from backend.app.db.models.recommendations import PlanTypeEnum
@dataclass(frozen=True)
class PlanRecord:
    """Immutable snapshot of a plan's data.

    Instances are frozen; produce a changed copy with ``dataclasses.replace``
    rather than assigning to a field. The first four fields are required,
    every other field defaults to ``None``.
    """

    # Required identifying / bookkeeping fields.
    property_id: int
    portfolio_id: int
    created_at: datetime
    is_default: bool
    # Valuation increase estimates.
    valuation_increase_lower_bound: Optional[float] = None
    valuation_increase_upper_bound: Optional[float] = None
    valuation_increase_average: Optional[float] = None
    plan_type: Optional[PlanTypeEnum] = None
    # Post-retrofit figures and the corresponding savings.
    post_sap_points: Optional[float] = None
    post_epc_rating: Optional[Epc] = None
    post_co2_emissions: Optional[float] = None
    co2_savings: Optional[float] = None
    post_energy_bill: Optional[float] = None
    energy_bill_savings: Optional[float] = None
    post_energy_consumption: Optional[float] = None
    energy_consumption_savings: Optional[float] = None
    valuation_post_retrofit: Optional[float] = None
    valuation_increase: Optional[float] = None
    # Cost figures.
    cost_of_works: Optional[float] = None
    contingency_cost: Optional[float] = None

View file

@ -0,0 +1,47 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from backend.app.db.models.portfolio import PortfolioGoal
@dataclass(frozen=True)
class ScenarioRecord:
    """Immutable bag of scenario attributes.

    The first eight fields are required; every other field is optional and
    defaults to ``None``. Field order is significant for positional
    construction and must not be changed.
    """

    # Required fields (no defaults).
    name: str
    created_at: datetime
    housing_type: str
    goal: PortfolioGoal
    goal_value: str
    trigger_file_path: str
    multi_plan: bool
    is_default: bool

    # Optional inputs; each defaults to None when not supplied.
    budget: Optional[float] = None
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[str] = None

    # Optional totals; each defaults to None when not supplied.
    cost: Optional[float] = None
    contingency: Optional[float] = None
    funding: Optional[float] = None
    total_work_hours: Optional[float] = None
    energy_savings: Optional[float] = None
    co2_equivalent_savings: Optional[float] = None
    energy_cost_savings: Optional[float] = None
    epc_breakdown_pre_retrofit: Optional[str] = None
    epc_breakdown_post_retrofit: Optional[str] = None
    number_of_properties: Optional[int] = None
    n_units_to_retrofit: Optional[int] = None

    # Optional per-unit and ratio figures. Note these are typed as strings.
    co2_per_unit_pre_retrofit: Optional[str] = None
    co2_per_unit_post_retrofit: Optional[str] = None
    energy_bill_per_unit_pre_retrofit: Optional[str] = None
    energy_bill_per_unit_post_retrofit: Optional[str] = None
    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
    energy_consumption_per_unit_post_retrofit: Optional[str] = None
    valuation_improvement_per_unit: Optional[str] = None
    cost_per_unit: Optional[str] = None
    cost_per_co2_saved: Optional[str] = None
    cost_per_sap_point: Optional[str] = None
    valuation_return_on_investment: Optional[str] = None

    property_valuation_increase: Optional[float] = None
    labour_days: Optional[float] = None

View file

View file

@ -0,0 +1,5 @@
from pydantic import BaseModel
class CategorisationTriggerRequest(BaseModel):
    """Request model carrying the single integer field ``portfolio_id``."""

    portfolio_id: int

View file

@ -0,0 +1,47 @@
# Image for the categorisation handler, built on the AWS Lambda Python 3.11
# base image. Paths in the COPY instructions are relative to the build context.
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye
# Build-time arguments carrying the database host, port and name.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
# Set working directory (Lambda task root)
WORKDIR /var/task
# Environment
# The build args above are persisted into the image as DB_* variables.
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
# The test env file is shipped inside the image as backend/.env.
# NOTE(review): this COPY precedes the requirements layer, so editing
# .env.test also invalidates the cached pip install below - confirm intended.
COPY backend/.env.test backend/.env
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/categorisation/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
# Only selected files from backend/app are copied (connection.py, config.py
# and the package __init__ files).
# NOTE(review): code under backend/categorisation that imports other
# backend.app modules (e.g. db functions/models, domain classes) would not
# find them in this image - confirm before wiring such code into the handler.
COPY utils/ utils/
COPY backend/categorisation/ backend/categorisation/
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py
COPY backend/__init__.py backend/__init__.py
COPY backend/app/__init__.py backend/app/__init__.py
# -----------------------------
# Lambda handler
# -----------------------------
# Handler reference: the `handler` function in
# backend/categorisation/handler/handler.py, written as path-style module
# plus function name.
CMD ["backend/categorisation/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.categorisation.handler.handler"]

View file

@ -0,0 +1,10 @@
from typing import Any, Mapping
from utils.logger import setup_logger
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Placeholder handler taking an ``(event, context)`` pair.

    Both arguments are currently ignored and nothing is done.

    Args:
        event: Mapping describing the invocation; unused for now.
        context: Invocation context object; unused for now.
    """
    return None

View file

@ -0,0 +1,3 @@
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10

View file

@ -0,0 +1,11 @@
from backend.categorisation.processor import process_portfolio
def main(portfolio_id: int = 556) -> None:
    """Run the categorisation processor for a single portfolio.

    Args:
        portfolio_id: ID of the portfolio to process. Defaults to 556, the
            value that used to be hard-coded here, so ``main()`` with no
            arguments behaves exactly as before.
    """
    process_portfolio(portfolio_id)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,93 @@
from collections import defaultdict
from typing import Dict, List
from backend.app.db.functions.recommendations_functions import (
bulk_update_plans,
get_plans_by_portfolio_id,
get_scenario,
)
from backend.app.db.models.recommendations import PlanModel, ScenarioModel
from backend.app.domain.classes.plan import Plan
from backend.app.domain.classes.scenario import Scenario
from utils.logger import setup_logger
logger = setup_logger()
def process_portfolio(portfolio_id: int) -> None:
    """Select the default plan for every property in a portfolio.

    Loads the portfolio's plans, groups them by property, picks the cheapest
    relevant plan for each property and updates the plans' ``is_default``
    flags to match that choice.

    Args:
        portfolio_id: ID of the portfolio whose plans are processed.

    Raises:
        ValueError: If a property maps to an empty list of plans.
    """
    # Use the module logger rather than print, like the rest of this module.
    logger.info(f"Processing portfolio {portfolio_id}")

    plans: List[Plan] = _load_plans_for_portfolio(portfolio_id)
    plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans)

    # Keys are plan.record.property_id values, so name the variable accordingly.
    for property_id, property_plans in plans_by_property.items():
        if not property_plans:
            raise ValueError(f"No plans for property {property_id}")

        cheapest_plan = _choose_cheapest_relevant_plan(property_plans)
        _update_default_flags(property_plans, cheapest_plan)
def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]:
    """Load a portfolio's plans from the database as domain objects.

    Plans without a ``scenario_id`` are logged and skipped. Each distinct
    scenario is fetched once and reused for every plan that references it.

    Args:
        portfolio_id: ID of the portfolio whose plans are loaded.

    Returns:
        One ``Plan`` per stored plan that has a scenario, in the order the
        plan models were returned.
    """
    plan_models = get_plans_by_portfolio_id(portfolio_id)
    logger.info(f"Got {len(plan_models)} plans from database")

    # Plans may share a scenario; remember fetched scenario models by ID so
    # each one is only looked up once instead of once per plan.
    scenario_models_by_id: Dict[int, ScenarioModel] = {}

    plans: List[Plan] = []
    for model in plan_models:
        scenario_id = model.scenario_id
        if not scenario_id:
            logger.info(f"No Scenario associated with Plan of ID {model.id}")
            continue

        if scenario_id not in scenario_models_by_id:
            scenario_models_by_id[scenario_id] = get_scenario(scenario_id)

        # A fresh Scenario is still built per plan, so plans never share one.
        scenario = Scenario.from_sqlalchemy(scenario_models_by_id[scenario_id])
        plans.append(Plan.from_sqlalchemy(model, scenario))

    logger.info("Successfully mapped plan and scenario to domain object")
    return plans
def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
grouped: dict[int, List[Plan]] = defaultdict(list)
for plan in plans:
grouped[plan.record.property_id].append(plan)
return grouped
def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan:
plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans
def plan_cost(plan: Plan) -> float:
return (
plan.record.cost_of_works
if plan.record.cost_of_works is not None
else float("inf")
)
cheapest_plan = min(plans_to_consider, key=plan_cost)
return cheapest_plan
def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None:
plans_to_update: List[Plan] = []
for plan in plans:
should_be_default: bool = plan.id == cheapest_plan.id
if plan.record.is_default != should_be_default:
plan.set_default(should_be_default)
plans_to_update.append(plan)
if plans_to_update:
plan_models: List[PlanModel] = []
scenario_models: List[ScenarioModel] = []
for plan in plans_to_update:
plan_model, scenario_model = plan.to_sqlalchemy()
plan_models.append(plan_model)
scenario_models.append(scenario_model)
bulk_update_plans(plan_models, scenario_models)

View file

@ -0,0 +1,73 @@
from typing import Callable
import pytest
from datetime import datetime
from backend.app.domain.classes.plan import Plan
from backend.app.domain.classes.scenario import Scenario
from backend.app.domain.records.plan_record import PlanRecord
from backend.app.domain.records.scenario_record import ScenarioRecord
from backend.app.db.models.portfolio import Epc, PortfolioGoal
@pytest.fixture
def created_at_datetime() -> datetime:
    """Provide the current local time (naive) as a creation timestamp."""
    timestamp = datetime.now()
    return timestamp
@pytest.fixture
def epc_c_scenario(created_at_datetime: datetime) -> "Scenario":
    """Provide a scenario with the INCREASING_EPC goal and goal value "C"."""
    return Scenario(
        record=ScenarioRecord(
            name="EPC C",
            created_at=created_at_datetime,
            housing_type="",
            goal=PortfolioGoal.INCREASING_EPC,
            goal_value="C",
            trigger_file_path="",
            multi_plan=False,
            is_default=False,
        ),
        id=1,
    )
@pytest.fixture
def plan_factory(
    epc_c_scenario: "Scenario", created_at_datetime: datetime
) -> Callable[[int, "Epc"], "Plan"]:
    """Provide a builder for plans attached to the EPC C scenario.

    The returned callable takes the post-works SAP points and EPC rating and
    returns a non-default ``Plan`` (id 1, property 1, portfolio 1) linked to
    ``epc_c_scenario``.
    """

    def _make_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan":
        return Plan(
            record=PlanRecord(
                property_id=1,
                portfolio_id=1,
                created_at=created_at_datetime,
                is_default=False,
                post_sap_points=post_sap_points,
                post_epc_rating=post_epc_rating,
            ),
            scenario=epc_c_scenario,
            id=1,
        )

    return _make_plan
@pytest.mark.parametrize(
    "post_sap_points, post_epc_rating, expected_compliance",
    [
        (75, Epc.C, True),
        (100, Epc.A, True),
        (60, Epc.D, False),
    ],
)
def test_scenario_goal_is_epc_c(
    plan_factory: Callable[[int, "Epc"], "Plan"],
    post_sap_points: int,
    post_epc_rating: "Epc",
    expected_compliance: bool,
) -> None:
    """Check ``Plan.is_compliant`` against an EPC C goal for each case."""
    # arrange: a plan with the parametrised post-works figures
    plan_under_test = plan_factory(post_sap_points, post_epc_rating)

    # act + assert: the compliance flag matches the expectation
    assert plan_under_test.is_compliant == expected_compliance

View file

@ -41,7 +41,10 @@ epc_data = pd.read_csv(
# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
lambda x: (
"<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
)
)
# 73-98 185
# <73 156
@ -65,7 +68,11 @@ import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans linked to the fetched properties
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=124, scenario_ids=[205]
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
"double_glazing": "Cost: Double Glazing",
"loft_insulation": "Cost: Loft Insulation",
"mechanical_ventilation": "Cost: Ventilation",
"solar_pv": "Cost: Solar PV"
"solar_pv": "Cost: Solar PV",
}
)
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
recommendations_measures_pivot["Cost: Solar PV"] > 0
)
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
df = df.drop(columns=["property_id"])
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])
# We fill missings:
for col in [
"Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
"Recommendation: Solar PV"
"Recommendation: Air Source Heat Pump",
"Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing",
"Recommendation: Loft Insulation",
"Recommendation: Ventilation",
"Recommendation: Solar PV",
]:
df[col] = df[col].fillna(False)
for col in [
"Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
"Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
"Cost: Solar PV"
"Cost: Air Source Heat Pump",
"Cost: Cavity Wall Insulation",
"Cost: Double Glazing",
"Cost: Loft Insulation",
"Cost: Ventilation",
"Cost: Solar PV",
]:
df[col] = df[col].fillna(0)
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()
df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
df.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
index=False,
)

View file

@ -4,7 +4,11 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans linked to the fetched properties
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
else getattr(rec, col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -94,16 +121,34 @@ def app():
)
property_asset_data = properties_df.merge(
mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
mod_property_data.drop(columns=["address", "postcode", "tenure"]),
how="left",
on="uprn",
)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
"pitched", case=False
)
property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
property_asset_data["is_insulated"] = (
property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
["filled cavity", "with external insulation", "filled cavity and external insulation"]
) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
property_asset_data["wall_type"] = (
property_asset_data["walls"].str.split(" ").str[0].str.strip()
)
property_asset_data["is_insulated"] = property_asset_data["walls"].str.split(
","
).str[1].str.strip().isin(
[
"filled cavity",
"with external insulation",
"filled cavity and external insulation",
]
) | property_asset_data[
"walls"
].str.split(
","
).str[
2
].str.strip().isin(
["insulated"]
)
property_asset_data["is_insulated"] = np.where(
property_asset_data["is_insulated"], "Insulated", "Uninsulated"
@ -115,18 +160,26 @@ def app():
property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
)
archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
archetype_variables = [
"property_type",
"wall_type",
"is_insulated",
"is_pitched",
"pre_1970",
]
assigned_archetypes = (
property_asset_data.groupby(
archetype_variables
).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
property_asset_data.groupby(archetype_variables)
.size()
.reset_index()
.rename(columns={0: "n_properties"})
.sort_values("n_properties", ascending=False)
)
# Make the archetype ID a concatenation of the variables
assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
lambda x: "_".join(x.astype(str)), axis=1
)
assigned_archetypes["archetype_id"] = assigned_archetypes[
archetype_variables
].apply(lambda x: "_".join(x.astype(str)), axis=1)
# Most prominent archetypes
prominent_archetypes = assigned_archetypes.head(6)
@ -136,7 +189,7 @@ def app():
property_asset_data = property_asset_data.merge(
assigned_archetypes[archetype_variables + ["archetype_id"]],
how="left",
on=archetype_variables
on=archetype_variables,
)
# Create age bands:
@ -148,7 +201,7 @@ def app():
property_asset_data["age_band"] = pd.cut(
property_asset_data["BUILD_YEAR"],
bins=[1959, 1969, 1979, 1989, 1999, 2022],
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
)
# Create floor area bands
@ -159,47 +212,59 @@ def app():
property_asset_data["floor_area_band"] = pd.cut(
property_asset_data["total_floor_area"],
bins=[0, 73, 97, 199, 10000],
labels=["0-73", "74-97", "98-199", "200+"]
labels=["0-73", "74-97", "98-199", "200+"],
)
property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
property_asset_data["archetype_group"] = np.where(
property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
property_asset_data["archetype_id"].isin(
other_archetypes["archetype_id"].values
),
"other",
property_asset_data["archetype_group"]
property_asset_data["archetype_group"],
)
# For colour
wall_types = (
property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
columns={"wall_type": "Wall Type"}
)
property_asset_data[["wall_type"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"wall_type": "Wall Type"})
)
# Group into age bands
ages = (
property_asset_data[["age_band"]].value_counts()
property_asset_data[["age_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("age_band", ascending=True)
.reset_index()
.sort_values("age_band", ascending=True)
.rename(columns={"age_band": "Age Band"})
)
floor_area_bands = (
property_asset_data[["floor_area_band"]].value_counts()
property_asset_data[["floor_area_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("floor_area_band", ascending=True)
.reset_index()
.sort_values("floor_area_band", ascending=True)
.rename(columns={"floor_area_band": "Floor Area Band"})
)
archetype_counts = (
property_asset_data[["archetype_group"]].
value_counts().
to_frame().
reset_index()
property_asset_data[["archetype_group"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"archetype_group": "Archetype"})
)
property_types = (
(property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
value_counts().
to_frame().
reset_index()
(
property_asset_data["property_type"]
+ ": "
+ property_asset_data["built_form"]
)
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"index": "Property Type", 0: "Count"})
)
@ -217,18 +282,24 @@ def app():
totals = property_asset_data[
[
"Total_household_members",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
].copy()
totals["total_cost"] = (
totals["heating_cost_current"] +
totals["hot_water_cost_current"] +
totals["lighting_cost_current"] +
totals["appliances_cost_current"] +
totals["gas_standing_charge"] +
totals["electricity_standing_charge"]
totals["heating_cost_current"]
+ totals["hot_water_cost_current"]
+ totals["lighting_cost_current"]
+ totals["appliances_cost_current"]
+ totals["gas_standing_charge"]
+ totals["electricity_standing_charge"]
)
print(
totals[
@ -259,38 +330,59 @@ def app():
scenario_recommendations_df = recommendations_df[
recommendations_df["Scenario ID"] == scenario
].copy()
].copy()
scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
scenario_recommendations_df["contingency"] = (
contingency * scenario_recommendations_df["estimated_cost"]
)
scenario_recommendations_df["total_cost"] = (
scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
scenario_recommendations_df["estimated_cost"]
+ scenario_recommendations_df["contingency"]
)
recommended_measures_df = scenario_recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
# Metrics by property ID
aggregated_metrics = scenario_recommendations_df[
[
"property_id", "type", "default", "sap_points",
"energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
"total_cost"
"property_id",
"type",
"default",
"sap_points",
"energy_cost_savings",
"kwh_savings",
"co2_equivalent_savings",
"estimated_cost",
"contingency",
"total_cost",
]
]
aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
aggregated_metrics = aggregated_metrics.groupby("property_id")[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].sum().reset_index()
aggregated_metrics = (
aggregated_metrics.groupby("property_id")[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.sum()
.reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -299,30 +391,58 @@ def app():
for c in recommendations_measures_pivot.columns:
if c == "property_id":
continue
recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
recommendations_measures_pivot["Recommendation: " + c] = (
recommendations_measures_pivot[c] > 0
)
# We now create a final output
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
aggregated_metrics, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(aggregated_metrics, how="left", on="property_id")
)
df["bills_total_cost"] = (
df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
df["heating_cost_current"]
+ df["hot_water_cost_current"]
+ df["lighting_cost_current"]
+ df["appliances_cost_current"]
+ df["gas_standing_charge"]
+ df["electricity_standing_charge"]
)
df = df.drop(columns=["property_id"])
for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
for c in [
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
]:
df[c] = df[c].fillna(0)
df = df.rename(
@ -345,16 +465,23 @@ def app():
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
# Calculate the relative savings on carbon, kwh, and bills
df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
df["relative_carbon_savings"] = (
df["co2_equivalent_savings"] / df["co2_emissions"]
)
df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
# Add on the archetype
df = df.merge(
property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
property_asset_data[["uprn", "archetype_group"]],
how="left",
left_on="UPRN",
right_on="uprn",
)
# For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
@ -387,7 +514,9 @@ def app():
printing_scenario_id = scenario_ids[0]
# EPC breakdown
print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
print(
scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
)
# Cost
# Total cost
print(scenario_data[printing_scenario_id]["total_cost"].sum())
@ -408,16 +537,24 @@ def app():
measure_details = {}
for scenario in scenario_ids:
measure_details[scenario] = {}
recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
recommendation_cols = [
c for c in scenario_data[scenario].columns if "Recommendation:" in c
]
measure_details[scenario]["count"] = (
scenario_data[scenario][recommendation_cols].sum().to_dict()
)
# Get average cost per measure
measure_columns = [
c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
c.split("Recommendation: ")[1]
for c in scenario_data[scenario].columns
if "Recommendation:" in c
]
# Take the mean, drop zero columns
measure_costs = {}
for m in measure_columns:
measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
measure_costs[m] = float(
scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
)
measure_details[scenario]["cost_per_measure"] = measure_costs
pprint(measure_details[scenario_ids[0]]["count"])
@ -452,12 +589,27 @@ def app():
for scenario in scenario_ids:
df = scenario_data[scenario].copy()
avg_savings = df[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].mean().to_dict()
avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
avg_savings = (
df[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.mean()
.to_dict()
)
avg_savings["cost_per_sap_point"] = (
avg_savings["total_cost"] / avg_savings["sap_points"]
)
avg_savings["cost_per_carbon"] = (
avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
)
scenario_metrics[scenario] = avg_savings
pprint(scenario_metrics[scenario_ids[0]])
@ -465,11 +617,11 @@ def app():
scenario_data[scenario_ids[0]]["loft_insulation"][
scenario_data[scenario_ids[0]]["loft_insulation"] > 0
].mean()
].mean()
scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
].mean()
].mean()
# Testing checking flood risk
@ -477,11 +629,7 @@ def app():
def get_flood_risk(lat, lon, radius_km=1):
url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
params = {
'lat': lat,
'long': lon,
'dist': radius_km # search radius in km
}
params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km
response = requests.get(url, params=params)
response.raise_for_status()
@ -495,20 +643,19 @@ def app():
print(f"{len(flood_warnings)} warning(s) found near the location:")
for warning in flood_warnings:
print(f"- Area: {warning.get('description')}")
print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
print(
f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
)
print(f" Message changed at: {warning.get('timeMessageChanged')}")
print()
return flood_warnings
from shapely.geometry import shape, Point
def get_flood_areas_near_point(lat, lon, radius_km=2):
url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
params = {
'lat': lat,
'long': lon,
'dist': radius_km
}
params = {"lat": lat, "long": lon, "dist": radius_km}
response = requests.get(url, params=params)
response.raise_for_status()
@ -531,7 +678,7 @@ def app():
if not features:
continue
flood_polygon = shape(features[0]['geometry'])
flood_polygon = shape(features[0]["geometry"])
try:
is_inside = flood_polygon.contains(point)
@ -539,12 +686,17 @@ def app():
is_inside = False
if is_inside:
print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
print(
f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
)
return area
from tqdm import tqdm
floor_warnings_data = []
for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
for _, property in tqdm(
property_asset_data.iterrows(), total=len(property_asset_data)
):
# warnings = floor_warnings_data.extend(
# get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
# )
@ -556,7 +708,7 @@ def app():
"uprn": property["uprn"],
"address": property["address"],
"postcode": property["postcode"],
"area": resp
"area": resp,
}
)
continue
@ -570,7 +722,7 @@ def app():
"House_Cavity_Uninsulated_Pitched roof_Post 1970",
"other",
"House_System_Uninsulated_Pitched roof_Pre 1970",
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
]
values = [62, 36, 21, 16, 16, 4, 2]
@ -582,36 +734,39 @@ def app():
"Cavity wall insulation, ventilation",
"Bespoke retrofit measures",
"External wall insulation, roof insulation",
"Flat roof insulation, internal wall insulation"
"Flat roof insulation, internal wall insulation",
]
fig = go.Figure(go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4),
colors=values,
colorscale="Blues"
fig = go.Figure(
go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4), colors=values, colorscale="Blues"
),
)
))
)
fig.update_layout(
margin=dict(t=10, l=10, r=10, b=10),
plot_bgcolor="white",
paper_bgcolor="white"
margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
)
fig.show()
# Get the recommended measures by scenario id
recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
recommendation_cols
].sum().reset_index()
recommendation_cols = [
c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
]
measure_counts_by_scenario = (
scenario_data[scenario_ids[1]]
.groupby("archetype_group")[recommendation_cols]
.sum()
.reset_index()
)
measure_counts_by_scenario.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
@ -630,15 +785,13 @@ def app():
to_append = {"uprn": uprn}
for _id in scenario_ids:
scenario = scenario_data[_id][
scenario_data[_id]["uprn"] == uprn
].squeeze()
scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()
val = PropertyValuation.estimate_valuation_improvement(
current_value=x["valuation"],
current_epc=scenario["Current EPC Rating"].value,
target_epc=scenario["Predicted Post Works EPC"],
total_cost=None
total_cost=None,
)
to_append[_id] = val["average_increase"]

File diff suppressed because it is too large Load diff

View file

@ -10,6 +10,7 @@ Additionally, we will find the problematic records and remove them
Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
or recommendations in case something went wrong
"""
import pandas as pd
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
return [
uprn
for (uprn,) in
session.query(PropertyModel.uprn)
for (uprn,) in session.query(PropertyModel.uprn)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
if uprn is not None
@ -34,7 +34,7 @@ with db_session() as session:
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
@ -44,7 +44,7 @@ missed_properties.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_failed_properties_to_restart_20260102.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
@ -52,14 +52,14 @@ scenario_id = None
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    """Return how many plan rows belong to ``scenario_id``.

    Issues a single ``SELECT count(*)`` against the ``PlanModel`` table,
    filtered on ``PlanModel.scenario_id``.

    :param session: Open SQLAlchemy session used to run the query.
    :param scenario_id: Scenario whose plans are counted.
    :return: The number of matching plans (``0`` when there are none).
    """
    count_query = (
        select(func.count())
        .select_from(PlanModel)
        .where(PlanModel.scenario_id == scenario_id)
    )
    # count(*) always produces exactly one row, so scalar_one() is safe here.
    return session.execute(count_query).scalar_one()
@ -69,8 +69,7 @@ with db_session() as session:
def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    """Return the ids of every plan attached to ``scenario_id``.

    Only the ``PlanModel.id`` column is selected, so no ORM entities are
    loaded.

    :param session: Open SQLAlchemy session used to run the query.
    :param scenario_id: Scenario whose plan ids are wanted.
    :return: List of plan ids; empty when the scenario has no plans.
    """
    result = session.execute(
        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    return [row.id for row in result]
@ -84,7 +83,7 @@ from sqlalchemy.orm import Session
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` with at most ``size`` items each.

    Despite the parameter name, ``iterable`` must support ``len()`` and
    slicing (e.g. a list, tuple or string); each chunk has the same type as
    a slice of the input. The last chunk may be shorter than ``size``.

    :param iterable: Sized, sliceable sequence to split.
    :param size: Maximum number of items per chunk; must be positive.
    :raises ValueError: If ``size`` is not positive (raised on first iteration).
    """
    # A negative step would make range() empty and silently skip every item,
    # so reject it explicitly rather than doing nothing.
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
from sqlalchemy import text
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
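# recommendations (only those used by these plans)
# NOTE(review): the plan_recommendations rows for these plans are deleted in the
# step above, so the subquery below will presumably match nothing and leave the
# recommendation rows orphaned - confirm, and if so run this step before that one.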
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -5,6 +5,7 @@ This includes:
# EPC C, there should be a plan
2) If the plan is fabric first, make sure they are actually fabric first
"""
import pandas as pd
scenario_names = {
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
)
# find properties that are below the scenario sap target, but have no recommended measures
df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
df["below_scenario_target"] = (
df["current_sap_points"] < scenario_sap_targets[scenario_id]
)
df["no_recommended_measures"] = df["sap_points"] == 0
df["zero_cost"] = df["total_retrofit_cost"] == 0
df["sap_points_above_zero"] = df["sap_points"] > 0
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
].copy()
if scenario_sap_targets[scenario_id] == 81:
problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
problematic_properties = problematic_properties[
problematic_properties["property_type"] != "Flat"
]
zero_cost_above_zero_sap = df[
(df["sap_points_above_zero"] & df["zero_cost"])
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
# pd.set_option('display.width', 1000)
# problematic_properties.head(len(problematic_properties))
print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
print(
f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
)
print(
f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
)
problems.append(problematic_properties)
problems.append(zero_cost_above_zero_sap)
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal, sal2])
@ -114,7 +123,7 @@ retry.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_problematic_properties_to_review_20260106.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Delete associated plans
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import sessionmaker
def get_property_ids_for_uprns(
    session: Session, portfolio_id: int, uprns: list[int]
) -> list[int]:
    """Return the ids of properties in a portfolio that match the given UPRNs.

    :param session: Open SQLAlchemy session used to run the query.
    :param portfolio_id: Portfolio the properties must belong to.
    :param uprns: UPRNs to look up.
    :return: ``PropertyModel.id`` values for every matching property; empty
        when ``uprns`` is empty or nothing matches.
    """
    if not uprns:
        # Nothing to match; avoid a round trip with an empty IN () clause.
        return []
    # Select only the id column instead of loading whole PropertyModel rows.
    id_rows = (
        session.query(PropertyModel.id)
        .filter(
            PropertyModel.portfolio_id == portfolio_id,
            PropertyModel.uprn.in_(uprns),
        )
        .all()
    )
    return [property_id for (property_id,) in id_rows]
@ -149,15 +159,21 @@ with db_session() as session:
# Get all and delete plans for these property IDs
def get_all_plans_for_property_ids(
    session: Session, property_ids: list[int]
) -> list[PlanModel]:
    """Return every plan whose ``property_id`` is in ``property_ids``.

    :param session: Open SQLAlchemy session used to run the query.
    :param property_ids: Property ids whose plans should be loaded.
    :return: Matching ``PlanModel`` instances; empty when ``property_ids`` is
        empty or no plan matches.
    """
    if not property_ids:
        # Nothing to match; avoid a round trip with an empty IN () clause.
        return []
    return (
        session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all()
    )
def get_ids_of_plans_for_deletion(
    session: Session, property_ids: list[int]
) -> list[int]:
    """Return the ids of all plans attached to the given properties.

    :param session: Open SQLAlchemy session used to run the query.
    :param property_ids: Property ids whose plans are to be deleted.
    :return: ``PlanModel.id`` values for every matching plan; empty when
        ``property_ids`` is empty or no plan matches.
    """
    if not property_ids:
        # Nothing to match; avoid a round trip with an empty IN () clause.
        return []
    # Only the ids are needed, so select the id column rather than full rows.
    plan_id_rows = (
        session.query(PlanModel.id)
        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    )
    return [plan_id for (plan_id,) in plan_id_rows]
@ -168,7 +184,7 @@ with db_session() as session:
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` with at most ``size`` items each.

    Despite the parameter name, ``iterable`` must support ``len()`` and
    slicing (e.g. a list, tuple or string); each chunk has the same type as
    a slice of the input. The last chunk may be shorter than ``size``.

    :param iterable: Sized, sliceable sequence to split.
    :param size: Maximum number of items per chunk; must be positive.
    :raises ValueError: If ``size`` is not positive (raised on first iteration).
    """
    # A negative step would make range() empty and silently skip every item,
    # so reject it explicitly rather than doing nothing.
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
from sqlalchemy import text
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
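# recommendations (only those used by these plans)
# NOTE(review): the plan_recommendations rows for these plans are deleted in the
# step above, so the subquery below will presumably match nothing and leave the
# recommendation rows orphaned - confirm, and if so run this step before that one.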
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
from sqlalchemy import text, select
from backend.app.db.connection import db_read_session
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
PORTFOLIO_ID = 435
with db_read_session() as session:
    # Fetch the EPC details rows for every property in PORTFOLIO_ID.
    # The `estimated` filter below is currently disabled, so this is NOT
    # restricted to estimated EPCs despite the variable names.
    estimated_epcs = (
        session.query(PropertyDetailsEpcModel)
        .filter(
            # PropertyDetailsEpcModel.estimated == True,
            PropertyDetailsEpcModel.property_id.in_(
                session.query(PropertyModel.id).filter(
                    PropertyModel.portfolio_id == PORTFOLIO_ID
                )
            )
        )
        .all()
    )

    # Get the ids. NOTE: these are *property* ids (PropertyModel.id), not
    # PropertyDetailsEpcModel primary keys. Read them while the session is
    # still open so the attribute access cannot hit a detached instance.
    estimated_epc_ids = [epc.property_id for epc in estimated_epcs]

# I want to get the UPRNS for these properties, from the property model
with db_read_session() as session:
    # estimated_epc_ids already holds property ids, so filter PropertyModel
    # directly rather than matching them against PropertyDetailsEpcModel.id.
    estimated_uprns = (
        session.query(PropertyModel.uprn)
        .filter(PropertyModel.id.in_(estimated_epc_ids))
        .all()
    )

# Each result row is a 1-tuple; unpack to a flat list of UPRNs.
estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]
@ -35,16 +45,16 @@ with db_read_session() as session:
sal_1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal_2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal_1, sal_2])
sal = sal.drop_duplicates(subset=['epc_os_uprn'])
sal = sal.drop_duplicates(subset=["epc_os_uprn"])
estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()
@ -55,20 +65,24 @@ SCENARIOS = [
# 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
# 859, # EPC C - no solid floor, ashp 3.0
# 885, # EPC B - fabric first, no solid floor, ashp 3.0
908, 909, 910
908,
909,
910,
]
# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
with db_read_session() as session:
    # Look up (plan id, property id) for every plan attached to one of the
    # properties collected in estimated_epc_ids.
    result = session.execute(
        select(PlanModel.id, PlanModel.property_id).where(
            PlanModel.property_id.in_(estimated_epc_ids)
        )
    )
    # Consume the result while the session is still open.
    plans = [
        {
            "plan_id": row.id,
            "property_id": row.property_id,
        }
        for row in result
    ]

# One row per plan, with columns plan_id and property_id.
df = pd.DataFrame(plans)
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)
# Store the SAL
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
b1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 1"
sheet_name="batch 1",
)
b2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 2"
sheet_name="batch 2",
)
b3 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 3"
sheet_name="batch 3",
)
b4 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 4"
sheet_name="batch 4",
)
b5 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 5"
sheet_name="batch 5",
)
# Batch 6 should be the remaining
total = pd.concat([b1, b2, b3, b4, b5])
remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
# Create new output
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
b5.to_excel(writer, sheet_name="batch 5", index=False)
remaining.to_excel(writer, sheet_name="batch 6", index=False)
all_together = pd.concat(
[b1, b2, b3, b4, b5, remaining]
)
all_together = pd.concat([b1, b2, b3, b4, b5, remaining])

View file

@ -110,14 +110,17 @@ import pandas as pd
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
970
]
SCENARIOS = [970]
scenario_names = {
970: "EPC C - no solid floor, ashp 3.0",
}
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Plans
# --------------------
plans_query = session.query(Plan).filter(
Plan.scenario_id.in_(scenario_ids)
).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
properties_df.to_excel(writer, sheet_name="properties", index=False)
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
# # Check tenures
# initial_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "

View file

@ -4,7 +4,7 @@ import pandas as pd
full_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
"SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------Pull in the reduced sample ------
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
reduced_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
"ownership filtered sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------ Pull in the confirmed ownership column from Peabody ------
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Properties"
sheet_name="Properties",
)
correct_sample = new_asset_data[
~new_asset_data["AH Tenure"].isin(
["Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership"]
[
"Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership",
]
)
].copy()
@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
]["UPRN"].values
sal_to_add = full_sal[
full_sal["domna_property_id"].isin(stuff_to_add)
].copy()
sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()
# ------- Stuff to remove -------
stuff_to_remove = reduced_sal[
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()

View file

@ -7,7 +7,7 @@ from sqlalchemy.sql import true
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.app.utils import sap_to_epc
EPC_COLOURS = {
@ -17,7 +17,7 @@ EPC_COLOURS = {
"D": "#fdd401",
"E": "#fdab67",
"F": "#ee8023",
"G": "#e71437"
"G": "#e71437",
}
@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
its associated default recommendations if any.
"""
# Adjust the join to correctly filter recommendations while including all properties
query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
(Recommendation.property_id == PropertyModel.id) & (
Recommendation.default == true())) \
.filter(PropertyModel.portfolio_id == portfolio_id) \
query = (
session.query(PropertyModel, Recommendation)
.outerjoin(
Recommendation,
(Recommendation.property_id == PropertyModel.id)
& (Recommendation.default == true()),
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties = {}
for property, recommendation in query:
# Ensure the property is added once with an empty list of recommendations initially
if property.id not in properties:
properties[property.id] = row2dict(property)
properties[property.id]['recommendations'] = []
properties[property.id]["recommendations"] = []
# Append recommendations if they exist and meet the criteria (already filtered by the query)
if recommendation and recommendation.default:
properties[property.id]['recommendations'].append(row2dict(recommendation))
properties[property.id]["recommendations"].append(row2dict(recommendation))
return list(properties.values())
@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a property's details.
Returns an empty list if no property details are found.
"""
property_details = session.query(PropertyDetailsEpcModel).filter(
PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
property_details = (
session.query(PropertyDetailsEpcModel)
.filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
.all()
)
# Convert the SQLAlchemy objects to dictionaries
property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
property_details_dict = (
[row2dict(pd) for pd in property_details] if property_details else []
)
return property_details_dict
@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a plan.
Returns an empty list if no plans are found.
"""
plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
plans = (
session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
)
# Convert the SQLAlchemy objects to dictionaries
plans_dict = [row2dict(plan) for plan in plans] if plans else []
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
return plans_dict
def plot_epc_distribution(
    df,
    customer_key,
    title="Your Units",
    background_color="white",
    bar_height=0.4,
    font_size=15,
):
    """
    Plot a horizontal bar chart of the EPC rating distribution and save it as a PNG.

    Bars are drawn in ascending order of percentage and coloured through
    ``EPC_COLOURS``. Each bar carries its EPC rating inside the bar and
    ``count (percentage%)`` just beyond its end. Percentages are rounded to one
    decimal place on an internal copy; the caller's DataFrame is not modified.

    :param df: DataFrame with ``current_epc_rating``, ``count`` and ``percentage`` columns
    :param customer_key: Customer folder name; the image is written to
        ``etl/customers/<customer_key>/epc_distribution_plot.png``
    :param title: Chart title (default "Your Units")
    :param background_color: Background colour of both the figure and the axes (default "white")
    :param bar_height: Thickness of each bar (default 0.4)
    :param font_size: Base font size for text annotations (default 15)
    :return: Tuple ``(fig, figure_path)``. The figure has already been closed
        when it is returned.
    """
    # Round and sort on a derived frame so the caller's DataFrame is left untouched
    df_sorted = df.assign(percentage=df["percentage"].round(1)).sort_values(
        "percentage", ascending=True
    )

    # Square canvas: at least 6 inches, growing with the number of entries
    square_size = max(6, len(df) * 0.6)
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Figure background
    ax.set_facecolor(background_color)  # Axes background

    # Plot bars with the requested height for adjustable thickness
    bars = ax.barh(
        df_sorted["current_epc_rating"],
        df_sorted["percentage"],
        color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
        edgecolor="none",
        height=bar_height,
    )

    epc_rating_font_size = font_size * 2  # Rating label is larger than the base size
    count_percentage_font_size = font_size  # Count (percentage) uses the base size

    # Bars come back in the same order as the rows of df_sorted, so pair them up directly
    for bar, epc_rating, count, percentage in zip(
        bars,
        df_sorted["current_epc_rating"],
        df_sorted["count"],
        df_sorted["percentage"],
    ):
        width = bar.get_width()
        y_centre = bar.get_y() + bar.get_height() / 2

        # EPC rating inside the bar, right-aligned just short of the bar end
        ax.text(
            width - (width * 0.05),
            y_centre,
            f"{epc_rating}",
            va="center",
            ha="right",
            color="white",
            fontsize=epc_rating_font_size,
        )

        # Count and percentage outside the bar
        ax.text(
            width + 1,
            y_centre,
            f"{count} ({percentage}%)",
            va="center",
            color="black",
            fontsize=count_percentage_font_size,
        )

    ax.set_title(title, fontsize=font_size * 1.2)  # Title scales with the base size

    # Strip tick marks, tick labels and all four spines
    ax.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
    ax.tick_params(axis="y", which="both", left=False, right=False, labelleft=False)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    plt.tight_layout()  # Adjust layout
    plt.show()

    # Save the figure as an image
    figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
    fig.savefig(figure_path, bbox_inches="tight")
    plt.close(fig)  # Close the figure to free memory

    return fig, figure_path
def save_plot_to_image(figure, path="plot.png"):
    """
    Write a matplotlib figure to ``path`` with a tight bounding box, then close it.

    Intended for producing image files that are later inserted into PowerPoint.

    :param figure: The matplotlib figure to save.
    :param path: Destination image path (default "plot.png").
    """
    figure.savefig(path, bbox_inches="tight")
    # Release the figure once it is on disk
    plt.close(figure)
def save_figure_as_image(figure, filename="temp_plot.png"):
    """
    Write a matplotlib figure to ``filename`` at 300 dpi, then close it.

    :param figure: The matplotlib figure to save.
    :param filename: Destination image path (default "temp_plot.png").
    """
    figure.savefig(filename, dpi=300)
    # Closing stops the figure from being displayed in notebooks or other
    # interactive Python environments
    plt.close(figure)
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
height_inches=Inches(2)):
def add_commentary_with_bullets(
slide,
commentary,
top_inches,
left_inches=Inches(1),
width_inches=Inches(8),
height_inches=Inches(2),
):
"""
Adds commentary with bullet points to a slide.
@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
:param width_inches: The width of the commentary text box.
:param height_inches: The height of the commentary text box.
"""
txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
txBox = slide.shapes.add_textbox(
left_inches, top_inches, width_inches, height_inches
)
tf = txBox.text_frame
# Configure text frame
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
for i, section in enumerate(sections):
if i > 0:
p = tf.add_paragraph() # Add a new paragraph for each section after the first
p = (
tf.add_paragraph()
) # Add a new paragraph for each section after the first
else:
p = tf.paragraphs[0] # Use the first paragraph for the first section
p.text = section
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
# Determine the position of the commentary text box based on whether an image is included
if img_path:
# Add the image
slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
slide.shapes.add_picture(
img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
)
# Position for commentary when image is present
commentary_top = Inches(6)
else:
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
prs = Presentation()
for slide, slide_data in data.items():
slide_figure_path = data[slide].get('image_path')
text = data[slide].get('text')
title = data[slide].get('title', "")
slide_figure_path = data[slide].get("image_path")
text = data[slide].get("text")
title = data[slide].get("title", "")
add_slide_with_image(prs, title, slide_figure_path, text)
# Save the presentation
prs.save(save_location)
def create_recommendations_summary(
    recommendations_df, properties_df, property_details_df, sap_target
):
    """
    Aggregate recommendations per property and compare the result with a SAP target.

    :param recommendations_df: One row per recommendation, with ``property_id``,
        ``sap_points``, ``property_valuation_increase``, ``energy_cost_savings``,
        ``estimated_cost``, ``co2_equivalent_savings`` and ``adjusted_heat_demand``.
    :param properties_df: Properties with ``id``, ``uprn`` and ``current_sap_points``.
    :param property_details_df: Optional property details with ``uprn``,
        ``co2_emissions``, ``adjusted_energy_consumption`` and ``energy_bill``;
        pass ``None`` to skip this merge.
    :param sap_target: SAP score the expected score is compared against.
    :return: DataFrame with one row per ``property_id`` holding the summed
        impacts, ``expected_sap_points``, ``expected_epc_rating`` and
        ``sap_difference`` (target minus expected). When property details are
        given it also holds ``current_co2``, ``current_energy`` and
        ``current_energy_bill``.
    """
    # Sum the impact of every recommendation for each property:
    # SAP points, valuation impact, bill savings, cost, CO2 and heat demand
    recommendations_summary = (
        recommendations_df.groupby(["property_id"])
        .agg(
            total_sap_points=("sap_points", "sum"),
            total_valuation_impact=("property_valuation_increase", "sum"),
            total_bill_savings=("energy_cost_savings", "sum"),
            total_cost=("estimated_cost", "sum"),
            total_carbon=("co2_equivalent_savings", "sum"),
            adjusted_heat_demand=("adjusted_heat_demand", "sum"),
        )
        .reset_index()
    )

    # Bring in the UPRN and the current SAP score of each property
    current_scores = properties_df[["id", "uprn", "current_sap_points"]].rename(
        columns={"id": "property_id"}
    )
    recommendations_summary = recommendations_summary.merge(
        current_scores,
        on="property_id",
        how="left",
    )

    recommendations_summary["expected_sap_points"] = (
        recommendations_summary["current_sap_points"]
        + recommendations_summary["total_sap_points"]
    )
    recommendations_summary["expected_epc_rating"] = recommendations_summary[
        "expected_sap_points"
    ].apply(sap_to_epc)
    # Positive values mean the property is still short of the target
    recommendations_summary["sap_difference"] = (
        sap_target - recommendations_summary["expected_sap_points"]
    )

    if property_details_df is not None:
        # Only these four columns are selected, so only they need renaming;
        # the join key stays "uprn"
        current_details = property_details_df[
            ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
        ].rename(
            columns={
                "co2_emissions": "current_co2",
                "adjusted_energy_consumption": "current_energy",
                "energy_bill": "current_energy_bill",
            }
        )
        recommendations_summary = recommendations_summary.merge(
            current_details,
            on="uprn",
            how="left",
        )

    return recommendations_summary

View file

@ -1,4 +1,4 @@
[pytest]
pythonpath = .
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests

View file

@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine, db_read_session
from backend.app.db.models.recommendations import (
Recommendation,
Plan,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
@ -73,12 +73,12 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
latest_plans_subq = (
session.query(
Plan.scenario_id,
Plan.property_id,
func.max(Plan.created_at).label("latest_created_at"),
PlanModel.scenario_id,
PlanModel.property_id,
func.max(PlanModel.created_at).label("latest_created_at"),
)
.filter(Plan.scenario_id.in_(scenario_ids))
.group_by(Plan.scenario_id, Plan.property_id)
.filter(PlanModel.scenario_id.in_(scenario_ids))
.group_by(PlanModel.scenario_id, PlanModel.property_id)
.subquery()
)
@ -87,12 +87,12 @@ def get_data(portfolio_id, scenario_ids):
# ).all()
plans_query = (
session.query(Plan)
session.query(PlanModel)
.join(
latest_plans_subq,
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
& (Plan.property_id == latest_plans_subq.c.property_id)
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
(PlanModel.scenario_id == latest_plans_subq.c.scenario_id)
& (PlanModel.property_id == latest_plans_subq.c.property_id)
& (PlanModel.created_at == latest_plans_subq.c.latest_created_at),
)
.all()
)
@ -108,7 +108,7 @@ def get_data(portfolio_id, scenario_ids):
# )
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -118,12 +118,14 @@ def get_data(portfolio_id, scenario_ids):
# Recommendations (NO materials yet)
# --------------------
recommendations_query = (
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
session.query(
Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id
)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),

View file

@ -1,7 +1,13 @@
import logging
from os import PathLike
from typing import Optional, Union
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
def setup_logger(
log_file: Optional[Union[str, PathLike[str]]] = None,
level: int = logging.INFO,
overwrite_handler: bool = False,
) -> logging.Logger:
# Create a logger and set the logging level
logger = logging.getLogger()
logger.setLevel(level)

View file

@ -17,11 +17,11 @@ def read_from_s3(bucket_name, s3_file_name):
:param s3_file_name: The file name to use for the saved data in S3
"""
# Initialize a session using Amazon S3
s3 = boto3.resource('s3')
s3 = boto3.resource("s3")
# Get the MessagePack data from S3
obj = s3.Object(bucket_name, s3_file_name)
data = obj.get()['Body'].read()
data = obj.get()["Body"].read()
return data
@ -36,7 +36,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name):
"""
# Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles
try:
s3 = boto3.client('s3')
s3 = boto3.client("s3")
except NoCredentialsError:
print("Credentials not available.")
return
@ -46,12 +46,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name):
try:
s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data)
print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}')
print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}")
except Exception as e:
print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}')
print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}")
def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO:
    """
    Download an S3 object into an in-memory ``BytesIO`` buffer.

    The buffer can be handed to another reader to parse the payload.

    :param bucket_name: Name of the S3 bucket holding the object
    :param file_key: Key of the object in the bucket
    :return: ``BytesIO`` containing the full object body
    """
    s3_client = boto3.client("s3")

    # Fetch the object and pull its whole body into memory in one go
    s3_object = s3_client.get_object(Bucket=bucket_name, Key=file_key)
    return BytesIO(s3_object["Body"].read())
@ -86,7 +86,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
df.to_parquet(parquet_buffer)
# Create the boto3 client
client = boto3.client('s3')
client = boto3.client("s3")
# Upload the Parquet file to S3
client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue())
@ -102,15 +102,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
"""
if bucket_name is None:
raise ValueError("Bucket name is None when trying to read dataframe from parquet")
raise ValueError(
"Bucket name is None when trying to read dataframe from parquet"
)
if not file_key.endswith(".parquet"):
raise ValueError("This file doesn't look like a parquet file")
parquet_buffer = read_io_from_s3(
bucket_name=bucket_name,
file_key=file_key
)
parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key)
df = pd.read_parquet(parquet_buffer)
@ -130,7 +129,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name):
bool: True if the file was successfully saved, False otherwise.
"""
# Initialize S3 client
s3 = boto3.client('s3')
s3 = boto3.client("s3")
# Create an in-memory text stream
csv_buffer = StringIO()
@ -159,7 +158,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name):
try:
serialized_data = pickle.dumps(data)
except Exception as e:
print(f'Failed to serialize data: {str(e)}')
print(f"Failed to serialize data: {str(e)}")
return
# Use save_data_to_s3 function to upload the serialized data to S3
def read_pickle_from_s3(bucket_name, s3_file_name):
    """
    Download a pickle file from S3 and return the deserialized object.

    Failures are logged rather than raised.

    :param bucket_name: Name of the S3 bucket to read from
    :param s3_file_name: Key of the pickle file in the bucket
    :return: The data read from the pickle file, or ``None`` if the download
        or the deserialization failed
    """
    try:
        s3 = boto3.client("s3")
        s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name)
        serialized_data = s3_response["Body"].read()
    except NoCredentialsError:
        logger.error("Credentials not available.")
        return None
    # NOTE(review): this handler line is not visible in the reviewed hunk;
    # PartialCredentialsError is presumed from the log message - confirm.
    except PartialCredentialsError:
        logger.error("Incomplete credentials provided.")
        return None
    except Exception as e:
        logger.error(
            f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}"
        )
        return None

    # Deserialize data from pickle format
    # NOTE(security): pickle.loads can execute arbitrary code; only use this
    # on objects written by trusted producers.
    try:
        data = pickle.loads(serialized_data)
    except Exception as e:
        logger.error(f"Failed to deserialize data: {str(e)}")
        return None

    return data
def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None):
def read_excel_from_s3(
bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None
):
"""
Read an Excel file from an S3 bucket and return it as a pandas DataFrame.
@ -222,7 +225,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee
# Drop columns where all values are NaN
if drop_all_na:
df.dropna(axis=1, how='all', inplace=True)
df.dropna(axis=1, how="all", inplace=True)
# Reset index if the first column is just an index or entirely NaN
df.reset_index(drop=True, inplace=True)
@ -254,7 +257,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
# Initialize a session using boto3
session = boto3.session.Session()
s3 = session.resource('s3')
s3 = session.resource("s3")
# Upload the Excel file from the buffer to S3
bucket = s3.Bucket(bucket_name)
@ -264,17 +267,19 @@ def save_excel_to_s3(df, bucket_name, file_key):
def read_csv_from_s3(bucket_name, filepath):
logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
s3 = boto3.client('s3')
logger.info(
f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
)
s3 = boto3.client("s3")
# Get the object from s3
s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
# Read the CSV body from the s3 object
body = s3_object['Body'].read()
body = s3_object["Body"].read()
# Use StringIO to create a file-like object from the string
csv_data = StringIO(body.decode('utf-8'))
csv_data = StringIO(body.decode("utf-8"))
# Use csv library to read it into a list of dictionaries
reader = csv.DictReader(csv_data)
@ -292,14 +297,16 @@ def list_files_in_s3_folder(bucket_name, folder_name):
:return: A list of file keys in the specified S3 folder.
"""
try:
s3 = boto3.client('s3')
s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
if 'Contents' not in response:
logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
if "Contents" not in response:
logger.info(
f"No files found in folder {folder_name} in bucket {bucket_name}."
)
return []
file_keys = [content['Key'] for content in response['Contents']]
file_keys = [content["Key"] for content in response["Contents"]]
return file_keys
except NoCredentialsError:
@ -309,7 +316,9 @@ def list_files_in_s3_folder(bucket_name, folder_name):
logger.error("Incomplete credentials provided.")
return []
except Exception as e:
logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
logger.error(
f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
)
return []
@ -335,22 +344,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
"""
# For this function, folder_name should end with a forward slash
if not folder_name.endswith('/'):
folder_name += '/'
if not folder_name.endswith("/"):
folder_name += "/"
try:
s3 = boto3.client('s3')
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/')
s3 = boto3.client("s3")
response = s3.list_objects_v2(
Bucket=bucket_name, Prefix=folder_name, Delimiter="/"
)
items = []
# Add files to the list
if 'Contents' in response:
items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name])
if "Contents" in response:
items.extend(
[
content["Key"]
for content in response["Contents"]
if content["Key"] != folder_name
]
)
# Add immediate subfolders to the list
if 'CommonPrefixes' in response:
items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']])
if "CommonPrefixes" in response:
items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]])
return items
@ -361,7 +378,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
logger.error("Incomplete credentials provided.")
return []
except Exception as e:
logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
logger.error(
f"Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}"
)
return []
@ -374,15 +393,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
:return: A list of XML file keys in the specified S3 folder.
"""
try:
s3 = boto3.client('s3')
s3 = boto3.client("s3")
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
if 'Contents' not in response:
logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
if "Contents" not in response:
logger.info(
f"No files found in folder {folder_name} in bucket {bucket_name}."
)
return []
# Filter XML files
xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')]
xml_files = [
content["Key"]
for content in response["Contents"]
if content["Key"].endswith(".xml")
]
return xml_files
except NoCredentialsError:
@ -392,5 +417,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
logger.error("Incomplete credentials provided.")
return []
except Exception as e:
logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
logger.error(
f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
)
return []