Model/backend/Outputs.py
Khalim Conn-Kowlessar af5dbe325d feat(modelling): cut plan→recommendation readers onto plan_id
Rewrite the three structurally-identical m2m-join readers
(portfolio_functions.aggregate_portfolio_recommendations,
Outputs.get_recommendations_from_db, export get_recommendations) to join
PlanModel directly via recommendation.plan_id, dropping the plan_recommendations
join and its now-unused import. The writers set plan_id (prior slice), so the
rows resolve. test_export pins the export reader through the cut (its fixtures
now set recommendation.plan_id). A portfolio_functions DB characterization test
lands with the scenario consolidation (which provides the full-parity scenario
table the aggregation writes to).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 21:09:43 +00:00

390 lines
14 KiB
Python

import msgpack
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from datetime import datetime
from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
)
class Outputs:
    """
    Creates standard outputs for the backend, e.g. the excel output used for the MDS data
    sheet required by E.ON.

    One instance is bound to a single portfolio and a single output format. Constructing an
    instance opens a database session and downloads the cleaned EPC lookup from S3.
    """

    # Supported output formats; anything else is rejected by __init__
    FORMATS = ["mds"]
    # S3 bucket the cleaned EPC lookup is read from and the excel outputs are written to
    BUCKET_NAME = "retrofit-data-dev"
    # Maps the measure_type of a recommendation to the column label used in the MDS sheet
    MDS_MEASURE_MAPPING = {
        "external_wall_insulation": "EWI (Trad Const)",
        "cavity_wall_insulation": "CWI",
        "loft_insulation": "LI",
        "party_wall_insulation": "Party Wall Insu",
        "internal_wall_insulation": "IWI (POA - Prov Sum Only)",
        "suspended_floor_insulation": "U/F Insu (Manual install)",
        "solid_floor_insulation": "Solid floor insl (Out of scope - Prov sum only)",
        "air_source_heat_pump": "ASHP Htg",
        "ground_source_heat_pump": "GSHP Htg",
        "shared_ground_loops": "Shared ground loops",
        "communal_heat_networks": "Communal heat networks",
        "district_heating_networks": "District heating networks",
        "high_heat_retention_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)",
        "low_energy_lighting": "Low Energy Bulbs",
        "cylinder_insulation": "Cyl Insulation",
        "smart_controls": "Smart controls",
        "zone_controls": "Zone controls",
        "trvs": "Upgrade TRV's",
        "solar_pv": "Solar PV",
        "solar_thermal": "Solar Thermal",
        "double_glazing": "Double Glazing (POA - Prov sum only)",
        "draught_proofing": "Draught Proofing",
        "mechanical_ventilation": "Ventilation upgrade",
        "gas_boiler": "Gas Boiler Replacement",
        "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
        "room_in_roof_insulation": "RIR (POA - Prov sum only)",
        "ev_charging": "EV Charging",
        "battery": "Battery",
    }
    # The bills columns are split out - we include them and aggregate, without appliances
    _BILL_COLUMNS = [
        "heating_cost_current",
        "hot_water_cost_current",
        "lighting_cost_current",
        "gas_standing_charge",
        "electricity_standing_charge",
    ]
    # Property columns carried into the MDS sheet, in output order
    _PROPERTY_COLUMNS = [
        "property_id",
        "address",
        "postcode",
        "uprn",
        "current_epc_rating",
        "current_sap_points",
        "primary_energy_consumption",
        "property_type",
        "built_form",
        "total_floor_area",
        "walls",
        "tenure",
        "mainfuel",
    ]
    # Database column name -> MDS sheet header
    _MDS_COLUMN_RENAMES = {
        "address": "Address",
        "postcode": "Postcode",
        "uprn": "UPRN",
        "current_epc_rating": "Pre EPC",
        "current_sap_points": "EPC Source",
        "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
        "property_type": "Property Type",
        "built_form": "Built Form",
        "total_floor_area": "Floor area m2 (If known)",
        "walls": "Wall Type (Mandatory field)",
        "tenure": "Tenure",
    }
    # Recommendation columns summed per property to get the predicted impact of a scenario
    _IMPACT_COLUMNS = ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]

    def __init__(self, format, portfolio_id):
        """
        This class handles the creation of standard outputs for the backend. For example, creation of
        an excel output, to be used for the MDS data sheet, required by E.ON
        :param format: The format of the output, e.g. mds
        :param portfolio_id: The id of the portfolio for which the output is being created
        :raises ValueError: If format is not one of FORMATS
        """
        if format not in self.FORMATS:
            raise ValueError("Invalid format, should be one of {}".format(self.FORMATS))
        self.format = format
        self.portfolio_id = portfolio_id
        # Used as the date prefix of the exported file names
        self.today = datetime.now().strftime("%Y-%m-%d")
        # Connect to the database
        self.session = sessionmaker(bind=db_engine)()
        # Download cleaned data (msgpack-encoded) and decode it
        raw_lookup = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name=self.BUCKET_NAME,
        )
        self.cleaned_epc_lookup = msgpack.unpackb(raw_lookup, raw=False)

    @staticmethod
    def _columns_as_dict(instance, model):
        """Return {column name: value} for every table column of model, read from instance."""
        return {col.name: getattr(instance, col.name) for col in model.__table__.columns}

    def get_properties_from_db(self):
        """
        Get the properties of this portfolio together with their EPC details.
        :return: A list with one dict per (property, details) pair, holding every column of both
            tables. Where both tables share a column name, the details value wins.
        """
        properties_query = (
            self.session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == self.portfolio_id)
            .all()
        )
        # Merge order matters: the details columns are applied last and override duplicates
        return [
            {
                **self._columns_as_dict(prop.PropertyModel, PropertyModel),
                **self._columns_as_dict(
                    prop.PropertyDetailsEpcModel, PropertyDetailsEpcModel
                ),
            }
            for prop in properties_query
        ]

    def get_plans_from_db(self):
        """
        Get the plans of this portfolio.
        :return: A list with one dict per plan, holding every column of the plan table
        """
        plans_query = (
            self.session.query(PlanModel)
            .filter(PlanModel.portfolio_id == self.portfolio_id)
            .all()
        )
        return [self._columns_as_dict(plan, PlanModel) for plan in plans_query]

    def get_recommendations_from_db(self, plan_ids):
        """
        Get the default recommendations for the given plans, linked by recommendation.plan_id.
        :param plan_ids: The ids of the plans whose recommendations are wanted
        :return: A list with one dict per recommendation, holding every column of the
            recommendation table plus "Scenario ID" (the scenario_id of the owning plan)
        """
        plan_ids = list(plan_ids)
        if not plan_ids:
            # Nothing can match an empty IN (...), so skip the round trip to the database
            return []
        recommendations_query = (
            self.session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanModel,
                PlanModel.id == Recommendation.plan_id,  # access scenario_id
            )
            .filter(
                Recommendation.plan_id.in_(plan_ids),
                # SQL expression, not a Python comparison - must stay as "== True"
                Recommendation.default == True,  # noqa: E712
            )
            .all()
        )
        # Each result row is a (Recommendation, scenario_id) pair
        return [
            {
                **self._columns_as_dict(recommendation, Recommendation),
                "Scenario ID": scenario_id,
            }
            for recommendation, scenario_id in recommendations_query
        ]

    def make_mds_measure_matrix(self, scenario_recommendations):
        """
        Pivot the recommendations of one scenario into one row per property, with one column per
        MDS measure label. A cell holds the label itself when the measure is recommended for the
        property and None otherwise.
        :param scenario_recommendations: DataFrame with "property_id" and "measure_type" columns
        :return: DataFrame with every MDS measure label column followed by "property_id". The
            columns are present even when there are no recommendations, so the result can always
            be merged on "property_id".
        """
        all_measures = list(self.MDS_MEASURE_MAPPING.values())
        matrix_columns = [*all_measures, "property_id"]
        if scenario_recommendations.empty:
            return pd.DataFrame(columns=matrix_columns)

        # Keyed by property_id so each recommendation is placed in O(1); insertion order keeps
        # the properties in order of first appearance
        rows_by_property = {}
        for property_id, measure_type in zip(
            scenario_recommendations["property_id"],
            scenario_recommendations["measure_type"],
        ):
            row = rows_by_property.get(property_id)
            if row is None:
                row = {measure: None for measure in all_measures}
                row["property_id"] = property_id
                rows_by_property[property_id] = row
            measure_label = self.MDS_MEASURE_MAPPING.get(measure_type)
            # A measure without an MDS label has no column in the sheet: the property keeps its
            # row, but no extra column is created for the unknown measure
            if measure_label is not None:
                row[measure_label] = measure_label

        return pd.DataFrame(list(rows_by_property.values()), columns=matrix_columns)

    def _load_export_frames(self):
        """Read properties, plans and default recommendations from the database as DataFrames."""
        self.session.begin()
        try:
            properties_data = self.get_properties_from_db()
            plans_data = self.get_plans_from_db()
            plan_ids = [plan["id"] for plan in plans_data]
            recommendations_data = self.get_recommendations_from_db(plan_ids)
        finally:
            # Always release the session, also when one of the queries fails
            self.session.close()

        recommendations_df = pd.DataFrame(recommendations_data)
        if recommendations_df.empty:
            # Without any recommendation the frame would have no columns at all; keep the ones
            # the per-scenario step filters and aggregates on
            recommendations_df = pd.DataFrame(
                columns=["Scenario ID", "property_id", "measure_type", *self._IMPACT_COLUMNS],
                dtype="float64",
            )
        return pd.DataFrame(properties_data), pd.DataFrame(plans_data), recommendations_df

    def _build_base_mds(self, properties_df):
        """Build the scenario-independent part of the MDS sheet: one row per property."""
        mds = (
            properties_df[[*self._PROPERTY_COLUMNS, *self._BILL_COLUMNS]]
            .copy()
            .rename(columns=self._MDS_COLUMN_RENAMES)
        )
        # Plain addition (not a NaN-skipping sum): a missing component leaves the bill missing
        estimated_bill = mds[self._BILL_COLUMNS[0]]
        for bill_column in self._BILL_COLUMNS[1:]:
            estimated_bill = estimated_bill + mds[bill_column]
        mds["Estimated bill (£ per year)"] = estimated_bill
        mds = mds.drop(columns=self._BILL_COLUMNS)

        # Formatting - Pre EPC is an enum; values without a .value (e.g. a missing rating) are
        # passed through unchanged
        mds["Pre EPC"] = [getattr(x, "value", x) for x in mds["Pre EPC"].values]
        wall_column = "Wall Type (Mandatory field)"
        # Keep only the part of the wall description before the first comma
        mds[wall_column] = mds[wall_column].str.split(",").str[0]
        # Remove average thermal transmittance field
        mds[wall_column] = np.where(
            mds[wall_column].str.contains("Average thermal transmittance"),
            "",
            mds[wall_column],
        )

        # Translate the main fuel description to a fuel type using the cleaned EPC lookup
        # NOTE(review): assumes clean_description is unique in the lookup, otherwise this left
        # merge duplicates property rows - confirm against the lookup data
        fuel_lookup = pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
            ["clean_description", "fuel_type"]
        ]
        mds = mds.merge(
            fuel_lookup,
            left_on="mainfuel",
            right_on="clean_description",
            how="left",
        )
        return mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
            columns=["clean_description", "mainfuel"]
        )

    def _build_scenario_mds(self, mds, scenario_recommendations):
        """Add the measure matrix and the predicted impacts of one scenario to the base sheet."""
        # For each measure, we create the measure matrix
        scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
        # Calculate the predicted impact on: SAP, heat demand, bills, kwh
        recommendation_impacts = (
            scenario_recommendations.groupby("property_id")[self._IMPACT_COLUMNS]
            .sum()
            .reset_index()
        )
        scenario_mds = mds.merge(
            scenario_measure_matrix, how="left", on="property_id"
        ).merge(recommendation_impacts, how="left", on="property_id")
        # If we have no recommendations, sap_points, kwh_savings, heat_demand will be NaN.
        # Every missing value in the sheet (impacts and measure cells alike) is written as 0.
        scenario_mds = scenario_mds.fillna(0)

        scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
        # Round Post SAP down to the nearest integer
        scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(int)
        scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(sap_to_epc)
        scenario_mds["Heating Demand Kwh/m2/y"] = (
            scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
        )
        return scenario_mds.rename(
            columns={
                "sap_points": "Predicted SAP Points",
                "kwh_savings": "Energy Saving (Kwh)",
                "energy_cost_savings": "Bill Reduction (£ per yr)",
            }
        )

    def export_mds(self):
        """
        This function will export the data in the MDS format: one excel file per scenario of the
        portfolio, saved to S3 under engine_outputs/<format>/.
        Core data required:
        - Property address
        - Property postcode
        - uprn
        - recommended measures
        - pre-EPC
        - pre-SAP
        - pre Heat Demand
        - Property Type
        - Built form
        - Wall type
        - Tenure
        - Fuel type
        - Estimated bill
        - Recommended measures
        - Post EPC
        - Post heat demand
        - Bill savings
        - Kwh savings
        """
        properties_df, plans_df, recommendations_df = self._load_export_frames()
        scenario_ids = plans_df["scenario_id"].unique()

        # We start to create the MDS sheet
        mds = self._build_base_mds(properties_df)

        # Build every scenario sheet first, so nothing is uploaded if one of them fails
        mds_output_by_scenario = {}
        for scenario_id in scenario_ids:
            scenario_recommendations = recommendations_df[
                recommendations_df["Scenario ID"] == scenario_id
            ]
            mds_output_by_scenario[scenario_id] = self._build_scenario_mds(
                mds, scenario_recommendations
            )

        # We now save them to s3 as excels
        for scenario_id, scenario_mds in mds_output_by_scenario.items():
            save_excel_to_s3(
                df=scenario_mds,
                file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
                bucket_name=self.BUCKET_NAME,
            )

    def export(self):
        """
        This function will export the data in the required format
        :raises NotImplementedError: If there is no exporter for self.format
        """
        if self.format == "mds":
            self.export_mds()
            # Without this return a successful export fell through to the error below
            return
        raise NotImplementedError("Export format not implemented")