from datetime import datetime

import msgpack
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker

from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import (
    Recommendation,
    PlanModel,
    PlanRecommendations,
)


class Outputs:
    """
    Creates standard outputs for the backend.

    For example, creation of an excel output, to be used for the MDS data sheet,
    required by E.ON. One workbook is written to S3 per scenario found in the
    portfolio's plans.
    """

    # Output formats accepted by the constructor.
    FORMATS = ["mds"]

    # S3 locations used by this class.
    S3_BUCKET = "retrofit-data-dev"
    # NOTE(review): the key ends in ".bson" but the payload is decoded with msgpack.
    CLEANED_EPC_KEY = "cleaned_epc_data/cleaned.bson"

    # Internal measure type -> column label used in the MDS sheet.
    MDS_MEASURE_MAPPING = {
        "external_wall_insulation": "EWI (Trad Const)",
        "cavity_wall_insulation": "CWI",
        "loft_insulation": "LI",
        "party_wall_insulation": "Party Wall Insu",
        "internal_wall_insulation": "IWI (POA - Prov Sum Only)",
        "suspended_floor_insulation": "U/F Insu (Manual install)",
        "solid_floor_insulation": "Solid floor insl (Out of scope - Prov sum only)",
        "air_source_heat_pump": "ASHP Htg",
        "ground_source_heat_pump": "GSHP Htg",
        "shared_ground_loops": "Shared ground loops",
        "communal_heat_networks": "Communal heat networks",
        "district_heating_networks": "District heating networks",
        "high_heat_retention_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)",
        "low_energy_lighting": "Low Energy Bulbs",
        "cylinder_insulation": "Cyl Insulation",
        "smart_controls": "Smart controls",
        "zone_controls": "Zone controls",
        "trvs": "Upgrade TRV's",
        "solar_pv": "Solar PV",
        "solar_thermal": "Solar Thermal",
        "double_glazing": "Double Glazing (POA - Prov sum only)",
        "draught_proofing": "Draught Proofing",
        "mechanical_ventilation": "Ventilation upgrade",
        "gas_boiler": "Gas Boiler Replacement",
        "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
        "room_in_roof_insulation": "RIR (POA - Prov sum only)",
        "ev_charging": "EV Charging",
        "battery": "Battery",
    }

    # Bill components that are summed into the estimated annual bill (no appliances).
    _BILL_COLUMNS = [
        "heating_cost_current",
        "hot_water_cost_current",
        "lighting_cost_current",
        "gas_standing_charge",
        "electricity_standing_charge",
    ]

    # Property columns pulled into the MDS sheet, in output order.
    _MDS_PROPERTY_COLUMNS = [
        "property_id",
        "address",
        "postcode",
        "uprn",
        "current_epc_rating",
        "current_sap_points",
        "primary_energy_consumption",
        "property_type",
        "built_form",
        "total_floor_area",
        "walls",
        "tenure",
        "mainfuel",
    ] + _BILL_COLUMNS

    # Database column name -> MDS sheet column header.
    _MDS_COLUMN_RENAMES = {
        "address": "Address",
        "postcode": "Postcode",
        "uprn": "UPRN",
        "current_epc_rating": "Pre EPC",
        "current_sap_points": "EPC Source",
        "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
        "property_type": "Property Type",
        "built_form": "Built Form",
        "total_floor_area": "Floor area m2 (If known)",
        "walls": "Wall Type (Mandatory field)",
        "tenure": "Tenure",
    }

    # Per-recommendation impact columns that are summed per property.
    _IMPACT_COLUMNS = ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]

    def __init__(self, format, portfolio_id):
        """
        This class handles the creation of standard outputs for the backend.
        For example, creation of an excel output, to be used for the MDS data sheet,
        required by E.ON

        Constructing an instance opens a database session and downloads and decodes
        the cleaned EPC lookup from S3.

        :param format: The format of the output, e.g. mds
        :param portfolio_id: The id of the portfolio for which the output is being created
        :raises ValueError: If ``format`` is not one of ``FORMATS``
        """
        if format not in self.FORMATS:
            raise ValueError(f"Invalid format, should be one of {self.FORMATS}")

        self.format = format
        self.portfolio_id = portfolio_id
        # Date stamp used in the output file names.
        self.today = datetime.now().strftime("%Y-%m-%d")

        # Connect to the database
        self.session = sessionmaker(bind=db_engine)()

        # Download cleaned data and decode the msgpack payload
        raw_lookup = read_from_s3(
            s3_file_name=self.CLEANED_EPC_KEY,
            bucket_name=self.S3_BUCKET,
        )
        self.cleaned_epc_lookup = msgpack.unpackb(raw_lookup, raw=False)

    def get_properties_from_db(self) -> list:
        """
        Get the properties of this portfolio joined with their EPC details.

        :return: One dict per property, keyed by column name. Columns of
            PropertyDetailsEpcModel override same-named columns of PropertyModel.
        """
        joined_rows = (
            self.session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == self.portfolio_id)
            .all()
        )

        property_columns = [col.name for col in PropertyModel.__table__.columns]
        detail_columns = [col.name for col in PropertyDetailsEpcModel.__table__.columns]

        # Transform properties data to include all fields dynamically
        return [
            {
                **{name: getattr(row.PropertyModel, name) for name in property_columns},
                **{
                    name: getattr(row.PropertyDetailsEpcModel, name)
                    for name in detail_columns
                },
            }
            for row in joined_rows
        ]

    def get_plans_from_db(self) -> list:
        """
        Get every plan belonging to this portfolio.

        :return: One dict per plan, keyed by PlanModel column name
        """
        plans = (
            self.session.query(PlanModel)
            .filter(PlanModel.portfolio_id == self.portfolio_id)
            .all()
        )

        plan_columns = [col.name for col in PlanModel.__table__.columns]
        return [{name: getattr(plan, name) for name in plan_columns} for plan in plans]

    def get_recommendations_from_db(self, plan_ids) -> list:
        """
        Get the default recommendations attached to the given plans.

        :param plan_ids: Ids of the plans whose recommendations are wanted
        :return: One dict per (recommendation, plan) link, keyed by Recommendation
            column name plus a "Scenario ID" key holding the plan's scenario_id
        """
        plan_ids = list(plan_ids)
        if not plan_ids:
            # Nothing can match an empty IN (), so skip the round trip.
            return []

        linked_rows = (
            self.session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(
                PlanModel,
                PlanModel.id == PlanRecommendations.plan_id,  # Gives access to scenario_id
            )
            .filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                Recommendation.default == True,  # noqa: E712 - SQL expression, not a Python bool
            )
            .all()
        )

        recommendation_columns = [col.name for col in Recommendation.__table__.columns]
        return [
            {
                **{name: getattr(recommendation, name) for name in recommendation_columns},
                "Scenario ID": scenario_id,
            }
            for recommendation, scenario_id in linked_rows
        ]

    def make_mds_measure_matrix(self, scenario_recommendations) -> pd.DataFrame:
        """
        Pivot recommendations into one row per property with one column per MDS measure.

        A measure cell holds the measure's MDS label when the property has a
        recommendation of that type, and None otherwise. Measure types that are not
        in ``MDS_MEASURE_MAPPING`` are ignored.

        :param scenario_recommendations: DataFrame with "property_id" and
            "measure_type" columns
        :return: DataFrame with every MDS measure label as a column plus
            "property_id". The columns are present even when the input is empty,
            so the result can always be merged on "property_id".
        """
        all_measures = list(self.MDS_MEASURE_MAPPING.values())
        matrix_columns = [*all_measures, "property_id"]

        if scenario_recommendations.empty:
            return pd.DataFrame(columns=matrix_columns)

        # Keyed by property_id so each lookup is O(1); dicts keep first-seen order.
        rows_by_property = {}
        for property_id, measure_type in zip(
            scenario_recommendations["property_id"],
            scenario_recommendations["measure_type"],
        ):
            row = rows_by_property.get(property_id)
            if row is None:
                row = dict.fromkeys(all_measures)
                row["property_id"] = property_id
                rows_by_property[property_id] = row

            measure_label = self.MDS_MEASURE_MAPPING.get(measure_type)
            if measure_label is not None:
                row[measure_label] = measure_label

        return pd.DataFrame(list(rows_by_property.values()), columns=matrix_columns)

    def _load_tables(self):
        """Query properties, plans and recommendations and return them as DataFrames."""
        # The session starts its own transaction on first use; always release it,
        # even when a query fails.
        try:
            properties_data = self.get_properties_from_db()
            plans_data = self.get_plans_from_db()
            plan_ids = [plan["id"] for plan in plans_data]
            recommendations_data = self.get_recommendations_from_db(plan_ids)
        finally:
            self.session.close()

        properties_df = pd.DataFrame(properties_data)
        plans_df = pd.DataFrame(plans_data)
        # Explicit columns keep the frame usable when there are no recommendations.
        recommendation_columns = [col.name for col in Recommendation.__table__.columns]
        recommendations_df = pd.DataFrame(
            recommendations_data, columns=[*recommendation_columns, "Scenario ID"]
        )
        return properties_df, plans_df, recommendations_df

    def _build_base_mds(self, properties_df) -> pd.DataFrame:
        """Build the scenario-independent part of the MDS sheet from the properties."""
        mds = (
            properties_df[self._MDS_PROPERTY_COLUMNS]
            .copy()
            .rename(columns=self._MDS_COLUMN_RENAMES)
        )

        # The bills columns are split out - we aggregate them, without appliances
        mds["Estimated bill (£ per year)"] = (
            mds["heating_cost_current"]
            + mds["hot_water_cost_current"]
            + mds["lighting_cost_current"]
            + mds["gas_standing_charge"]
            + mds["electricity_standing_charge"]
        )
        mds = mds.drop(columns=self._BILL_COLUMNS)

        # Formatting - Pre EPC is an enum; values without a .value (e.g. None) pass through
        mds["Pre EPC"] = [getattr(rating, "value", rating) for rating in mds["Pre EPC"]]

        # Keep only the text before the first comma of the wall description
        wall_col = "Wall Type (Mandatory field)"
        mds[wall_col] = mds[wall_col].str.split(",").str[0]
        # Remove average thermal transmittance field
        mds[wall_col] = np.where(
            mds[wall_col].str.contains("Average thermal transmittance"),
            "",
            mds[wall_col],
        )

        # Map the EPC main fuel description onto a fuel type. The lookup is
        # de-duplicated so the left merge can never multiply property rows.
        fuel_lookup = pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
            ["clean_description", "fuel_type"]
        ].drop_duplicates(subset="clean_description")
        mds = mds.merge(
            fuel_lookup,
            left_on="mainfuel",
            right_on="clean_description",
            how="left",
        )
        return mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
            columns=["clean_description", "mainfuel"]
        )

    def _build_scenario_mds(self, mds, scenario_recommendations) -> pd.DataFrame:
        """Add the measures and predicted impacts of one scenario to the base sheet."""
        # For each measure, we create the measure matrix
        scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)

        # Calculate the predicted impact on: SAP, heat demand, bills, kwh
        recommendation_impacts = (
            scenario_recommendations.groupby("property_id")[self._IMPACT_COLUMNS]
            .sum()
            .reset_index()
        )

        scenario_mds = mds.merge(
            scenario_measure_matrix, how="left", on="property_id"
        ).merge(recommendation_impacts, how="left", on="property_id")

        # Properties without recommendations have NaN impacts. Every remaining gap,
        # including measure cells, is filled with 0.
        scenario_mds = scenario_mds.fillna(0)

        # int() truncates, which rounds the (non-negative) Post SAP down
        scenario_mds["Post SAP"] = (
            scenario_mds["EPC Source"] + scenario_mds["sap_points"]
        ).apply(int)
        scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(sap_to_epc)

        scenario_mds["Heating Demand Kwh/m2/y"] = (
            scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
        )

        return scenario_mds.rename(
            columns={
                "sap_points": "Predicted SAP Points",
                "kwh_savings": "Energy Saving (Kwh)",
                "energy_cost_savings": "Bill Reduction (£ per yr)",
            }
        )

    def export_mds(self) -> None:
        """
        This function will export the data in the MDS format

        Core data required:
        - Property address
        - Property postcode
        - uprn
        - recommended measures
        - pre-EPC
        - pre-SAP
        - pre Heat Demand
        - Property Type
        - Built form
        - Wall type
        - Tenure
        - Fuel type
        - Estimated bill
        - Recommended measures
        - Post EPC
        - Post heat demand
        - Bill savings
        - Kwh savings

        One excel file per scenario is saved to S3. Nothing is saved when the
        portfolio has no plans.
        """
        properties_df, plans_df, recommendations_df = self._load_tables()

        # A portfolio without plans yields a column-less frame; there is nothing to export.
        scenario_ids = [] if plans_df.empty else plans_df["scenario_id"].unique()

        # We start to create the MDS sheet
        mds = self._build_base_mds(properties_df)

        # Build every sheet before saving, so a failure writes no partial output.
        mds_output_by_scenario = {}
        for scenario_id in scenario_ids:
            scenario_recommendations = recommendations_df[
                recommendations_df["Scenario ID"] == scenario_id
            ]
            mds_output_by_scenario[scenario_id] = self._build_scenario_mds(
                mds, scenario_recommendations
            )

        # We now save them to s3 as excels
        for scenario_id, scenario_mds in mds_output_by_scenario.items():
            save_excel_to_s3(
                df=scenario_mds,
                file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
                bucket_name=self.S3_BUCKET,
            )

    def export(self) -> None:
        """
        This function will export the data in the required format

        :raises NotImplementedError: If no exporter exists for ``self.format``
        """
        if self.format == "mds":
            self.export_mds()
            # Return here; otherwise a successful export falls through to the raise.
            return

        raise NotImplementedError("Export format not implemented")