Model/backend/Outputs.py

import msgpack
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from datetime import datetime

from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import (
    Recommendation,
    PlanModel,
)


class Outputs:
    FORMATS = ["mds"]

    MDS_MEASURE_MAPPING = {
        "external_wall_insulation": "EWI (Trad Const)",
        "cavity_wall_insulation": "CWI",
        "loft_insulation": "LI",
        "party_wall_insulation": "Party Wall Insu",
        "internal_wall_insulation": "IWI (POA - Prov Sum Only)",
        "suspended_floor_insulation": "U/F Insu (Manual install)",
        "solid_floor_insulation": "Solid floor insl (Out of scope - Prov sum only)",
        "air_source_heat_pump": "ASHP Htg",
        "ground_source_heat_pump": "GSHP Htg",
        "shared_ground_loops": "Shared ground loops",
        "communal_heat_networks": "Communal heat networks",
        "district_heating_networks": "District heating networks",
        "high_heat_retention_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)",
        "low_energy_lighting": "Low Energy Bulbs",
        "cylinder_insulation": "Cyl Insulation",
        "smart_controls": "Smart controls",
        "zone_controls": "Zone controls",
        "trvs": "Upgrade TRV's",
        "solar_pv": "Solar PV",
        "solar_thermal": "Solar Thermal",
        "double_glazing": "Double Glazing (POA - Prov sum only)",
        "draught_proofing": "Draught Proofing",
        "mechanical_ventilation": "Ventilation upgrade",
        "gas_boiler": "Gas Boiler Replacement",
        "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
        "room_in_roof_insulation": "RIR (POA - Prov sum only)",
        "ev_charging": "EV Charging",
        "battery": "Battery",
    }

    def __init__(self, format, portfolio_id):
        """
        This class handles the creation of standard outputs for the backend. For example, creation of
        an excel output, to be used for the MDS data sheet, required by E.ON

        :param format: The format of the output, e.g. mds
        :param portfolio_id: The id of the portfolio for which the output is being created
        """

        if format not in self.FORMATS:
            raise ValueError("Invalid format, should be one of {}".format(self.FORMATS))

        self.format = format
        self.portfolio_id = portfolio_id
        self.today = datetime.now().strftime("%Y-%m-%d")

        # Connect to the database
        self.session = sessionmaker(bind=db_engine)()

        # Download cleaned data
        self.cleaned_epc_lookup = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name="retrofit-data-dev",
        )

        self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)

    def get_properties_from_db(self):
        # Get properties and their details for a specific portfolio
        properties_query = (
            self.session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(
                PropertyModel.portfolio_id
                == self.portfolio_id  # Filter by portfolio ID
            )
            .all()
        )

        # Transform properties data to include all fields dynamically
        properties_data = [
            {
                **{
                    col.name: getattr(prop.PropertyModel, col.name)
                    for col in PropertyModel.__table__.columns
                },
                **{
                    col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
                    for col in PropertyDetailsEpcModel.__table__.columns
                },
            }
            for prop in properties_query
        ]

        return properties_data

    def get_plans_from_db(self):

        plans_query = (
            self.session.query(PlanModel)
            .filter(PlanModel.portfolio_id == self.portfolio_id)
            .all()
        )
        # Transform plans data to include all fields dynamically
        plans_data = [
            {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
            for plan in plans_query
        ]

        return plans_data

    def get_recommendations_from_db(self, plan_ids):
        # Get default recommendations for those plans, linked by recommendation.plan_id
        recommendations_query = (
            self.session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanModel,
                PlanModel.id == Recommendation.plan_id,  # access scenario_id
            )
            .filter(
                Recommendation.plan_id.in_(plan_ids),
                Recommendation.default == True,  # Filtering for default recommendations
            )
            .all()
        )

        # Transform recommendations data to include all fields dynamically and include scenario_id
        recommendations_data = [
            {
                **{
                    col.name: (
                        getattr(rec.Recommendation, col.name)
                        if hasattr(rec, "Recommendation")
                        else getattr(rec, col.name)
                    )
                    for col in Recommendation.__table__.columns
                },
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]

        return recommendations_data

    def make_mds_measure_matrix(self, scenario_recommendations):
        all_measures = list(self.MDS_MEASURE_MAPPING.values())

        # Collect rows in a list
        rows = []

        # Populate the rows list
        for idx, row in scenario_recommendations.iterrows():
            property_id = row["property_id"]
            measure_type = row["measure_type"]

            # Get the label for the current type
            measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)

            # If the property_id already exists in the collected rows, update it
            existing_row = next(
                (item for item in rows if item["property_id"] == property_id), None
            )
            if existing_row is None:
                # Create a new row if the property_id doesn't exist
                new_row = {measure: None for measure in all_measures}
                new_row["property_id"] = property_id
                rows.append(new_row)
            else:
                new_row = existing_row

            # Set the corresponding measure label in the row
            new_row[measure_label] = measure_label

        # Convert the list of dictionaries to a DataFrame
        matrix = pd.DataFrame(rows)

        # Reset the index for cleanliness
        matrix.reset_index(drop=True, inplace=True)

        return matrix

    def export_mds(self):
        """
        This function will export the data in the MDS format
        Core data required:
        - Property address
        - Property postcode
        - uprn
        - recommended measures
        - pre-EPC
        - pre-SAP
        - pre Heat Demand
        - Property Type
        - Built form
        - Wall type
        - Tenure
        - Fuel type
        - Estimated bill
        - Recommended measures
        - Post EPC
        - Post heat demand
        - Bill savings
        - Kwh savings
        """

        self.session.begin()
        properties_data = self.get_properties_from_db()

        plans_data = self.get_plans_from_db()
        plan_ids = [plan["id"] for plan in plans_data]

        recommendations_data = self.get_recommendations_from_db(plan_ids)
        self.session.close()

        # Convert these tables to dataframes
        properties_df = pd.DataFrame(properties_data)
        plans_df = pd.DataFrame(plans_data)
        recommendations_df = pd.DataFrame(recommendations_data)

        scenario_ids = plans_df["scenario_id"].unique()

        # We start to create the MDS sheet
        mds = (
            properties_df[
                [
                    "property_id",
                    "address",
                    "postcode",
                    "uprn",
                    "current_epc_rating",
                    "current_sap_points",
                    "primary_energy_consumption",
                    "property_type",
                    "built_form",
                    "total_floor_area",
                    "walls",
                    "tenure",
                    "mainfuel",
                    # The bills columns are split out - we include them and aggregate, without appliances
                    "heating_cost_current",
                    "hot_water_cost_current",
                    "lighting_cost_current",
                    "gas_standing_charge",
                    "electricity_standing_charge",
                ]
            ]
            .copy()
            .rename(
                columns={
                    "address": "Address",
                    "postcode": "Postcode",
                    "uprn": "UPRN",
                    "current_epc_rating": "Pre EPC",
                    "current_sap_points": "EPC Source",
                    "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
                    "property_type": "Property Type",
                    "built_form": "Built Form",
                    "total_floor_area": "Floor area m2 (If known)",
                    "walls": "Wall Type (Mandatory field)",
                    "tenure": "Tenure",
                }
            )
        )

        mds["Estimated bill (£ per year)"] = (
            mds["heating_cost_current"]
            + mds["hot_water_cost_current"]
            + mds["lighting_cost_current"]
            + mds["gas_standing_charge"]
            + mds["electricity_standing_charge"]
        )

        mds = mds.drop(
            columns=[
                "heating_cost_current",
                "hot_water_cost_current",
                "lighting_cost_current",
                "gas_standing_charge",
                "electricity_standing_charge",
            ]
        )

        # Formatting - Pre EPC is an enum
        mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
        mds["Wall Type (Mandatory field)"] = (
            mds["Wall Type (Mandatory field)"].str.split(",").str[0]
        )
        # Remove average thermal transmittance field
        mds["Wall Type (Mandatory field)"] = np.where(
            mds["Wall Type (Mandatory field)"].str.contains(
                "Average thermal transmittance"
            ),
            "",
            mds["Wall Type (Mandatory field)"],
        )

        mds = mds.merge(
            pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
                ["clean_description", "fuel_type"]
            ],
            left_on="mainfuel",
            right_on="clean_description",
            how="left",
        )
        mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
            columns=["clean_description", "mainfuel"]
        )

        mds["Existing Fuel Type"].value_counts()

        mds_output_by_scenario = {}
        for scenario_id in scenario_ids:
            scenario_recommendations = recommendations_df[
                recommendations_df["Scenario ID"] == scenario_id
            ]

            # For each measure, we create the measure matrix
            scenario_measure_matrix = self.make_mds_measure_matrix(
                scenario_recommendations
            )

            # Calculate the predicted impact on: SAP, heat demand, bills, kwh
            recommendation_impacts = (
                scenario_recommendations.groupby("property_id")[
                    ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
                ]
                .sum()
                .reset_index()
            )

            scenario_mds = mds.merge(
                scenario_measure_matrix, how="left", on="property_id"
            ).merge(recommendation_impacts, how="left", on="property_id")
            # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN
            to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
            for col in to_clean:
                scenario_mds[col].fillna(0, inplace=True)
            scenario_mds.fillna(0, inplace=True)
            scenario_mds["Post SAP"] = (
                scenario_mds["EPC Source"] + scenario_mds["sap_points"]
            )
            # Round Post SAP down to the nearest integer
            scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
            scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
                lambda x: sap_to_epc(x)
            )
            scenario_mds["Heating Demand Kwh/m2/y"] = (
                scenario_mds["Existing Heating Demand Kwh/m2/y"]
                - scenario_mds["heat_demand"]
            )

            scenario_mds = scenario_mds.rename(
                columns={
                    "sap_points": "Predicted SAP Points",
                    "kwh_savings": "Energy Saving (Kwh)",
                    "energy_cost_savings": "Bill Reduction (£ per yr)",
                }
            )

            mds_output_by_scenario[scenario_id] = scenario_mds

        # We now save them to s3 as excels
        for scenario_id, scenario_mds in mds_output_by_scenario.items():
            save_excel_to_s3(
                df=scenario_mds,
                file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
                bucket_name="retrofit-data-dev",
            )

    def export(self):
        """
        This function will export the data in the required format
        """
        if self.format == "mds":
            self.export_mds()

        raise NotImplementedError("Export format not implemented")