# Model/etl/customers/newhaven/slides.py

from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from utils.s3 import read_csv_from_s3


def get_data(portfolio_id, scenario_ids):
    """Load a portfolio's properties plus the plans and default recommendations of some scenarios.

    Args:
        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
        scenario_ids: Collection of values matched against ``Plan.scenario_id``.

    Returns:
        A tuple ``(properties_data, plans_data, recommendations_data)``; each element is a list of
        plain dicts keyed by column name:

        * ``properties_data`` - one dict per joined (property, EPC details) row of the portfolio.
          The columns of both tables are merged into a single dict, so when both tables define a
          column with the same name the EPC-details value overwrites the property value.
        * ``plans_data`` - one dict per plan whose scenario is in ``scenario_ids``.
        * ``recommendations_data`` - one dict per default recommendation attached to one of those
          plans, with the owning plan's scenario added under the key ``"Scenario ID"``.

    Note:
        Plans and recommendations are selected by scenario only; they are not restricted to
        ``portfolio_id`` by this function.
    """

    def row_as_dict(instance, model):
        # Serialise every mapped table column of ``model`` from ``instance``
        return {col.name: getattr(instance, col.name) for col in model.__table__.columns}

    session = sessionmaker(bind=db_engine)()
    # try/finally so the session (and its connection) is released even when a query raises
    try:
        session.begin()

        # Properties and their EPC details for the requested portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id
        ).all()

        properties_data = [
            {
                **row_as_dict(prop.PropertyModel, PropertyModel),
                **row_as_dict(prop.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for prop in properties_query
        ]

        # Plans belonging to the requested scenarios
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
        plans_data = [row_as_dict(plan, Plan) for plan in plans_query]

        # Plan IDs used to reach the recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Default recommendations of those plans, together with the plan's scenario_id
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - SQL expression, not a Python comparison
        ).all()

        recommendations_data = [
            {
                # Fall back to the row itself if it does not expose a ``Recommendation`` attribute
                **row_as_dict(getattr(rec, 'Recommendation', rec), Recommendation),
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]
    finally:
        session.close()

    return properties_data, plans_data, recommendations_data


def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
    """Summarise heating & hot water kWh before and after retrofit for each scenario.

    Savings from ``low_energy_lighting`` and ``solar_pv`` recommendations are kept apart from the
    heating & hot water savings: they do not reduce the heating & hot water demand, but they do
    count towards the total kWh saved when working out the cost per kWh saved.

    Args:
        properties_df: DataFrame with at least ``property_id`` and
            ``current_energy_demand_heating_hotwater``. Fully duplicated rows are dropped.
        recommendations_df: DataFrame with at least ``Scenario ID``, ``default``, ``property_id``,
            ``type``, ``kwh_savings`` and ``estimated_cost``. Only rows with ``default == True``
            are used.
        scenario_ids: Iterable of scenario IDs to summarise.

    Returns:
        A tuple of three DataFrames, each with one row per scenario and a ``scenario_id`` column:

        * population: mean baseline and mean post-retrofit kWh over *all* properties. A property
          without recommendations in the scenario keeps its baseline demand.
        * retrofitted units: the same two means over only the properties that have at least one
          recommendation, plus ``difference`` and ``percentage_reduction``.
        * cost per kWh saved: total estimated cost divided by total kWh saved (heating, lighting
          and solar); NaN when the scenario saves no kWh.
    """
    baseline_col = "current_energy_demand_heating_hotwater"
    post_col = "Post Retrofit Heating & Hotwater kwh"
    savings_col = "Estimated Kwh Savings"
    non_heating_types = ["low_energy_lighting", "solar_pv"]

    # Loop-invariant, so only de-duplicate once
    unique_properties = properties_df.drop_duplicates()

    population_rows = []
    retrofitted_rows = []
    cost_rows = []
    for scenario_id in scenario_ids:
        # Default recommendations of this scenario
        scenario_recommendations = recommendations_df[
            (recommendations_df["Scenario ID"] == scenario_id)
            & (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
        ].copy()

        # Vectorised split of the savings by recommendation type. Unlike a row-wise apply this
        # also works when the scenario has no recommendations at all.
        rec_type = scenario_recommendations["type"]
        kwh_savings = scenario_recommendations["kwh_savings"]
        scenario_recommendations["lighting_kwh"] = kwh_savings.where(rec_type == "low_energy_lighting", 0)
        scenario_recommendations["solar_kwh"] = kwh_savings.where(rec_type == "solar_pv", 0)
        scenario_recommendations[savings_col] = kwh_savings.where(~rec_type.isin(non_heating_types), 0)

        grouped_data = scenario_recommendations.groupby(["property_id"]).agg({
            savings_col: "sum",
            "lighting_kwh": "sum",
            "solar_kwh": "sum",
            "estimated_cost": "sum",
        }).reset_index()

        comparison = unique_properties.merge(grouped_data, on=["property_id"], how="left")

        # After the left merge a missing savings value means "no recommendations in this scenario"
        has_plan = comparison[savings_col].notna()

        # Properties without a plan keep their baseline demand. Leaving their savings as NaN would
        # silently drop them from the population average below.
        comparison[post_col] = comparison[baseline_col] - comparison[savings_col].fillna(0)

        population_avgs = comparison[[baseline_col, post_col]].mean()

        # Before / after for only the properties that have a plan
        with_savings = comparison[has_plan]
        retrofitted_avgs = with_savings[[baseline_col, post_col]].mean()
        retrofitted_avgs["difference"] = retrofitted_avgs[baseline_col] - retrofitted_avgs[post_col]
        retrofitted_avgs["percentage_reduction"] = (
            100 * retrofitted_avgs["difference"] / retrofitted_avgs[baseline_col]
        )

        # Cost per kWh saved, counting lighting and solar savings as well
        total_kwh_saved = (
            with_savings[savings_col].sum()
            + with_savings["lighting_kwh"].sum()
            + with_savings["solar_kwh"].sum()
        )
        total_cost = with_savings["estimated_cost"].sum()
        # Undefined when nothing is saved; report NaN rather than inf / ZeroDivisionError
        cost_per_kwh_saved = total_cost / total_kwh_saved if total_kwh_saved else float("nan")

        population_rows.append({"scenario_id": scenario_id, **population_avgs.to_dict()})
        retrofitted_rows.append({"scenario_id": scenario_id, **retrofitted_avgs.to_dict()})
        cost_rows.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})

    scenario_comparison_population = pd.DataFrame(population_rows)
    scenario_comparison_retrofitted_units = pd.DataFrame(retrofitted_rows)
    cost_per_kwh_saved_table = pd.DataFrame(cost_rows)

    return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table


def slides():
    """Compute the figures used for the Newhaven portfolio slides.

    This is an analysis script: it needs database access (through ``get_data``) and the ECO4
    project scores matrix CSV at a hard-coded local path. Every figure is held in a local
    variable and the function returns ``None``, so the results are meant to be inspected
    interactively (for example from a debugger).

    Steps:
        1. Baseline energy demand of the portfolio.
        2. Post-retrofit heating & hot water kWh and cost per kWh saved for each scenario.
        3. Share of properties receiving each (mapped) recommendation type per scenario.
        4. MVP estimate of GBIS funding for scenario 47.
        5. External wall insulation cost for scenario 49 and the cavity wall units of scenario 47.
        6. kWh savings per m2 of floor area by recommendation type for scenario 51.

    Side effects:
        Sets the pandas display options ``max_columns``, ``max_rows`` and ``max_colwidth`` to
        ``None`` for the whole process.

    Raises:
        ValueError: If the portfolio does not contain exactly 2553 property rows.
    """
    # Right now this is the second version of the Newhaven portfolio
    portfolio_id = 90
    scenario_ids = [47, 48, 49, 50, 51]
    funding_scenario_id = 47
    ewi_scenario_id = 49
    final_scenario_id = 51

    properties_data, _plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)

    properties_df = pd.DataFrame(properties_data)
    recommendations_df = pd.DataFrame(recommendations_data)

    if properties_df.shape[0] != 2553:
        raise ValueError("The number of unique properties is not 2553")

    # Q1: What is the baseline total and heating & hot water energy demand of the portfolio?
    heating_hotwater_kwh = (
        properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
        .mean()
    )

    # Q2: For each scenario, what is the heating and hot water kWh after retrofit, for the entire
    # population (including properties without retrofit) and for just the retrofitted properties?
    # We also calculate the cost per kWh saved
    scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
        estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
    )

    # Q3: For each scenario, how many properties receive each type of recommendation?
    recommendations_df["type_mapped"] = recommendations_df["type"].replace(
        {
            "loft_insulation": "roof_insulation",
            "room_roof_insulation": "roof_insulation",
            "flat_roof_insulation": "roof_insulation",
            "hot_water_tank_insulation": "other",
            "cylinder_thermostat": "other",
            "sealing_open_fireplace": "other",
            "suspended_floor_insulation": "floor_insulation",
            "solid_floor_insulation": "floor_insulation",
        }
    )

    # na=False: a missing description must not count as a match. Without it str.contains yields
    # NaN, which np.where treats as true, labelling those rows as heat pumps.
    is_ashp = recommendations_df["description"].str.contains("air source heat pump", regex=False, na=False)
    recommendations_df["type_mapped"] = np.where(
        is_ashp,
        "air_source_heat_pump",
        recommendations_df["type_mapped"]
    )

    # Count the unique properties per scenario and mapped recommendation type
    recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(  # noqa: E712
        ['Scenario ID', 'type_mapped']
    ).agg({
        'property_id': 'nunique'
    }).reset_index()

    recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
    recommendation_summary["Percentage of Properties"] = 100 * (
        recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
    )

    recommendation_summary_final_scenario = recommendation_summary[
        recommendation_summary["Scenario ID"].isin([final_scenario_id])
    ]

    # MVP implementation of funding estimation for the most basic scenario, using GBIS

    # NOTE(review): hard-coded path on a developer machine
    project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")

    eco4_scores_sap_table = pd.DataFrame([
        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
    ])

    def find_band(value):
        """Return the band whose range contains ``floor(value)``, or None when no band does."""
        value_floored = np.floor(value)
        matches = eco4_scores_sap_table[
            (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored)
        ]
        if matches.empty:
            # Below 1, above 100 or NaN
            return None
        # 86 and 96 sit on the shared edge of two bands; take the higher band so the result is
        # always a single label.
        # NOTE(review): this treats the lower bound as inclusive - confirm against the ECO4 guidance
        return matches.sort_values("From", ascending=False)["Band"].iloc[0]

    def find_abs(sap_movement, starting_sap, floor_area):
        """Look up the "Cost Savings" score for a SAP movement; 0 when no score applies."""
        starting_band = find_band(starting_sap)
        finishing_band = find_band(starting_sap + sap_movement)
        if starting_band is None or finishing_band is None or starting_band == finishing_band:
            return 0

        if pd.isnull(floor_area):
            # An unknown floor area would otherwise fall through to the largest segment
            return 0

        if floor_area <= 72:
            floor_area_segment = '0-72'
        elif floor_area <= 97:
            floor_area_segment = "73-97"
        elif floor_area <= 199:
            floor_area_segment = "98-199"
        else:
            floor_area_segment = "200+"

        scores = project_scores_matrix[
            (project_scores_matrix["Floor Area Segment"] == floor_area_segment) &
            (project_scores_matrix["Starting Band"] == starting_band) &
            (project_scores_matrix["Finishing Band"] == finishing_band)
        ]
        if scores.empty:
            # The matrix has no entry for this band movement
            return 0
        return scores["Cost Savings"].iloc[0]

    def identify_funding_measure(p, p_recs, is_social):
        """Return the most expensive fundable measure recommended for property ``p``, or None."""
        candidates = []
        for measure in ("cavity_wall_insulation", "loft_insulation"):
            measure_recs = p_recs[p_recs["type"] == measure]
            if measure_recs.empty:
                continue
            # First row: also well defined if a measure is recommended more than once
            rec = measure_recs.iloc[0]
            candidates.append({
                "property_id": p["property_id"],
                "measure": rec["type"],
                "cost": rec["estimated_cost"],
                "abs": find_abs(
                    sap_movement=rec["sap_points"],
                    starting_sap=p["current_sap_points"],
                    floor_area=p["total_floor_area"]
                ),
                "is_social": is_social
            })

        if not candidates:
            return None

        ranked = pd.DataFrame(candidates).sort_values("cost", ascending=False)
        return ranked.head(1).to_dict(orient="records")[0]

    social_tenure = ["rental (social)", "Rented (social)"]
    scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([funding_scenario_id])]
    # Group once rather than filtering the whole frame for every property
    recs_by_property = {pid: recs for pid, recs in scenario_recs.groupby("property_id")}

    funding = []
    for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
        p_recs = recs_by_property.get(p["property_id"])
        # Only properties below SAP 69 that have recommendations are considered
        if p_recs is None or not (p["current_sap_points"] < 69):
            continue

        # Tenure only decides whether the project is flagged as social or private
        f = identify_funding_measure(p, p_recs, p["tenure"] in social_tenure)
        if f is not None:
            funding.append(f)

    # Explicit columns keep the frame usable when no property qualifies
    funding = pd.DataFrame(funding, columns=["property_id", "measure", "cost", "abs", "is_social"])
    funding["is_social"] = funding["is_social"].astype(bool)

    conservative_abs = 20
    funding["expected_funding"] = funding["abs"] * conservative_abs
    # Keep rows where the expected funding covers the cost of the works plus a 15% margin
    # NOTE(review): the original comment said "+ 10%" while the code used 1.15 - confirm the margin
    funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]

    # From the point of view of the owner of the properties, the funding they see is just the cost
    # of the works. The actual funding received will go to the installer
    # We now look at the social funding
    social_funding = funding[funding["is_social"]]["cost"].sum()
    # For the private funding, we need to scale this to consider the fact that only a proportion of
    # the properties will qualify due to needing the property to fall into council tax bands A - D,
    # and that only some of the tenants will meet the benefits criteria
    private_funding = funding[~funding["is_social"]]["cost"].sum()

    # 51% of households are recipients of benefits in the South East, in the UK
    # (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)

    # We also need to deduce the % of properties in council tax bands A - D
    # 2023 council tax bands:
    # https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
    # -statistical-commentary
    band_a_proportion = 0.239
    band_b_proportion = 0.195
    band_c_proportion = 0.219
    band_d_proportion = 0.156
    a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion

    benefits_proportion = 0.51

    # Note: It's probable that an occupant of a property in council tax bands A-D is more likely to
    # be on benefits, however we retain the regional average to be conservative
    # We scale the private funding based on these two factors
    private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion

    n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion)

    # Look at the impact of external wall insulation (EWI) for one scenario
    ewi_jobs = recommendations_df[
        (recommendations_df["Scenario ID"] == ewi_scenario_id)
        & (recommendations_df["type"] == "external_wall_insulation")
    ]
    ewi_total_cost = ewi_jobs["estimated_cost"].sum()

    # Properties with a cavity wall insulation recommendation in the funding scenario
    has_cavity = recommendations_df[
        (recommendations_df["type"] == "cavity_wall_insulation")
        & (recommendations_df["Scenario ID"] == funding_scenario_id)
    ]
    cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]

    # Recommendation type by kWh savings per m2 of floor area, for the final scenario
    recommendations_final_scenario = recommendations_df[
        recommendations_df["Scenario ID"].isin([final_scenario_id]) &
        (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
    ].copy()
    # Merge on floor area
    recommendations_final_scenario = recommendations_final_scenario.merge(
        properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
    )
    recommendations_final_scenario = recommendations_final_scenario[
        ~pd.isnull(recommendations_final_scenario["total_floor_area"])]
    recommendations_final_scenario["kwh_savings_per_unit"] = (
        recommendations_final_scenario["kwh_savings"] / recommendations_final_scenario["total_floor_area"]
    )

    # Map from this frame's own "type" column: the merge above reset the index, so assigning a
    # series taken from recommendations_df would line rows up against the wrong recommendations
    recommendations_final_scenario["type_mapped2"] = recommendations_final_scenario["type"].replace(
        {
            "room_roof_insulation": "roof_insulation",
            "flat_roof_insulation": "roof_insulation",
            "hot_water_tank_insulation": "other",
            "cylinder_thermostat": "other",
            "sealing_open_fireplace": "other",
            "suspended_floor_insulation": "floor_insulation",
            "solid_floor_insulation": "floor_insulation",
        }
    )

    # NOTE(review): the aggregation groups by "type_mapped"; "type_mapped2" is computed but unused
    aggs = recommendations_final_scenario.groupby("type_mapped")[
        ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
        "kwh_savings_per_unit", ascending=False
    )
    # Mean cost divided by mean kWh saved per m2 of floor area
    aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]

    # Show all columns, all rows and full column contents when printing frames
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_colwidth', None)


def _unnest_survey_recommendations(non_intrusive_recommendations):
    """Flatten the survey recommendations into one row per UPRN with ASHP and solar PV columns."""
    import ast

    def pick(recs, rec_type):
        # First recommendation of the given type, or None when the survey has none
        return next((r for r in recs if r["type"] == rec_type), None)

    survey_recs = []
    for _, row in non_intrusive_recommendations.iterrows():
        # The "recommendations" cell holds the repr of a list of dicts
        recs = ast.literal_eval(row["recommendations"])
        ashp_rec = pick(recs, "air_source_heat_pump")
        solar_rec = pick(recs, "solar_pv")

        # A missing recommendation counts as "not suitable" instead of raising on None
        ashp_ok = bool(ashp_rec and ashp_rec["suitable"])
        solar_ok = bool(solar_rec and solar_rec["suitable"])

        # Every key is always present so the resulting frame has all columns even when no
        # surveyed property is suitable
        survey_recs.append({
            "uprn": row["uprn"],
            "ashp_suitable": ashp_ok,
            "ashp_size_kw": ashp_rec["size"] if ashp_ok else None,
            "ashp_cost": ashp_rec["cost"] if ashp_ok else None,
            "solar_suitable": solar_ok,
            "solar_size_kwp": solar_rec["array_wattage"] if solar_ok else None,
            "solar_cost": solar_rec["cost"] if solar_ok else None,
        })

    return pd.DataFrame(
        survey_recs,
        columns=[
            "uprn", "ashp_suitable", "ashp_size_kw", "ashp_cost",
            "solar_suitable", "solar_size_kwp", "solar_cost",
        ],
    )


def _scenario_property_impact(base_properties, recommendations_df, scenario_id, rename_dict):
    """Return one row per property with the kWh savings, cost and measure flags of one scenario."""
    heating_col = "Estimated Heating Demand kWh Savings"
    lighting_col = "Estimated Lighting kWh Savings"
    solar_col = "Estimated Solar kWh Savings"

    # Default recommendations of this scenario
    scenario_recommendations = recommendations_df[
        (recommendations_df["Scenario ID"] == scenario_id)
        & (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison
    ].copy()

    # Lighting and solar savings are reported separately and do not reduce heating demand
    rec_type = scenario_recommendations["type"]
    kwh_savings = scenario_recommendations["kwh_savings"]
    scenario_recommendations[lighting_col] = kwh_savings.where(rec_type == "low_energy_lighting", 0)
    scenario_recommendations[solar_col] = kwh_savings.where(rec_type == "solar_pv", 0)
    scenario_recommendations[heating_col] = kwh_savings.where(
        ~rec_type.isin(["low_energy_lighting", "solar_pv"]), 0
    )

    scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({
        heating_col: 'sum',
        lighting_col: 'sum',
        solar_col: 'sum',
        "estimated_cost": "sum"
    }).reset_index()

    comparison = base_properties.merge(scenario_grouped_data, on=["property_id"], how="left")
    # Properties without recommendations in this scenario save nothing and cost nothing
    for col in (heating_col, lighting_col, solar_col, "estimated_cost"):
        comparison[col] = comparison[col].fillna(0)

    comparison["post_scenario_heating_hotwater_kwh"] = (
        comparison["current_energy_demand_heating_hotwater"] - comparison[heating_col]
    )

    # Measure matrix: one row per property, one boolean column per recommendation type
    measure_matrix = scenario_recommendations.pivot_table(
        index='property_id',
        columns='type',
        values='id',  # Using 'id' just as a placeholder for the pivot
        aggfunc=lambda x: True,  # If an ID exists for a given type, mark as True
        fill_value=False  # Fill other entries as False
    ).reset_index()

    # Only flag measures for properties where the scenario has some kWh impact
    has_impact = (
        (comparison[heating_col] > 0) | (comparison[lighting_col] > 0) | (comparison[solar_col] > 0)
    )
    measure_matrix = measure_matrix[
        measure_matrix["property_id"].isin(comparison.loc[has_impact, "property_id"].values)
    ]
    measure_matrix = measure_matrix.rename(columns=rename_dict)

    comparison = comparison.merge(measure_matrix, on="property_id", how="left")
    comparison["scenario_id"] = scenario_id
    return comparison


def lewes_outputs():
    """
    Prepare the Newhaven data for the following 2 needs:
    1) dataset to share with Nextgen heating
    2) breakdown of results by property type

    Reads the asset list and the non-invasive survey recommendations from S3 and the portfolio
    from the database (through ``get_data``), then writes two CSV files to hard-coded local paths:
    ``Lewes property data.csv`` (one row per property and scenario) and ``next_gen_dataset.csv``
    (one row per property, with the post-scenario kWh of every scenario as extra columns).

    :raises Exception: if, for any property type group, the mean post-scenario heating & hot water
        kWh is higher than the mean baseline
    :return: None
    """
    # Get the asset list
    asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv")
    asset_list = pd.DataFrame(asset_list)
    # Get non-invasive recommendations
    non_intrusive_recommendations = read_csv_from_s3(
        bucket_name="retrofit-plan-inputs-dev",
        filepath="8/90/non_invasive_recommendations.csv"
    )
    non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations)

    # Right now this is the second version of the Newhaven portfolio
    portfolio_id = 90
    scenario_ids = [47, 48, 49, 50, 51]
    properties_data, _plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
    properties_df = pd.DataFrame(properties_data)
    recommendations_df = pd.DataFrame(recommendations_data)

    survey_recs = _unnest_survey_recommendations(non_intrusive_recommendations)

    asset_list["uprn"] = asset_list["uprn"].astype(int)
    survey_recs["uprn"] = survey_recs["uprn"].astype(int)

    # Factor applied to the modelled kWh to produce the "_scaled" columns
    vital_kwh = 7597
    domna_kwh = 10850
    scaling_factor = vital_kwh / domna_kwh

    next_gen_dataset = properties_df[[
        "uprn", "address", "postcode",
        "property_type", "built_form", "current_energy_demand_heating_hotwater",
        "mainfuel", "total_floor_area", "floor_height"
    ]].rename(
        columns={
            "mainfuel": "primary_fuel_type",
            "total_floor_area": "gross_floor_area",
            "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh"
        }
    ).merge(
        asset_list[["uprn", "number_of_floors"]],
        how="left",
        on="uprn"
    ).merge(
        survey_recs,
        how="left",
        on="uprn"
    )
    next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = (
        next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor
    )

    # Properties without a survey row are treated as not suitable
    next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False).astype(bool)
    next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(False).astype(bool)

    # We prepare the scenario outputs by property type
    grouped_data = next_gen_dataset.copy()
    grouped_data["property_sub_type"] = grouped_data["built_form"].copy()
    # If a property is a flat, re-map sub_type just to flat
    grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = "Flat"
    # Same for maisonettes
    grouped_data.loc[grouped_data["property_type"] == "Maisonette", "property_sub_type"] = "Maisonette"

    # We now pull out the recommendations impact by property type and sub type

    # Exclude sealing open fireplaces; copy so the column assignment below works on an
    # independent frame rather than a slice of the original
    recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"].copy()

    # Split the generic "heating" type by what the description mentions. The first matching
    # entry wins; heating recommendations matching none of them keep the type "heating".
    heating_labels = [
        ("air source heat pump", "Air Source Heat Pump"),
        ("high heat retention", "High Heat Retention Storage"),
        ("condensing boiler", "Boiler Upgrade"),
    ]
    description = recommendations_df["description"]
    unlabelled = recommendations_df["type"] == "heating"
    for text, label in heating_labels:
        # na=False: a missing description must not count as a match
        matches = unlabelled & description.str.contains(text, regex=False, na=False)
        recommendations_df.loc[matches, "type"] = label
        unlabelled = unlabelled & ~matches

    rename_dict = {
        'hot_water_tank_insulation': 'Hot Water Tank Insulation',
        'windows_glazing': 'Windows Glazing',
        'secondary_heating': 'Secondary Heating',
        'cavity_wall_insulation': 'Cavity Wall Insulation',
        'flat_roof_insulation': 'Flat Roof Insulation',
        'mechanical_ventilation': 'Mechanical Ventilation',
        'loft_insulation': 'Loft Insulation',
        'cylinder_thermostat': 'Cylinder Thermostat',
        'room_roof_insulation': 'Room Roof Insulation',
        'low_energy_lighting': 'Low Energy Lighting',
        'external_wall_insulation': 'External Wall Insulation',
        'solar_pv': 'Solar PV',
        'heating_control': 'Heating Control',
        'solid_floor_insulation': 'Solid Floor Insulation',
        'suspended_floor_insulation': 'Suspended Floor Insulation',
        'internal_wall_insulation': 'Internal Wall Insulation'
    }

    # Loop-invariant, so only de-duplicate once
    base_properties = properties_df.drop_duplicates()[
        ["uprn", "property_id", "current_energy_demand_heating_hotwater"]
    ]
    property_scenario_impact = pd.concat([
        _scenario_property_impact(base_properties, recommendations_df, scenario_id, rename_dict)
        for scenario_id in tqdm(scenario_ids)
    ])

    measure_columns = list(rename_dict.values()) + [
        "Air Source Heat Pump", "High Heat Retention Storage", "Boiler Upgrade"
    ]
    for measure in measure_columns:
        if measure in property_scenario_impact.columns:
            # NaN means the measure is not part of this property's plan for the scenario
            property_scenario_impact[measure] = property_scenario_impact[measure].fillna(False)
        else:
            # The measure is not recommended in any scenario; the outputs still expect the column
            property_scenario_impact[measure] = False

    # Scale
    property_scenario_impact["post_scenario_heating_hotwater_kwh_scaled"] = (
        property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor
    )

    grouped_data = grouped_data.merge(
        property_scenario_impact, how="left", on="uprn"
    )

    # Aggregate the data
    grouped_data = grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]).agg({
        "estimated_heating_hotwater_kwh": "mean",
        "estimated_heating_hotwater_kwh_scaled": "mean",
        "estimated_cost": "mean",
        "post_scenario_heating_hotwater_kwh": "mean",
        "post_scenario_heating_hotwater_kwh_scaled": "mean"
    }).reset_index()

    scenario_names = pd.DataFrame(
        [
            {
                "scenario_id": 47,
                "scenario": "Demand Reduction – cavity & roof insulation",
            },
            {
                "scenario_id": 48,
                "scenario": "Demand reduction – no solid wall, floors or heating/renewables",
            },
            {
                "scenario_id": 49,
                "scenario": "Demand reduction – no decant"
            },
            {
                "scenario_id": 50,
                "scenario": "Demand reduction – no decant + heating & solar",
            },
            {
                "scenario_id": 51,
                "scenario": "Whole house retrofit"
            }
        ]
    )

    grouped_data = grouped_data.merge(
        scenario_names, how="left", on="scenario_id"
    )

    # Sanity check: a scenario must never increase the mean heating & hot water demand
    if not grouped_data[
        grouped_data["estimated_heating_hotwater_kwh"] < grouped_data["post_scenario_heating_hotwater_kwh"]].empty:
        raise Exception("someting went wrong")

    if not grouped_data[grouped_data["estimated_heating_hotwater_kwh_scaled"] < grouped_data[
        "post_scenario_heating_hotwater_kwh_scaled"]].empty:
        raise Exception("someting went wrong")

    # Reorder the columns
    grouped_data = grouped_data[
        [
            'property_type',
            'property_sub_type',
            'scenario',
            'estimated_heating_hotwater_kwh',
            'post_scenario_heating_hotwater_kwh',
            'estimated_heating_hotwater_kwh_scaled',
            'post_scenario_heating_hotwater_kwh_scaled',
            'estimated_cost',
        ]
    ]

    grouped_data = grouped_data.rename(
        columns={
            "property_type": "Property Type",
            "property_sub_type": "Property Sub Type",
            "scenario": "Scenario",
            "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
            "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
            "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
            "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
            "estimated_cost": "Estimated Cost or Retrofit",
        }
    )

    # The by-property-type breakdown is currently not written anywhere:
    # grouped_data.to_excel(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
    #     "Type.xlsx",
    #     index=False
    # )

    property_scenario_impact = property_scenario_impact.merge(
        scenario_names, how="left", on="scenario_id"
    )

    lewes_data = next_gen_dataset.merge(
        property_scenario_impact, how="left", on="uprn"
    )

    lewes_data = lewes_data.sort_values(
        ["postcode", "uprn", "scenario_id"], ascending=True
    )

    # Rearrange, rename columns and drop what we don't need (property_id, scenario_id and the
    # unscaled baseline are left out). The scaled kWh columns are published under unscaled labels.
    # TODO - remap the heating type
    lewes_data = lewes_data[
        [
            'uprn', 'address', 'postcode', 'property_type', 'built_form',
            'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable',
            'ashp_size_kw',
            'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost',
            'scenario',
            'estimated_heating_hotwater_kwh_scaled',
            'post_scenario_heating_hotwater_kwh_scaled',
            'Estimated Heating Demand kWh Savings',
            'Estimated Lighting kWh Savings',
            'Estimated Solar kWh Savings',
            'estimated_cost',
            'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat',
            'Flat Roof Insulation',
            'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation',
            'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation',
            'Heating Control',
            'Solar PV',
            'Air Source Heat Pump', 'Boiler Upgrade', 'High Heat Retention Storage',
            'Internal Wall Insulation',
            'Solid Floor Insulation',
            'Suspended Floor Insulation',
        ]
    ].rename(
        columns={
            "primary_fuel_type": "Primary Fuel Type",
            "gross_floor_area": "Gross Floor Area",
            "floor_height": "Floor Height",
            "number_of_floors": "Number of Floors",
            "ashp_suitable": "Is an ASHP Suitable?",
            "ashp_size_kw": "ASHP Size (kW)",
            "ashp_cost": "ASHP Cost",
            "solar_suitable": "Is Solar PV Suitable?",
            "solar_size_kwp": "Solar PV Size (kWp)",
            "solar_cost": "Solar PV Cost",
            "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh",
            "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh",
            "estimated_cost": "Estimated Cost of Scenario"
        }
    )

    # We save this dataset, which will be shared with Lewes Council
    lewes_data.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", index=False
    )

    # One column per (kWh measure, scenario) so the Nextgen dataset keeps one row per property
    df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario',
                                                    values=['post_scenario_heating_hotwater_kwh',
                                                            'post_scenario_heating_hotwater_kwh_scaled'])

    # Flattening multi-index columns
    df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns]

    # Reset the index to have a clean dataframe
    df_pivot.reset_index(inplace=True)

    next_gen_dataset = next_gen_dataset.merge(
        df_pivot, how="left", on="uprn"
    )

    next_gen_dataset.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False
    )