Model/etl/customers/mod/pilot/2. Create Excel Model.py

from pprint import pprint
import pandas as pd
import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import (
    Recommendation,
    PlanModel,
    PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel


def get_data(portfolio_id, scenario_ids):
    """
    Load the properties, plans and default recommendations used to build the model.

    :param portfolio_id: ID of the portfolio whose properties (joined to their EPC
        details) are fetched.
    :param scenario_ids: Collection of scenario IDs. Plans are selected by scenario ID
        only - they are NOT additionally restricted to the fetched properties.
    :return: Tuple ``(properties_data, plans_data, recommendations_data)``. Each element
        is a list of plain dicts keyed by table column name, so the data stays usable
        after the session is closed. Recommendation dicts carry an extra
        ``"Scenario ID"`` key holding the scenario of the plan they belong to.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = (
            session.query(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )

        # Flatten each (property, EPC details) pair into a single dict. Where both
        # tables share a column name, the EPC details value wins because it is
        # written second.
        properties_data = [
            {
                **{
                    col.name: getattr(property_row, col.name)
                    for col in PropertyModel.__table__.columns
                },
                **{
                    col.name: getattr(epc_row, col.name)
                    for col in PropertyDetailsEpcModel.__table__.columns
                },
            }
            for property_row, epc_row in properties_query
        ]

        # Get the plans that belong to the requested scenarios
        plans_query = (
            session.query(PlanModel)
            .filter(PlanModel.scenario_id.in_(scenario_ids))
            .all()
        )

        # Transform plans data to include all fields dynamically
        plans_data = [
            {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
            for plan in plans_query
        ]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan["id"] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that
        # are default. PlanModel is joined so the scenario_id can be selected too.
        recommendations_query = (
            session.query(Recommendation, PlanModel.scenario_id)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(
                PlanModel,
                PlanModel.id == PlanRecommendations.plan_id,
            )
            .filter(
                PlanRecommendations.plan_id.in_(plan_ids),
                # "== True" is intentional: it builds a SQL expression
                Recommendation.default == True,  # noqa: E712
            )
            .all()
        )

        # Each result row is a (Recommendation, scenario_id) pair; flatten the
        # recommendation columns and attach the scenario it was recommended under.
        recommendations_data = [
            {
                **{
                    col.name: getattr(recommendation, col.name)
                    for col in Recommendation.__table__.columns
                },
                "Scenario ID": scenario_id,
            }
            for recommendation, scenario_id in recommendations_query
        ]
    finally:
        # Always release the connection, even if a query or conversion fails
        session.close()

    return properties_data, plans_data, recommendations_data


def app():
    """
    Given a portfolio and a set of scenarios, this function prepares the data behind the
    excel model / deck figures.

    It is a linear analysis script that, in order:

    1. Loads properties, plans and default recommendations via ``get_data`` and merges
       the properties with the customer's property CSV (on ``uprn``).
    2. Derives archetype variables (wall type, insulation, roof pitch, pre/post 1970),
       groups everything outside the six most common archetypes as ``"other"``, and
       builds age / floor-area / EPC breakdown tables.
    3. Builds one output DataFrame per scenario (measure costs, flags, aggregated
       savings, predicted post-works SAP/EPC, relative savings) and prints summary
       figures.
    4. Checks each property against the Environment Agency flood areas API.
    5. Shows a (hard-coded) archetype treemap, writes measure counts per archetype to
       CSV and prints the mean estimated valuation uplift per scenario.

    Inputs (portfolio, scenarios, data directory) are hard-coded below. The function
    prints to stdout, performs HTTP requests, opens a plotly figure and writes a CSV;
    it returns ``None``. The Excel export itself is currently commented out.
    """
    # These dependencies are only needed by this entry point, so they stay
    # function-local. They are grouped here so a missing package fails before any
    # database / network work is done.
    import plotly.graph_objects as go
    import requests
    from shapely.geometry import Point, shape
    from tqdm import tqdm

    from backend.ml_models.Valuation import PropertyValuation

    # Set the inputs:
    portfolio_id = 139
    scenario_ids = [237, 238]
    data_dir = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme"
    )
    # Seconds to wait for each flood-monitoring HTTP request before giving up, so a
    # stalled connection cannot hang the per-property loop indefinitely
    request_timeout = 30

    properties_data, plans_data, recommendations_data = get_data(
        portfolio_id=portfolio_id, scenario_ids=scenario_ids
    )

    properties_df = pd.DataFrame(properties_data)
    plans_df = pd.DataFrame(plans_data)  # not used further in this function
    recommendations_df = pd.DataFrame(recommendations_data)

    # Merge on the original data
    mod_property_data = pd.read_csv(f"{data_dir}/MOD property data.csv")

    # address / postcode / tenure are dropped from the CSV side so they do not clash
    # with the database columns of the same name
    property_asset_data = properties_df.merge(
        mod_property_data.drop(columns=["address", "postcode", "tenure"]),
        how="left",
        on="uprn",
    )

    # na=False: a missing roof description must not count as pitched. Without it the
    # NaN result is treated as truthy by np.where below.
    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
        "pitched", case=False, na=False
    )
    property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
    # Wall type is the first word of the walls description
    property_asset_data["wall_type"] = (
        property_asset_data["walls"].str.split(" ").str[0].str.strip()
    )

    # Insulation is read from the 2nd or 3rd comma-separated part of the description
    wall_parts = property_asset_data["walls"].str.split(",")
    second_part = wall_parts.str[1].str.strip()
    third_part = wall_parts.str[2].str.strip()
    property_asset_data["is_insulated"] = second_part.isin(
        [
            "filled cavity",
            "with external insulation",
            "filled cavity and external insulation",
        ]
    ) | third_part.isin(["insulated"])

    # Replace the boolean flags with human readable labels (used in archetype IDs)
    property_asset_data["is_insulated"] = np.where(
        property_asset_data["is_insulated"], "Insulated", "Uninsulated"
    )
    property_asset_data["is_pitched"] = np.where(
        property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof"
    )
    property_asset_data["pre_1970"] = np.where(
        property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
    )

    archetype_variables = [
        "property_type",
        "wall_type",
        "is_insulated",
        "is_pitched",
        "pre_1970",
    ]

    # One row per archetype with its number of properties, most common first
    assigned_archetypes = (
        property_asset_data.groupby(archetype_variables)
        .size()
        .reset_index(name="n_properties")
        .sort_values("n_properties", ascending=False)
    )

    # Make the archetype ID a concatenation of the variables
    assigned_archetypes["archetype_id"] = assigned_archetypes[
        archetype_variables
    ].apply(lambda x: "_".join(x.astype(str)), axis=1)

    # Most prominent archetypes
    n_prominent = 6
    prominent_archetypes = assigned_archetypes.iloc[:n_prominent]
    other_archetypes = assigned_archetypes.iloc[n_prominent:]
    # 2 or fewer properties in the other archetypes

    property_asset_data = property_asset_data.merge(
        assigned_archetypes[archetype_variables + ["archetype_id"]],
        how="left",
        on=archetype_variables,
    )

    # Create age bands:
    # 1960-1969
    # 1970-1979
    # 1980-1989
    # 1990-1999
    # 2000+
    # NOTE(review): build years outside (1959, 2022] fall outside every bin and
    # become NaN - confirm that is acceptable for the "2000+" band.
    property_asset_data["age_band"] = pd.cut(
        property_asset_data["BUILD_YEAR"],
        bins=[1959, 1969, 1979, 1989, 1999, 2022],
        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
    )

    # Create floor area bands
    # 0-73
    # 74-97
    # 98-199
    # 200+
    property_asset_data["floor_area_band"] = pd.cut(
        property_asset_data["total_floor_area"],
        bins=[0, 73, 97, 199, 10000],
        labels=["0-73", "74-97", "98-199", "200+"],
    )

    # Everything outside the most prominent archetypes is grouped as "other"
    property_asset_data["archetype_group"] = np.where(
        property_asset_data["archetype_id"].isin(
            other_archetypes["archetype_id"].values
        ),
        "other",
        property_asset_data["archetype_id"],
    )

    # For colour
    wall_types = (
        property_asset_data[["wall_type"]]
        .value_counts()
        .to_frame()
        .reset_index()
        .rename(columns={"wall_type": "Wall Type"})
    )
    # Group into age bands
    ages = (
        property_asset_data[["age_band"]]
        .value_counts()
        .to_frame()
        .reset_index()
        .sort_values("age_band", ascending=True)
        .rename(columns={"age_band": "Age Band"})
    )
    floor_area_bands = (
        property_asset_data[["floor_area_band"]]
        .value_counts()
        .to_frame()
        .reset_index()
        .sort_values("floor_area_band", ascending=True)
        .rename(columns={"floor_area_band": "Floor Area Band"})
    )
    archetype_counts = (
        property_asset_data[["archetype_group"]]
        .value_counts()
        .to_frame()
        .reset_index()
        .rename(columns={"archetype_group": "Archetype"})
    )
    # rename_axis / reset_index(name=...) name both columns explicitly, so the result
    # does not depend on what the pandas version calls the value_counts column
    property_types = (
        (
            property_asset_data["property_type"]
            + ": "
            + property_asset_data["built_form"]
        )
        .value_counts()
        .rename_axis("Property Type")
        .reset_index(name="Count")
    )

    # epc breakdown (current_epc_rating values expose a .value attribute)
    epc_breakdown = (
        property_asset_data["current_epc_rating"]
        .apply(lambda x: x.value)
        .value_counts()
        .to_frame()
        .reset_index()
    )

    # Figures for the deck
    # Carbon per property
    totals = property_asset_data[
        [
            "Total_household_members",
            "co2_emissions",
            "current_energy_demand",
            "current_energy_demand_heating_hotwater",
            "heating_cost_current",
            "hot_water_cost_current",
            "lighting_cost_current",
            "appliances_cost_current",
            "gas_standing_charge",
            "electricity_standing_charge",
        ]
    ].copy()
    totals["total_cost"] = (
        totals["heating_cost_current"]
        + totals["hot_water_cost_current"]
        + totals["lighting_cost_current"]
        + totals["appliances_cost_current"]
        + totals["gas_standing_charge"]
        + totals["electricity_standing_charge"]
    )
    print(
        totals[
            [
                "Total_household_members",
                "co2_emissions",
                "current_energy_demand",
                "total_cost",
            ]
        ].mean()
    )

    # Store these to an excel
    # with pd.ExcelWriter(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx"
    # ) as writer:
    #     wall_types.to_excel(writer, sheet_name="Wall Types", index=False)
    #     ages.to_excel(writer, sheet_name="Ages", index=False)
    #     floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False)
    #     archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False)
    #     epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False)

    # Contingency is applied as a fraction of each recommendation's estimated cost
    contingency = 0.26

    # We prepare the outputs, by scenario
    scenario_data = {}
    for scenario in scenario_ids:

        scenario_recommendations_df = recommendations_df[
            recommendations_df["Scenario ID"] == scenario
        ].copy()

        scenario_recommendations_df["contingency"] = (
            contingency * scenario_recommendations_df["estimated_cost"]
        )
        scenario_recommendations_df["total_cost"] = (
            scenario_recommendations_df["estimated_cost"]
            + scenario_recommendations_df["contingency"]
        )

        # Cost of each default measure, by property
        recommended_measures_df = scenario_recommendations_df[
            ["property_id", "measure_type", "estimated_cost", "default"]
        ]
        recommended_measures_df = recommended_measures_df[
            recommended_measures_df["default"]
        ]
        recommended_measures_df = recommended_measures_df.drop(columns=["default"])

        # Metrics by property ID
        aggregated_metrics = scenario_recommendations_df[
            [
                "property_id",
                "type",
                "default",
                "sap_points",
                "energy_cost_savings",
                "kwh_savings",
                "co2_equivalent_savings",
                "estimated_cost",
                "contingency",
                "total_cost",
            ]
        ]
        aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
        aggregated_metrics = (
            aggregated_metrics.groupby("property_id")[
                [
                    "sap_points",
                    "co2_equivalent_savings",
                    "energy_cost_savings",
                    "kwh_savings",
                    "estimated_cost",
                    "total_cost",
                    "contingency",
                ]
            ]
            .sum()
            .reset_index()
        )

        # One row per property, one column per measure type holding its cost.
        # NOTE: DataFrame.pivot raises ValueError if a property has the same
        # measure_type more than once.
        recommendations_measures_pivot = recommended_measures_df.pivot(
            index="property_id", columns="measure_type", values="estimated_cost"
        )
        recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
        recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)

        # We flag with boolean if the measure is recommended
        measure_cost_columns = [
            c for c in recommendations_measures_pivot.columns if c != "property_id"
        ]
        for c in measure_cost_columns:
            recommendations_measures_pivot["Recommendation: " + c] = (
                recommendations_measures_pivot[c] > 0
            )

        # We now create a final output
        scenario_df = (
            properties_df[
                [
                    "property_id",
                    "uprn",
                    "address",
                    "postcode",
                    "property_type",
                    "walls",
                    "roof",
                    "heating",
                    "windows",
                    "current_epc_rating",
                    "current_sap_points",
                    "total_floor_area",
                    "number_of_rooms",
                    "co2_emissions",
                    "current_energy_demand",
                    "current_energy_demand_heating_hotwater",
                    "heating_cost_current",
                    "hot_water_cost_current",
                    "lighting_cost_current",
                    "appliances_cost_current",
                    "gas_standing_charge",
                    "electricity_standing_charge",
                ]
            ]
            .merge(recommendations_measures_pivot, how="left", on="property_id")
            .merge(aggregated_metrics, how="left", on="property_id")
        )

        scenario_df["bills_total_cost"] = (
            scenario_df["heating_cost_current"]
            + scenario_df["hot_water_cost_current"]
            + scenario_df["lighting_cost_current"]
            + scenario_df["appliances_cost_current"]
            + scenario_df["gas_standing_charge"]
            + scenario_df["electricity_standing_charge"]
        )

        scenario_df = scenario_df.drop(columns=["property_id"])
        # Properties without any recommendation have no savings rather than NaN
        for c in [
            "sap_points",
            "co2_equivalent_savings",
            "energy_cost_savings",
            "kwh_savings",
        ]:
            scenario_df[c] = scenario_df[c].fillna(0)

        scenario_df = scenario_df.rename(
            columns={
                "uprn": "UPRN",
                "address": "Address",
                "postcode": "Postcode",
                "walls": "Walls",
                "roof": "Roof",
                "heating": "Heating",
                "windows": "Windows",
                "current_epc_rating": "Current EPC Rating",
                "current_sap_points": "Current SAP Points",
                "total_floor_area": "Total Floor Area",
                "number_of_rooms": "Number of Habitable Rooms",
                "floor_height": "Floor Height",
            }
        )

        # Calculate post SAP
        scenario_df["Predicted Post Works SAP"] = (
            scenario_df["Current SAP Points"] + scenario_df["sap_points"]
        ).round()
        scenario_df["Predicted Post Works EPC"] = scenario_df[
            "Predicted Post Works SAP"
        ].apply(sap_to_epc)

        # Calculate the relative savings on carbon, kwh, and bills
        scenario_df["relative_carbon_savings"] = (
            scenario_df["co2_equivalent_savings"] / scenario_df["co2_emissions"]
        )
        scenario_df["relative_kwh_savings"] = (
            scenario_df["kwh_savings"] / scenario_df["current_energy_demand"]
        )
        scenario_df["relative_bill_savings"] = (
            scenario_df["energy_cost_savings"] / scenario_df["bills_total_cost"]
        )

        # Add on the archetype. Because the key names differ, the merge keeps both
        # "UPRN" and a lower-case "uprn" column; the valuation step below relies on
        # the lower-case one.
        scenario_df = scenario_df.merge(
            property_asset_data[["uprn", "archetype_group"]],
            how="left",
            left_on="UPRN",
            right_on="uprn",
        )

        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler,
        # the bills go up recommending HHRSH, so it doesn't make it to EPC B
        # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump?
        # Do it manually???

        # Doesn't make it
        # misses = df[df["Predicted Post Works EPC"] == "C"]
        # # 5 of them are flats and so are difficult to get to EPC B without renewables. Possibly not worth it from an
        # # ROI perspective
        #
        # misses[["UPRN", "Address", "Postcode", "property_type"]]

        #              UPRN                               Address  Postcode property_type
        # 2    100120988937              13 Sidbury Circular Road   SP9 7HX          Flat  No further action
        # 3    100120988998              74 Sidbury Circular Road   SP9 7JA          Flat  No further action
        # 4    100120989416                       47 Zouch Avenue   SP9 7LR          Flat  No further action
        # 6    100060585002  42, Muscott Close, Shipton Bellinger   SP9 7TX         House  Can probably take a heat pump
        # 37    10000801072        34 Luffenham Place, Chicksands  SG17 5XH         House  Already surveyed as having
        # an ASHP - should be looked at
        # 121  100120988259                      8, Karachi Close   SP9 7LW          Flat
        # 122  100121101217                     599, Pepper Place  BA12 0DW          Flat
        # 140  100021455241         33 Blenheim Crescent, Ruislip   HA4 7HA         House - Solar isn't recommended
        # due to bug
        # 149  100120915656            10 Bower Green, Shrivenham   SN6 8TU         House - Solar isn't recommended
        # due to bug

        scenario_data[scenario] = scenario_df

    # Headline figures are printed for the first scenario only
    printing_scenario_id = scenario_ids[0]
    printing_df = scenario_data[printing_scenario_id]
    # EPC breakdown
    print(printing_df["Predicted Post Works EPC"].value_counts())
    # Cost
    # Total cost
    print(printing_df["total_cost"].sum())
    # Base cost
    print(printing_df["estimated_cost"].sum())
    # Contingency
    print(printing_df["contingency"].sum())
    # Costs averaged per unit
    print(printing_df["total_cost"].mean())
    print(printing_df["estimated_cost"].mean())
    print(printing_df["contingency"].mean())

    # Average relative savings
    print(printing_df["relative_carbon_savings"].mean())
    print(printing_df["relative_kwh_savings"].mean())
    print(printing_df["relative_bill_savings"].mean())

    measure_details = {}
    for scenario in scenario_ids:
        scenario_df = scenario_data[scenario]
        recommendation_cols = [
            c for c in scenario_df.columns if "Recommendation:" in c
        ]
        # The cost column of a measure has the flag column's name minus the prefix
        measure_columns = [
            c.split("Recommendation: ")[1] for c in recommendation_cols
        ]
        measure_details[scenario] = {
            # Number of properties each measure is recommended for
            "count": scenario_df[recommendation_cols].sum().to_dict(),
            # Average cost per measure, over the properties where it has a cost
            "cost_per_measure": {
                m: float(scenario_df[scenario_df[m] > 0][m].mean())
                for m in measure_columns
            },
        }

    for scenario in scenario_ids:
        pprint(measure_details[scenario]["count"])

    # Cost per measures
    for scenario in scenario_ids:
        pprint(measure_details[scenario]["cost_per_measure"])

    # Do not get to EPC B:
    # 5 are flats
    # 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump -
    # should be looked at but several surrounding properties have been surveyed in a similar fashion
    # 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH.
    #    we could non-intrusively recommend a heat pump.
    # 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing -
    # manual review indicates that there are multiple trees surrounding the south facing side of the property
    # 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local
    # area being surrounded by trees

    # Scenario adjustments:
    # Exclude: boiler_upgrade
    # Make ASHP COP 3.5

    # Metrics we need by scenario:
    # Cost
    # contingency
    # Carbon
    # kwh
    # bill savings
    scenario_metrics = {}
    for scenario in scenario_ids:
        avg_savings = (
            scenario_data[scenario][
                [
                    "sap_points",
                    "co2_equivalent_savings",
                    "energy_cost_savings",
                    "kwh_savings",
                    "estimated_cost",
                    "total_cost",
                    "contingency",
                ]
            ]
            .mean()
            .to_dict()
        )
        avg_savings["cost_per_sap_point"] = (
            avg_savings["total_cost"] / avg_savings["sap_points"]
        )
        avg_savings["cost_per_carbon"] = (
            avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
        )
        scenario_metrics[scenario] = avg_savings

    for scenario in scenario_ids:
        pprint(scenario_metrics[scenario])

    # Testing checking flood risk

    def get_flood_risk(lat, lon, radius_km=1):
        """
        Print and return the flood warnings returned by the flood-monitoring API for
        the area within ``radius_km`` of the point. Not called by the loop below,
        which uses ``point_in_flood_area`` instead.
        """
        url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
        params = {"lat": lat, "long": lon, "dist": radius_km}  # search radius in km

        response = requests.get(url, params=params, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        flood_warnings = data.get("items", [])

        if not flood_warnings:
            print("No active flood warnings near this location.")
        else:
            print(f"{len(flood_warnings)} warning(s) found near the location:")
            for warning in flood_warnings:
                print(f"- Area: {warning.get('description')}")
                print(
                    f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
                )
                print(f"  Message changed at: {warning.get('timeMessageChanged')}")
                print()

        return flood_warnings

    def get_flood_areas_near_point(lat, lon, radius_km=2):
        """Return the flood area records within ``radius_km`` of the point."""
        url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
        params = {"lat": lat, "long": lon, "dist": radius_km}

        response = requests.get(url, params=params, timeout=request_timeout)
        response.raise_for_status()
        return response.json().get("items", [])

    def point_in_flood_area(lat, lon):
        """
        Return the first flood area (within 1km) whose polygon contains the point, or
        ``None`` if no area contains it. Only the first feature of each area's polygon
        document is tested.
        """
        flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1)
        point = Point(lon, lat)  # GeoJSON uses (lon, lat) format

        for area in flood_areas:
            polygon_url = area.get("polygon")
            if not polygon_url:
                continue

            polygon_response = requests.get(polygon_url, timeout=request_timeout)
            polygon_response.raise_for_status()
            polygon_geojson = polygon_response.json()

            features = polygon_geojson.get("features", [])
            if not features:
                continue

            flood_polygon = shape(features[0]["geometry"])

            try:
                is_inside = flood_polygon.contains(point)
            except Exception:
                # Best effort: a geometry that cannot be evaluated is treated as not
                # containing the point. Unlike a bare "except:", this still lets
                # KeyboardInterrupt / SystemExit stop the run.
                is_inside = False

            if is_inside:
                print(
                    f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
                )
                return area

        return None

    # Collect the properties that sit inside a flood area
    flood_warnings_data = []
    for _, property_row in tqdm(
        property_asset_data.iterrows(), total=len(property_asset_data)
    ):
        resp = point_in_flood_area(
            lat=property_row["LATITUDE"], lon=property_row["LONGITUDE"]
        )
        if resp:
            flood_warnings_data.append(
                {
                    "uprn": property_row["uprn"],
                    "address": property_row["address"],
                    "postcode": property_row["postcode"],
                    "area": resp,
                }
            )

    # Archetype treemap. Labels, values and hover text are hard-coded rather than
    # computed from the data above.
    labels = [
        "House_Cavity_Insulated_Pitched roof_Pre 1970",
        "House_Cavity_Insulated_Pitched roof_Post 1970",
        "House_Cavity_Uninsulated_Pitched roof_Pre 1970",
        "House_Cavity_Uninsulated_Pitched roof_Post 1970",
        "other",
        "House_System_Uninsulated_Pitched roof_Pre 1970",
        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
    ]

    values = [62, 36, 21, 16, 16, 4, 2]

    hovertext = [
        "Loft insulation, draft proofing",
        "Top-up loft insulation",
        "Cavity wall insulation, loft insulation",
        "Cavity wall insulation, ventilation",
        "Bespoke retrofit measures",
        "External wall insulation, roof insulation",
        "Flat roof insulation, internal wall insulation",
    ]

    fig = go.Figure(
        go.Treemap(
            labels=labels,
            parents=[""] * len(labels),  # No root
            values=values,
            hovertext=hovertext,
            hoverinfo="text",
            textinfo="none",
            marker=dict(
                line=dict(color="white", width=4), colors=values, colorscale="Blues"
            ),
        )
    )

    fig.update_layout(
        margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
    )

    fig.show()

    # Get the recommended measures by archetype, for the second scenario
    second_scenario_df = scenario_data[scenario_ids[1]]
    recommendation_cols = [
        c for c in second_scenario_df.columns if "Recommendation:" in c
    ]
    measure_counts_by_scenario = (
        second_scenario_df.groupby("archetype_group")[recommendation_cols]
        .sum()
        .reset_index()
    )

    measure_counts_by_scenario.to_csv(f"{data_dir}/measure_counts_by_scenario.csv")

    # Estimate average valuation improvement by scenarios
    valuation_data = pd.read_csv(f"{data_dir}/property_valuation.csv")

    uplift = []
    for _, valuation_row in valuation_data.iterrows():
        uprn = valuation_row["uprn"]

        to_append = {"uprn": uprn}
        for scenario_id in scenario_ids:
            scenario_df = scenario_data[scenario_id]
            # squeeze() turns the single matching row into a Series.
            # NOTE(review): this assumes exactly one row per uprn in the scenario
            # output - zero or several matches would not give a Series.
            scenario_row = scenario_df[scenario_df["uprn"] == uprn].squeeze()

            val = PropertyValuation.estimate_valuation_improvement(
                current_value=valuation_row["valuation"],
                current_epc=scenario_row["Current EPC Rating"].value,
                target_epc=scenario_row["Predicted Post Works EPC"],
                total_cost=None,
            )

            to_append[scenario_id] = val["average_increase"]

        uplift.append(to_append)

    uplift = pd.DataFrame(uplift)
    print(uplift[scenario_ids[0]].mean())
    # £8,161
    print(uplift[scenario_ids[1]].mean())
    # £16,938