implementing new sequenced scoring approach for recommendations

2026-07-27 23:35:01 +00:00 · 2024-02-15 19:32:33 +00:00 · 2024-02-15 19:32:33 +00:00 · 82d19fc6fc
commit 82d19fc6fc
parent b39e9c9899
7 changed files with 94 additions and 45 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
  <component name="PythonCompatibilityInspectionAdvertiser">
    <option name="version" value="3" />
  </component>
--- a/backend/Property.py
+++ b/backend/Property.py
@ -1,20 +1,15 @@
-from datetime import datetime
-import re
 import os
-
-import numpy as np
+from itertools import groupby
 import pandas as pd

-from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.Dataset import TrainingDataset
-from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \
-    BUILT_FORM_REMAP
+from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet
 from etl.epc.settings import DATA_ANOMALY_MATCHES
-from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
+from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
 from recommendations.recommendation_utils import (
    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
 )
@ -172,11 +167,28 @@ class Property:
            previous_phase_representatives = [
                r for r in property_representative_recommendations if r["phase"] in previous_phases
            ]
+            # For solid wall insulation, we will actually have 2 representative recommendations, since we consider
+            # both internal and external wall insulation as possible measures. We will use the representative that
+            # has the lowest "efficiency" value (a cost-based score where lower is better).
+            # We take that representative separately for each phase.
+
+            # To be safe, we sort by phase
+            previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase'])
+
+            previous_phase_representatives = [
+                min(group, key=lambda x: x['efficiency']) for _, group in groupby(
+                    previous_phase_representatives, key=lambda x: x['phase']
+                )
+            ]
+
            recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()

            for rec in property_recommendations_by_phase:
                # We simulate the impact of the recommendation at this current phase, and all of the prior phases

+                if rec["type"] == "mechanical_ventilation":
+                    continue
+
                scoring_dict = self.create_recommendation_scoring_data(
                    property_id=self.id,
                    recommendation_record=recommendation_record,
@ -230,9 +242,10 @@ class Property:
                    output["external_insulation"] = False
                    output["internal_insulation"] = True

+                # TODO: perhaps detrimental
                # When making a recommendation for the wall, we will also update the ventilation
-                if output["mechanical_ventilation_ending"] == 'natural':
-                    output["mechanical_ventilation_ending"] = 'mechanical, extract only'
+                # if output["mechanical_ventilation_ending"] == 'natural':
+                #     output["mechanical_ventilation_ending"] = 'mechanical, extract only'

            else:
                if output["walls_thermal_transmittance_ending"] is None:
@ -328,7 +341,7 @@ class Property:
                output["photo_supply_ending"] = recommendation["photo_supply"]

            if recommendation["type"] not in [
-                "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
+                "sealing_open_fireplace", "low_energy_lighting",
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -130,8 +130,14 @@ class ModelApi:
                )
            )

-            predictions_df["predictions"] = predictions_df["predictions"].astype(float).round(1)
+            predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
            predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
+            # To grab the phase, we split the recommendation_id on "phase=" and take the first character of the
+            # text that follows it. Note that this only reads a single character, so it assumes single-digit
+            # phases (0-9); a regular expression would be needed to support multi-digit phases.
+            predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0]
+            # Convert back to int
+            predictions_df['phase'] = predictions_df['phase'].astype(int)

            predictions[model_prefix] = predictions_df

--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@ -4,6 +4,9 @@ from recommendations.Costs import Costs


 class LightingRecommendations:
+    # We introduce a SAP limit to lighting, which is based on empirical findings. We do see cases where lighting is
+    # worth more than 2 points, but this is unlikely in the context of other upgrades that can be made to the property
+    SAP_LIMIT = 2

    def __init__(self, property_instance: Property, materials: List):
        """
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -1,5 +1,6 @@
 from backend.Property import Property
 from typing import List
+from itertools import groupby
 from recommendations.FloorRecommendations import FloorRecommendations
 from recommendations.WallRecommendations import WallRecommendations
 from recommendations.RoofRecommendations import RoofRecommendations
@ -134,23 +135,32 @@ class Recommendations:
            has_u_value = recommendations_by_type[0].get("new_u_value") is not None
            has_sap_points = recommendations_by_type[0].get("sap_points") is not None

-            if has_u_value:
-                # We sort by the cost per U-value improvement - the lower the better
-                recommendations_by_type.sort(
-                    key=lambda x: x["total"] / x["starting_u_value"] - x["new_u_value"]
-                )
-            elif not has_u_value and has_sap_points:
-                # Sort the options by the cost per SAP point improvement - the lower the better
-                recommendations_by_type.sort(
-                    key=lambda x: x["total"] / x["sap_points"]
-                )
-            else:
-                # Sort the options by cost - the lower the better
-                recommendations_by_type.sort(
-                    key=lambda x: x["total"]
-                )
+            # We check whether these recommendations have two different types, such as solid wall insulation
+            # If we have multiple types, we group by type and then select the best recommendation for each type

-            property_representative_recommendations.append(recommendations_by_type[0])
+            recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
+            representative_recommendations = []
+            for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
+                recommendations = list(recommendations)
+                # We also create an efficiency key, which is used to sort the recommendations
+                if has_u_value:
+                    # We sort by the cost per U-value improvement - the lower the better
+                    for rec in recommendations:
+                        rec["efficiency"] = rec["total"] / rec["starting_u_value"] - rec["new_u_value"]
+                elif not has_u_value and has_sap_points:
+                    # Sort the options by the cost per SAP point improvement - the lower the better
+                    for rec in recommendations:
+                        rec["efficiency"] = rec["total"] / rec["sap_points"]
+                else:
+                    # Sort the options by cost - the lower the better
+                    for rec in recommendations:
+                        rec["efficiency"] = rec["total"]
+
+                recommendations.sort(
+                    key=lambda x: x["efficiency"]
+                )
+                representative_recommendations.append(recommendations[0])
+            property_representative_recommendations.extend(representative_recommendations)

        return property_representative_recommendations

@ -168,7 +178,7 @@ class Recommendations:

        for recs in property_recommendations:
            for rec in recs:
-                rec["recommendation_id"] = idx
+                rec["recommendation_id"] = f"{str(idx)}_phase={str(rec['phase'])}"
                idx += 1

        return property_recommendations
@ -198,11 +208,16 @@ class Recommendations:

        property_recommendations = recommendations[property_instance.id].copy()

+        # We calculate the impact by phase
+        sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
+        heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
+        carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
+
        for recommendations_by_type in property_recommendations:
            for rec in recommendations_by_type:

-                # We don't use the model for low energy lighting at the moment
-                if rec["type"] == "low_energy_lighting":
+                if rec["type"] == "mechanical_ventilation":
+                    # We don't have a perceived SAP impact of mechanical ventilation
                    continue

                new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str(
@ -216,22 +231,37 @@ class Recommendations:
                new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str(
                    rec["recommendation_id"]
                )]["predictions"].values[0]
-                rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])

-                if rec["type"] == "mechanical_ventilation":
+                if rec["phase"] == 0:
+                    rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])
+                    rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
+                    rec["heat_demand"] = property_instance.floor_area * (
+                        float(property_instance.data["energy-consumption-current"]) - new_heat_demand
+                    )
+                else:
+
+                    previous_phase = rec["phase"] - 1
+                    rec["sap_points"] = (
+                        new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0]
+                    )
+                    rec["co2_equivalent_savings"] = (
+                        carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
+                        new_carbon
+                    )
+                    rec["heat_demand"] = property_instance.floor_area * (
+                        heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
+                        new_heat_demand
+                    )
+
+                if rec["type"] == "low_energy_lighting":
                    # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
-                    rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
+                    rec["sap_points"] = min(rec["sap_points"], LightingRecommendations.SAP_LIMIT)

                # Round to 2 decimal places
                rec["sap_points"] = round(rec["sap_points"], 2)
-                rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon

                # Energy consumption current is per meter squared, so we need to multiply by the floor area to get
                # an absolute figure for the home
-                rec["heat_demand"] = (
-                    (float(property_instance.data["energy-consumption-current"]) - new_heat_demand
-                     ) * property_instance.floor_area)
-
                rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["heat_demand"])

                if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@ -15,9 +15,6 @@ class VentilationRecommendations(Definitions):
        'mechanical, supply and extract'
    ]

-    # We introduce a SAP limit, to prevent over-predicting the SAP impact of mechanical ventilation
-    SAP_LIMIT = 2
-
    def __init__(
        self,
        property_instance: Property,
@ -68,7 +65,7 @@ class VentilationRecommendations(Definitions):
                "description": f"Install {n_units} {part[0]['description']} units",
                "starting_u_value": None,
                "new_u_value": None,
-                "sap_points": None,
+                "sap_points": 0,
                "total": estimated_cost,
                # We use a very simple and rough estimate of 4 hours per unit
                "labour_hours": 4 * n_units,