From 292da782a076d3da1e90d9ff4c081f50a64524e5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 16 Mar 2025 18:47:41 +0000
Subject: [PATCH] changing simulation methodology to use corrected floor area

---
 backend/Property.py                       | 34 +++++++------
 backend/app/db/models/materials.py        |  1 +
 backend/app/plan/router.py                | 35 +++++++++-----
 backend/ml_models/api.py                  | 37 +++++++++++---
 etl/costs/app.py                          |  6 ++-
 recommendations/Costs.py                  | 10 ++--
 recommendations/Recommendations.py        |  1 +
 recommendations/RoofRecommendations.py    | 31 ++++++------
 recommendations/SecondaryHeating.py       | 18 +------
 recommendations/SolarPvRecommendations.py | 59 +++--------------------
 10 files changed, 105 insertions(+), 127 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index b9c88bc2..e6e43efe 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -70,6 +70,10 @@ class Property:
     # Contains the solar panel optimisation results from the Google Solar API
     solar_panel_configuration = None
 
+    # If true, indicates the floor area has actually been given to us by the owner, and we should use this figure
+    # instead of the one in the EPC, when we simulate
+    owner_floor_area = False
+
     def __init__(
         self,
         id,
@@ -241,6 +245,10 @@ class Property:
         insulation_wall_area = kwargs.get("insulation_wall_area", None)
         insulation_wall_area = float(insulation_wall_area) if insulation_wall_area not in [None, ""] else None
 
+        # We allow for the asset owner to provide us with total floor area, in the event of it being incorrect
+        floor_area = kwargs.get("floor_area", None)
+        floor_area = float(floor_area) if floor_area not in [None, ""] else None
+
         return {
             "n_bathrooms": n_bathrooms,
             "n_bedrooms": n_bedrooms,
@@ -248,12 +256,15 @@ class Property:
             "insulation_floor_area": insulation_floor_area,
             "insulation_wall_area": insulation_wall_area,
             "building_id": kwargs.get("building_id", None),
+            "floor_area": floor_area
         }
 
     def parse_kwargs(self, kwargs):
         # We extract the elements from kwargs that we recognise. Anything additional is ignored
         for arg, val in kwargs.items():
             if val is not None:
+                if arg == "floor_area":
+                    self.owner_floor_area = True
                 setattr(self, arg, val)
 
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
@@ -263,14 +274,7 @@ class Property:
         It will be the same starting and ending EPC, as we don't have the expected EPC yet
         """
 
-        # difference_record = self.epc_record - self.epc_record
-
-        # TODO: change these lower and replace in the settings file
-        # print(
-        #     "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
-        # )
         fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
-        # print("NEED TO CHANGE THE DASH TO LOWER CASE")
         fixed_data_col_names = [
             x.lower().replace("_", "-") for x in fixed_data_col_names
         ]
@@ -281,8 +285,6 @@ class Property:
             if k in fixed_data_col_names
         }
 
-        # difference_record.append_fixed_data(fixed_data)
-
         difference_record = self.epc_record.create_EPCDifferenceRecord(
             self.epc_record, fixed_data
         )
@@ -291,10 +293,11 @@ class Property:
             datasets=[difference_record], cleaned_lookup=cleaned_lookup
         )
 
-        # TODO: adjust the base difference record with the previously calculated u values + features
-        # estimated_perimeter is different to the perimeter in the epc record
-
-        # self.base_difference_record.df
+        # owner_floor_area is a boolean flag set when the owner supplied floor_area, which we trust over the EPC.
+        # Only when it is set do we write the property's floor area and perimeter into the ending features
+        if self.owner_floor_area:
+            self.base_difference_record.df["total_floor_area_ending"] = self.floor_area
+            self.base_difference_record.df["estimated_perimeter_ending"] = self.perimeter
 
     def simulate_all_representative_recommendations(
         self, property_representative_recommendations,
@@ -1254,7 +1257,10 @@ class Property:
         # If the property is in a conservation area, is listed or is a heriage building, solar panels
         # become a difficult measure to generally get through planning restrictions and so we do not recommend
         # solar panels
-        if self.restricted_measures:
+        if self.is_listed or self.is_heritage:
+            # NOTE: conservation areas are deliberately not excluded by this check. Solar panels can still be
+            # recommended there, but they need to be done in a way that is sympathetic to the building, e.g.
+            # the panels may be installed such that they are not visible from the street
             return False
 
         is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py
index f0af3343..9f8abbf4 100644
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@@ -19,6 +19,7 @@ class MaterialType(enum.Enum):
     flat_roof_insulation = "flat_roof_insulation"
     room_roof_insulation = "room_roof_insulation"
     windows_glazing = "windows_glazing"
+    cavity_wall_extraction = "cavity_wall_extraction"
 
     iwi_wall_demolition = "iwi_wall_demolition"
     iwi_vapour_barrier = "iwi_vapour_barrier"
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4f2b578e..b6b576b3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -445,16 +445,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
 
-        # Set up model api and warm up the lambdas
-        model_api = ModelApi(
-            portfolio_id=body.portfolio_id,
-            timestamp=created_at,
-            prediction_buckets=get_prediction_buckets()
-        )
-        await model_api.async_warm_up_lambdas(
-            model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
-        )
-
         input_properties = []
         for config in tqdm(plan_input):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
@@ -549,6 +539,17 @@ async def trigger_plan(body: PlanTriggerRequest):
         if not input_properties:
             return Response(status_code=204)
 
+        # Set up model api and warm up the lambdas
+        model_api = ModelApi(
+            portfolio_id=body.portfolio_id,
+            timestamp=created_at,
+            prediction_buckets=get_prediction_buckets(),
+            max_retries=1
+        )
+        await model_api.async_warm_up_lambdas(
+            model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
+        )
+
         # The materials data could be cached or local so we don't need to make
         # consistent requests to the backend for
         # the same data
@@ -699,7 +700,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_instance.current_energy_bill = property_current_energy_bill
 
         # Insert the predictions into the recommendations and run the optimiser
-
         for p in input_properties:
             if not recommendations.get(p.id):
                 continue
@@ -712,8 +712,13 @@ async def trigger_plan(body: PlanTriggerRequest):
             else:
 
                 current_sap_points = int(p.data["current-energy-efficiency"])
-                target_sap_points = epc_to_sap_lower_bound(body.goal_value)
-                sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
+                ventilation_impact = next(
+                    (r[0]["sap_points"] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
+                    0
+                )
+                sap_gain = CostOptimiser.calculate_sap_gain_with_slack(
+                    epc_to_sap_lower_bound(body.goal_value) - current_sap_points
+                ) + abs(ventilation_impact)
 
                 if not body.optimise:
                     if body.goal != "Increasing EPC":
@@ -778,6 +783,10 @@ async def trigger_plan(body: PlanTriggerRequest):
             final_recommendations = [
                 rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
             ]
+            # NOTE(review): the total SAP points of the default recommendations is computed but never used
+            defaults = [r for r in final_recommendations if r["default"]]
+            sum([r['sap_points'] for r in defaults])
+
             recommendations[p.id] = final_recommendations
 
         # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index c2f2dcd9..c108f1b7 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -39,6 +39,7 @@ class ModelApi:
         timestamp,
         prediction_buckets,
         base_url="https://api.dev.hestia.homes",
+        max_retries=2,
     ):
         """
         This class handles the communication with the Model APIs. These models include SAP change, heat demain change
@@ -54,6 +55,8 @@ class ModelApi:
         self.timestamp = timestamp
         self.prediction_buckets = prediction_buckets
 
+        self.max_retries = max_retries
+
     @staticmethod
     def predictions_template():
         return {
@@ -295,15 +298,33 @@ class ModelApi:
 
         async def run_batches():
             for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
-                predictions_dict = await self.predict_all_async(
-                    df=data.iloc[chunk:chunk + batch_size],
-                    bucket=bucket,
-                    model_prefixes=model_prefixes,
-                    extract_ids=extract_ids
-                )
 
-                for key, scored in predictions_dict.items():
-                    all_predictions[key] = pd.concat([all_predictions[key], scored])
+                attempts = 0
+                success = False
+                while attempts <= self.max_retries and not success:
+                    try:
+                        predictions_dict = await self.predict_all_async(
+                            df=data.iloc[chunk:chunk + batch_size],
+                            bucket=bucket,
+                            model_prefixes=model_prefixes,
+                            extract_ids=extract_ids
+                        )
+
+                        for key, scored in predictions_dict.items():
+                            all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+                        success = True
+                    except Exception as e:
+                        attempts += 1
+                        logger.error(
+                            f"Batch {chunk}-{chunk + batch_size} failed "
+                            f"(Attempt {attempts}/{self.max_retries + 1}). Error: {e}"
+                        )
+                        # The loop allows max_retries + 1 attempts in total; once exhausted the batch is dropped
+                        if attempts > self.max_retries:
+                            logger.error(
+                                f"Skipping batch {chunk}-{chunk + batch_size} after {attempts} failed attempts."
+                            )
 
         # Check if there is an existing event loop
         try:
diff --git a/etl/costs/app.py b/etl/costs/app.py
index 797191d2..f2bf365b 100644
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@@ -11,7 +11,7 @@ import inspect
 
 src_file_path = inspect.getfile(lambda: None)
 
-DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240917 Hestia Materials.xlsx"
+DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20250316 Domna Materials.xlsx"
 # Environment file is at the same level as this file
 ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env"
 dotenv.load_dotenv(ENV_FILE)
@@ -91,6 +91,7 @@ def app():
     lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
     flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
     window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
+    rir_insulation_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="room_roof_insulation", header=0)
 
     # Form a single table to be uploaded
     costs = pd.concat(
@@ -104,7 +105,8 @@ def app():
             ewi_costs,
             lel_costs,
             flat_roof_costs,
-            window_costs
+            window_costs,
+            rir_insulation_costs,
         ]
     )
 
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 4d25ec18..5a39bee3 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -101,10 +101,10 @@ INSTALLER_ASHP_COSTS = [
 BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
 
 INSTALLER_SOLAR_BATTERY_COSTS = [
-    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2700.00, 'installer': 'CEG'},
-    {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
-    {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
-    {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
+    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2030.40, 'installer': 'CEG'},
+    # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
+    # {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
+    # {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
 ]
 
 # This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@@ -149,7 +149,7 @@ CONDENSING_BOILER_COSTS = {
 ELECTRIC_BOILER_COSTS = 1800
 
 # Assumes 1 hours to remove each heater (including re-decorating)
-ROOM_HEATER_REMOVAL_COST = 50
+ROOM_HEATER_REMOVAL_COST = 25
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
 
 # This is a cost quoted by Jim for a system flush - existig system will run more efficiently
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 813f5a80..8a6b01ab 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -461,6 +461,7 @@ class Recommendations:
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param all_predictions: dictionary of predictions from the model apis
         :param recommendations: dictionary of recommendations for the property
+        :param representative_recommendations: dictionary of representative recommendations for the property
         :return:
         """
 
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index b7e34406..5f9707d9 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -52,6 +52,10 @@ class RoofRecommendations:
             part for part in materials if part["type"] == "flat_roof_insulation"
         ]
 
+        self.room_roof_insulation_materials = [
+            part for part in materials if part["type"] == "room_roof_insulation"
+        ]
+
         # Extract the insulation thickness from the roof, which is used throughout this method
         self.insulation_thickness = convert_thickness_to_numeric(
             self.property.roof["insulation_thickness"],
@@ -496,29 +500,22 @@ class RoofRecommendations:
         :return:
         """
 
-        # TODO: We temporarilty use costs from SCIS for RIR insulation. The costing was £180/m2 floor
-        roof_roof_insulation_materials = [
-            {
-                "type": "room_roof_insulation",
-                "measure_type": "room_roof_insulation",
-                "description": "Insulating the ceiling of the roof roof and re-decorate",
-                "depths": [100],
-                "depth_unit": "mm",
-                "r_value_per_mm": 0.038,
-                "thermal_conductivity": 0.022,
-                "cost": [180],
-            }
-        ]
+        # We have a list of materials that can be used for room roof insulation
+        # We will iterate over these materials and recommend them based on the current u-value of the roof
+        # and the cost of the materials
 
         rir_non_invasive_recommendation = next(
             (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
         )
 
+        insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)
+
         # lowest_selected_u_value = None
         recommendations = []
-        for material in roof_roof_insulation_materials:
-            for depth, cost_per_unit in zip(material["depths"], material["cost"]):
-                part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
+        for _, material_group in insulation_materials.groupby("description"):
+            for material in material_group.itertuples():
+
+                part_u_value = r_value_per_mm_to_u_value(material.depth, material.r_value_per_mm)
 
                 _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
                 new_u_value = math.ceil(new_u_value * 100.0) / 100.0
@@ -526,7 +523,7 @@ class RoofRecommendations:
                 # We allow a small tolerance for error so we don't discount the recommendation entirely
 
                 estimated_cost = (
-                    cost_per_unit * self.property.insulation_floor_area if
+                    material.total_cost * self.property.insulation_floor_area if
                     rir_non_invasive_recommendation.get("cost") is None else
                     rir_non_invasive_recommendation.get("cost")
                 )
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index a9d5de04..e63951d9 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -9,12 +9,6 @@ class SecondaryHeating:
     system.
     """
 
-    # The list of existing heating systems that are accepted
-    ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas", "Electric storage heaters"]
-    ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)']
-    # These are the heaters where works are required to remove them
-    FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
-
     def __init__(self, property_instance: Property):
         self.property = property_instance
         self.costs = Costs(self.property)
@@ -25,18 +19,10 @@ class SecondaryHeating:
         # Reset
         self.recommendation = []
 
-        if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
-            return
-
-        # TODO: We need to clean secondary data
-        if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
-            return
-
-        if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
-            # We have an associated cost otherwise, there is no cost
+        if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']:
             n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms']
         else:
-            n_rooms = 0
+            n_rooms = self.property.data["number-heated-rooms"]
 
         costs = self.costs.heater_removal(n_rooms=n_rooms)
 
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 77e8fd10..ee07ff28 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -7,14 +7,6 @@ from recommendations.recommendation_utils import override_costs, estimate_pitche
 
 
 class SolarPvRecommendations:
-    # Solar panel specs based on Eurener 400s solar panels
-    # https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono
-    # Approximate area of the solar panels
-    SOLAR_PANEL_AREA = 1.79
-    # Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
-    # This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswolrd Energy Group
-    SOLAR_PANEL_WATTAGE = 400
-
     # For domestic properties, we don't recommend a solar PV system with wattage outside of these
     # bounds
     MAX_SYSTEM_WATTAGE = 6000
@@ -65,46 +57,6 @@ class SolarPvRecommendations:
 
         return trimmed_list
 
-    def mds_recommend(self, phase=None, solar_pv_percentage=0.5):
-        # For specific usage within the mds report
-
-        solar_pv_roof_area = self.property.get_solar_pv_roof_area(solar_pv_percentage)
-
-        number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
-        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
-
-        solar_panel_wattage = np.clip(
-            a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
-        )
-
-        # We now have a property which is potentially suitable for solar PV
-        roof_coverage_percent = round(solar_pv_percentage * 100)
-        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
-        # of solar PV installations
-        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=False)
-        kw = np.floor(solar_panel_wattage / 100) / 10
-
-        description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                       f"anel system on {round(roof_coverage_percent)}% the roof.")
-
-        return [
-            {
-                "phase": phase,
-                "parts": [],
-                "type": "solar_pv",
-                "description": description,
-                "starting_u_value": None,
-                "new_u_value": None,
-                "sap_points": None,
-                "already_installed": False,
-                **cost_result,
-                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
-                # back up here
-                "photo_supply": roof_coverage_percent,
-                "has_battery": False
-            }
-        ]
-
     def recommend_building_analysis(self, phase):
         """
         This recommendation approach handles the case of producing solar PV recommendations at the building level,
@@ -258,11 +210,14 @@ class SolarPvRecommendations:
                 )
                 kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
                 if has_battery:
-                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
-                                   f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
+                    description = (
+                        f"Install a {kw} kilowatt-peak (kWp) solar panel system, with a battery."
+                    )
                 else:
-                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                                   f"anel system on {round(roof_coverage_percent)}% the roof.")
+                    description = f"Install a {kw} kilowatt-peak (kWp) solar panel system."
+
+                if self.property.in_conservation_area:
+                    description += " Property is in a conservation area - please check with local planning authority."
 
                 already_installed = "solar_pv" in self.property.already_installed
                 if already_installed: