From a0bbdadd1f621401e9ba6465e69f03a97e4c3446 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Feb 2024 15:13:04 +0000
Subject: [PATCH] Adding in pipeline to test model simulations

---
 backend/Property.py                      |  14 +
 etl/testing_data/sap_model_simulation.py | 385 +++++++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 etl/testing_data/sap_model_simulation.py

diff --git a/backend/Property.py b/backend/Property.py
index de87099b..d1f5a1e2 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -184,6 +184,15 @@ class Property:
             recommendation_record["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
             recommendation_record["walls_insulation_thickness_ending"] = "above average"
             recommendation_record["walls_energy_eff_ending"] = "Good"
+
+            if recommendation["type"] == "external_wall_insulation":
+                recommendation_record["external_insulation"] = True
+                recommendation_record["internal_insulation"] = False
+
+            if recommendation["type"] == "internal_wall_insulation":
+                recommendation_record["external_insulation"] = False
+                recommendation_record["internal_insulation"] = True
+
         else:
             if recommendation_record["walls_thermal_transmittance_ending"] is None:
                 raise ValueError("We should not have a None value for the u value")
@@ -269,6 +278,11 @@ class Property:
             else:
                 raise ValueError("Invalid glazing type - implement me")
 
+            if is_secondary_glazing:
+                recommendation_record["glazed_type_ending"] = "secondary glazing"
+            else:
+                recommendation_record["glazed_type_ending"] = "double glazing installed during or after 2002 "
+
         if recommendation["type"] == "solar_pv":
             recommendation_record["photo_supply_ending"] = recommendation["photo_supply"]
 
diff --git a/etl/testing_data/sap_model_simulation.py b/etl/testing_data/sap_model_simulation.py
new file mode 100644
index 00000000..31cc21e7
--- /dev/null
+++ b/etl/testing_data/sap_model_simulation.py
@@ -0,0 +1,330 @@
+import pandas as pd
+from utils.s3 import read_dataframe_from_s3_parquet
+from backend.Property import Property
+
+
+# Quantiles at which example properties are picked when a feature takes many different values
+SAMPLE_QUANTILES = [0.25, 0.5, 0.75]
+
+
+def _simulate(row, recommendation, **simulation_details):
+    """
+    Simulate a single recommendation against one sampled property.
+
+    :param row: single-row DataFrame holding the sampled property
+    :param recommendation: recommendation dictionary, passed to Property.create_recommendation_scoring_data
+    :param simulation_details: simulation specific configuration, added to the beginning of the returned dictionary
+    :return: the simulation specific configuration followed by the recommendation scoring data
+    """
+    scoring_data = Property.create_recommendation_scoring_data(
+        property_id=row["uprn"].values[0],
+        # Always hand over the record as a plain dictionary, never as the DataFrame itself
+        recommendation_record=row.copy().to_dict("records")[0],
+        recommendation=recommendation
+    )
+    return {**simulation_details, **scoring_data}
+
+
+def _glazing_recommendation(is_secondary_glazing):
+    """Build the windows_glazing recommendation for either double glazing or secondary glazing."""
+    return {
+        "recommendation_id": "windows_glazing",
+        "type": "windows_glazing",
+        "new_u_value": None,  # This doesn't matter at the moment
+        "parts": [],
+        "is_secondary_glazing": is_secondary_glazing
+    }
+
+
+def _rows_nearest_quantiles(sample, column):
+    """
+    Yield one single-row DataFrame from sample for each of the SAMPLE_QUANTILES of column.
+
+    The row whose value is nearest to the quantile is taken, so we always work with a value that is actually
+    present in column. Nothing is yielded when column holds no values.
+    """
+    values = sample[column].dropna()
+    if values.empty:
+        # With nothing to choose from, quantile() only returns NaN and idxmin() raises
+        return
+    for target in values.quantile(SAMPLE_QUANTILES).values:
+        nearest_index = values.sub(target).abs().idxmin()
+        yield sample.loc[[nearest_index]].sample(1)
+
+
+def app():
+    """
+    Build the testing data for simulating each of the measures that we recommend.
+
+    For every combination of property type, built form, floor area quantile and construction age band found in the
+    dataset, example properties are sampled and the measures are simulated against them.
+
+    :return: dictionary of the simulated records, one list per group of simulations
+    """
+    dataset = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-datalake-dev",
+        file_key="dataset_with0perm_all.parquet"
+    )
+
+    thresholds = dataset["total_floor_area_starting"].quantile(
+        [0.3, 0.6, 0.9]
+    ).values
+
+    dataset["floor_area_quantile"] = pd.cut(
+        dataset["total_floor_area_starting"],
+        bins=[0] + list(thresholds) + [float('inf')],
+        labels=False,
+        include_lowest=True
+    )
+
+    # We want to set up some tests to deduce the following:
+    # For different property types, of various sizes, what is the impact of the various measures that we recommend
+    # 1) Insulating the loft. We test the impact of bringing the loft to 270mm insulation and 300mm insulation
+    property_types = dataset[
+        ["property_type", "built_form", "floor_area_quantile", "construction_age_band"]
+    ].drop_duplicates()
+
+    property_types = property_types.sort_values(
+        ["property_type", "built_form", "floor_area_quantile", "construction_age_band"]
+    )
+
+    # For each property type configuration, we take an example property with different starting loft thresholds. We
+    # take the value with the lowest U-value, since when simulating, we often work with particularly low u-values
+
+    # TODOS
+    # 1) When simulating with loft insulation, make sure is_loft is definitely true, because the roof could start as
+    #    pitched, but is_loft false
+
+    # TODO: We have a description: "Pitched, loft insulation", which seems to have its insulation thickness set to
+    #  "none"
+    #       Example UPRN: 100021359753, 10001204228
+
+    # TODO: For windows, we have glazing_type and glazed_type. When simulating, we don't set glazed_type_ending which
+    #       could be set to "double glazing installed during or after 2002" (THIS HAS BEEN ADDED!)
+
+    # TODO: When simulating external wall insulation vs internal wall insulation, I need to set the external_insulation
+    #       or internal_insulation boolean values to true (THIS HAS BEEN ADDED!)
+
+    # TODO: We could probably re-map some of the values of glazed_type_ending
+
+    # For simulating
+    # 1) loft insulation - we take the lowest u-value when loft insulation is 270mm and 300mm, the values we most
+    # commonly simulate to - For loft insulation, these values are in-line with (TODO: this note was left unfinished)
+    best_270mm_uvalue = dataset[dataset["roof_insulation_thickness"] == "270"]["roof_thermal_transmittance"].min()
+    best_300mm_uvalue = dataset[dataset["roof_insulation_thickness"] == "300"]["roof_thermal_transmittance"].min()
+
+    # 2) Internal wall insulation - we take the lowest u-value when simulating internal wall insulation
+    best_internal_wall_uvalue = dataset[
+        dataset["internal_insulation"] & dataset["is_solid_brick"]
+    ]["walls_thermal_transmittance"].min()
+
+    # 3) External wall insulation - we take the lowest u-value when simulating external wall insulation
+    best_external_wall_uvalue = dataset[
+        dataset["external_insulation"] & dataset["is_solid_brick"]
+    ]["walls_thermal_transmittance"].min()
+
+    # 4) Cavity wall insulation - we take the lowest u-value when simulating cavity wall insulation
+    # This is 0.28, which is a sufficiently low value
+    best_cavity_wall_uvalue = dataset[
+        dataset["is_cavity_wall"] & dataset["is_filled_cavity"] & (~dataset["external_insulation"]) & (
+            ~dataset["internal_insulation"])
+    ]["walls_thermal_transmittance"].min()
+
+    loft_insulation_testing_data = []
+    solid_wall_testing_data = []
+    cavity_wall_testing_data = []
+    solid_floor_testing_data = []
+    suspended_floor_testing_data = []
+    single_glazed_testing_data = []
+    partial_double_glazed_testing_data = []
+    partial_secondary_glazed_testing_data = []
+    for property_config in property_types.itertuples():
+        # Take the properties that match this configuration
+        population = dataset[
+            (dataset["property_type"] == property_config.property_type) &
+            (dataset["built_form"] == property_config.built_form) &
+            (dataset["floor_area_quantile"] == property_config.floor_area_quantile) &
+            (dataset["construction_age_band"] == property_config.construction_age_band)
+        ]
+
+        # 1) Loft insulation
+
+        # For loft insulation, there are two scenarios we test.
+        # 1) Loft insulation to 270mm
+        # 2) Loft insulation to 300mm
+
+        for insulation_thickness in ["none", "12", "50", "75", "100", "150", "200", "250"]:
+            candidates = population[
+                (population["roof_insulation_thickness"] == insulation_thickness) &
+                (population["is_pitched"])
+            ]
+            if candidates.empty:
+                # Not every configuration has a pitched roof with this starting thickness, and sampling from
+                # nothing raises
+                continue
+            row = candidates.sample(1)
+
+            for depth, new_u_value in [(270, best_270mm_uvalue), (300, best_300mm_uvalue)]:
+                loft_insulation_testing_data.append(_simulate(
+                    row,
+                    {
+                        "recommendation_id": f"loft_insulation_{depth}mm",
+                        "type": "loft_insulation",
+                        "new_u_value": new_u_value,
+                        "parts": [
+                            {"depth": depth}
+                        ]
+                    },
+                    # Insert simulation specific configuration details
+                    simulation_ending_insulation_thickness=str(depth),
+                    simulation_starting_insulation_thickness=insulation_thickness
+                ))
+
+        # 2) Solid wall insulation
+        solid_wall_sample = population[
+            population["is_solid_brick"] & (population["walls_insulation_thickness"] == "none")
+        ]
+
+        # We take 1 sample for each value of walls_thermal_transmittance
+        for uvalue in solid_wall_sample["walls_thermal_transmittance"].dropna().unique():
+            row = solid_wall_sample[
+                solid_wall_sample["walls_thermal_transmittance"] == uvalue
+            ].sample(1)
+
+            # The iwi/ewi simulations will be next to each other, so we can see how they differ for the same property
+            for wall_insulation, new_u_value in [
+                ("internal_wall_insulation", best_internal_wall_uvalue),
+                ("external_wall_insulation", best_external_wall_uvalue)
+            ]:
+                solid_wall_testing_data.append(_simulate(row, {
+                    "recommendation_id": wall_insulation,
+                    "type": wall_insulation,
+                    "new_u_value": new_u_value,
+                    "parts": []
+                }))
+
+        # 3) Cavity wall insulation
+        cavity_wall_sample = population[
+            population["is_cavity_wall"] & (~population["is_filled_cavity"]) & (
+                ~population["external_insulation"]
+            ) & (~population["internal_insulation"])
+        ]
+
+        # We take 1 sample for each value of walls_thermal_transmittance
+        for uvalue in cavity_wall_sample["walls_thermal_transmittance"].dropna().unique():
+            row = cavity_wall_sample[
+                cavity_wall_sample["walls_thermal_transmittance"] == uvalue
+            ].sample(1)
+
+            # Simulated filled cavity
+            cavity_wall_testing_data.append(_simulate(row, {
+                "recommendation_id": "cavity_wall_insulation",
+                "type": "cavity_wall_insulation",
+                "new_u_value": best_cavity_wall_uvalue,
+                "parts": []
+            }))
+
+        # 4) Solid floor insulation
+        solid_floor_sample = population[
+            population["is_solid"] & (population["floor_insulation_thickness"] == "none")
+        ]
+
+        # We have many different values of u-value for solid floors, so we'll take a sample at the 25%, 50% and 75%
+        # values
+        # We must take a value that is in one of the unique values for floor_thermal_transmittance
+        for nearest_row in _rows_nearest_quantiles(solid_floor_sample, "floor_thermal_transmittance"):
+            # Simulated solid floor insulation
+            solid_floor_testing_data.append(_simulate(nearest_row, {
+                "recommendation_id": "solid_floor_insulation",
+                "type": "solid_floor_insulation",
+                "new_u_value": None,  # This doesn't matter at the moment
+                "parts": []
+            }))
+
+        # 5) Suspended floor insulation
+        suspended_floor_sample = population[
+            population["is_suspended"] & (population["floor_insulation_thickness"] == "none")
+        ]
+
+        # We take the same approach as for solid floors
+        for nearest_row in _rows_nearest_quantiles(suspended_floor_sample, "floor_thermal_transmittance"):
+            # Simulated suspended floor insulation
+            suspended_floor_testing_data.append(_simulate(nearest_row, {
+                "recommendation_id": "suspended_floor_insulation",
+                "type": "suspended_floor_insulation",
+                "new_u_value": None,  # This doesn't matter at the moment
+                "parts": []
+            }))
+
+        # 6) Windows - single glazing
+        single_glazing_sample = population[
+            (population["glazing_type"] == "single")
+        ]
+
+        # We take multiple values for multi_glaze_proportion_starting. We definitely need zero, but then we also
+        # take the 25%, 50% and 75% values
+        multi_glaze_values = {0}
+        proportions = single_glazing_sample["multi_glaze_proportion_starting"].dropna()
+        if not proportions.empty:
+            # interpolation="nearest" only returns values that are present in the data, an interpolated value
+            # would match no property
+            multi_glaze_values.update(proportions.quantile(SAMPLE_QUANTILES, interpolation="nearest").values)
+
+        for value in sorted(multi_glaze_values):
+            candidates = single_glazing_sample[
+                single_glazing_sample["multi_glaze_proportion_starting"] == value
+            ]
+            if candidates.empty:
+                # Only zero can be missing here, the quantile values above are always present in the data
+                continue
+            row = candidates.sample(1)
+
+            # For single glazed windows, we can recommend double glazing or secondary glazing
+            for window_finish, is_secondary_glazing in [("double", False), ("secondary", True)]:
+                single_glazed_testing_data.append(_simulate(
+                    row,
+                    _glazing_recommendation(is_secondary_glazing),
+                    # Add in simulation specific details, to the beginning of the dictionary
+                    simulation_ending_window_finish=window_finish
+                ))
+
+        # 7) Windows - partial double glazed
+        partial_double_glazing_sample = population[
+            (population["glazing_type"] == "double") & (population["multi_glaze_proportion_starting"] > 0) & (
+                population["multi_glaze_proportion_starting"] < 100
+            )
+        ]
+
+        # If we start with partial double glazing, we recommend completing the job
+        for nearest_row in _rows_nearest_quantiles(partial_double_glazing_sample, "multi_glaze_proportion_starting"):
+            # Simulated double glazing
+            partial_double_glazed_testing_data.append(
+                _simulate(nearest_row, _glazing_recommendation(is_secondary_glazing=False))
+            )
+
+        # 8) Windows - partial secondary glazed
+        partial_secondary_glazing_sample = population[
+            (population["glazing_type"] == "secondary") & (population["multi_glaze_proportion_starting"] > 0) & (
+                population["multi_glaze_proportion_starting"] < 100
+            )
+        ]
+
+        # If we start with partial secondary glazing, we recommend completing the job
+        for nearest_row in _rows_nearest_quantiles(
+            partial_secondary_glazing_sample, "multi_glaze_proportion_starting"
+        ):
+            # Simulated secondary glazing
+            partial_secondary_glazed_testing_data.append(
+                _simulate(nearest_row, _glazing_recommendation(is_secondary_glazing=True))
+            )
+
+    return {
+        "loft_insulation": loft_insulation_testing_data,
+        "solid_wall": solid_wall_testing_data,
+        "cavity_wall": cavity_wall_testing_data,
+        "solid_floor": solid_floor_testing_data,
+        "suspended_floor": suspended_floor_testing_data,
+        "single_glazed": single_glazed_testing_data,
+        "partial_double_glazed": partial_double_glazed_testing_data,
+        "partial_secondary_glazed": partial_secondary_glazed_testing_data
+    }