From a0bbdadd1f621401e9ba6465e69f03a97e4c3446 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Feb 2024 15:13:04 +0000 Subject: [PATCH] Adding in pipline to test model simulations --- backend/Property.py | 14 + etl/testing_data/sap_model_simulation.py | 385 +++++++++++++++++++++++ 2 files changed, 399 insertions(+) create mode 100644 etl/testing_data/sap_model_simulation.py diff --git a/backend/Property.py b/backend/Property.py index de87099b..d1f5a1e2 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -184,6 +184,15 @@ class Property: recommendation_record["walls_thermal_transmittance_ending"] = recommendation["new_u_value"] recommendation_record["walls_insulation_thickness_ending"] = "above average" recommendation_record["walls_energy_eff_ending"] = "Good" + + if recommendation["type"] == "external_wall_insulation": + recommendation_record["external_insulation"] = True + recommendation_record["internal_insulation"] = False + + if recommendation["type"] == "internal_wall_insulation": + recommendation_record["external_insulation"] = False + recommendation_record["internal_insulation"] = True + else: if recommendation_record["walls_thermal_transmittance_ending"] is None: raise ValueError("We should not have a None value for the u value") @@ -269,6 +278,11 @@ class Property: else: raise ValueError("Invalid glazing type - implement me") + if is_secondary_glazing: + recommendation_record["glazed_type_ending"] = "secondary glazing" + else: + recommendation_record["glazed_type_ending"] = "double glazing installed during or after 2002 " + if recommendation["type"] == "solar_pv": recommendation_record["photo_supply_ending"] = recommendation["photo_supply"] diff --git a/etl/testing_data/sap_model_simulation.py b/etl/testing_data/sap_model_simulation.py new file mode 100644 index 00000000..31cc21e7 --- /dev/null +++ b/etl/testing_data/sap_model_simulation.py @@ -0,0 +1,385 @@ +import pandas as pd +from utils.s3 import 
read_dataframe_from_s3_parquet  # completes the `from utils.s3 import` statement that ends on the previous line
from backend.Property import Property


def _pick_one(candidates):
    """Return one randomly sampled row of ``candidates`` as a one-row DataFrame.

    Returns ``None`` when ``candidates`` is empty so that callers can skip a
    configuration with no matching property instead of letting
    ``DataFrame.sample`` raise ``ValueError``.
    """
    if candidates.empty:
        return None
    return candidates.sample(1)


def _rows_nearest_quantiles(candidates, column, quantiles=(0.25, 0.5, 0.75)):
    """Yield, for each requested quantile of ``column``, the one-row frame closest to it.

    Quantiles are interpolated, so they are snapped to the nearest value that
    actually occurs in ``candidates``. Nothing is yielded when ``column`` has
    no non-null values (an empty sample would otherwise make ``idxmin`` fail).
    The same row may be yielded more than once if several quantiles snap to it.
    """
    observed = candidates[column].dropna()
    if observed.empty:
        return
    for target in observed.quantile(list(quantiles)).values:
        nearest_label = observed.sub(target).abs().idxmin()
        # .loc with a list keeps a DataFrame; sample(1) resolves duplicate index labels
        yield candidates.loc[[nearest_label]].sample(1)


def _simulate(row, recommendation_id, recommendation_type, new_u_value=None, parts=None, **extra):
    """Run ``Property.create_recommendation_scoring_data`` for a one-row frame.

    :param row: one-row DataFrame describing the starting property
    :param recommendation_id: value for the recommendation's "recommendation_id" key
    :param recommendation_type: value for the recommendation's "type" key
    :param new_u_value: value for the recommendation's "new_u_value" key
    :param parts: value for the recommendation's "parts" key (defaults to an empty list)
    :param extra: additional recommendation keys, e.g. ``is_secondary_glazing``
    :return: whatever ``Property.create_recommendation_scoring_data`` returns
    """
    return Property.create_recommendation_scoring_data(
        property_id=row["uprn"].values[0],
        # A fresh dict per call, so one simulation never sees another's changes
        recommendation_record=row.to_dict("records")[0],
        recommendation={
            "recommendation_id": recommendation_id,
            "type": recommendation_type,
            "new_u_value": new_u_value,
            "parts": [] if parts is None else parts,
            **extra,
        },
    )


def app():
    """Build simulated retrofit records for every property configuration in the dataset.

    Reads the dataset from S3, buckets properties by floor area, and then, for
    each distinct (property_type, built_form, floor_area_quantile,
    construction_age_band) combination, samples example properties and runs
    them through ``Property.create_recommendation_scoring_data`` for loft,
    solid wall, cavity wall, solid floor, suspended floor and glazing measures.

    Configurations that have no suitable example property for a measure are
    skipped for that measure.

    NOTE: in the code visible here the collected lists are neither returned
    nor persisted.
    """
    dataset = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-datalake-dev",
        file_key="dataset_with0perm_all.parquet",
    )

    thresholds = dataset["total_floor_area_starting"].quantile([0.3, 0.6, 0.9]).values

    dataset["floor_area_quantile"] = pd.cut(
        dataset["total_floor_area_starting"],
        bins=[0] + list(thresholds) + [float("inf")],
        labels=False,
        include_lowest=True,
    )

    # We want to set up some tests to deduce the following:
    # For different property types, of various sizes, what is the impact of the various measures that we recommend
    # 1) Insulating the loft. We test the impact of bringing the loft to 270mm insulation and 300mm insulation
    config_columns = ["property_type", "built_form", "floor_area_quantile", "construction_age_band"]
    property_types = dataset[config_columns].drop_duplicates().sort_values(config_columns)

    # For each property type configuration, we take an example property with different starting loft thresholds.
    # We take the value with the lowest U-value, since when simulating, we often work with particularly low u-values

    # TODOS
    # 1) When simulating with loft insulation, make sure is_loft is definitely true, because the roof could start as
    # pitched, but is_loft false

    # TODO: We have a description: "Pitched, loft insulation", which seems to have its insulation thickness set to
    #  "none"
    #  Example UPRN: 100021359753, 10001204228

    # TODO: For windows, we have glazing_type and glazed_type. When simulating, we don't set glazed_type_ending which
    #  could be set to "double glazing installed during or after 2002" (THIS HAS BEEN ADDED!)

    # TODO: When simulating external wall insulation vs internal wall insulation, I need to set the
    #  external_insulation or internal_insulation boolean values to true (THIS HAS BEEN ADDED!)

    # TODO: We could probably re-map some of the values of glazed_type_ending

    # For simulating
    # 1) Loft insulation - we take the lowest u-value when loft insulation is 270mm and 300mm, the values we most
    # commonly simulate to (the original note continued "these values are in-line with" and was left unfinished)
    best_270mm_uvalue = dataset[dataset["roof_insulation_thickness"] == "270"]["roof_thermal_transmittance"].min()
    best_300mm_uvalue = dataset[dataset["roof_insulation_thickness"] == "300"]["roof_thermal_transmittance"].min()

    # 2) Internal wall insulation - we take the lowest u-value when simulating internal wall insulation
    best_internal_wall_uvalue = dataset[
        dataset["internal_insulation"] & dataset["is_solid_brick"]
    ]["walls_thermal_transmittance"].min()

    # 3) External wall insulation - we take the lowest u-value when simulating external wall insulation
    best_external_wall_uvalue = dataset[
        dataset["external_insulation"] & dataset["is_solid_brick"]
    ]["walls_thermal_transmittance"].min()

    # 4) Cavity wall insulation - we take the lowest u-value when simulating cavity wall insulation
    # This is 0.28, which is a sufficiently low value
    best_cavity_wall_uvalue = dataset[
        dataset["is_cavity_wall"]
        & dataset["is_filled_cavity"]
        & (~dataset["external_insulation"])
        & (~dataset["internal_insulation"])
    ]["walls_thermal_transmittance"].min()

    loft_insulation_testing_data = []
    solid_wall_testing_data = []
    cavity_wall_testing_data = []
    solid_floor_testing_data = []
    suspended_floor_testing_data = []
    single_glazed_testing_data = []
    partial_double_glazed_testing_data = []
    partial_secondary_glazed_testing_data = []

    loft_starting_thicknesses = ("none", "12", "50", "75", "100", "150", "200", "250")
    loft_targets = ((270, best_270mm_uvalue), (300, best_300mm_uvalue))
    glaze_column = "multi_glaze_proportion_starting"

    for property_config in property_types.itertuples():
        # All properties that share this configuration
        population = dataset[
            (dataset["property_type"] == property_config.property_type)
            & (dataset["built_form"] == property_config.built_form)
            & (dataset["floor_area_quantile"] == property_config.floor_area_quantile)
            & (dataset["construction_age_band"] == property_config.construction_age_band)
        ]

        # 1) Loft insulation
        # For each starting thickness there are two scenarios we test:
        # a) Loft insulation to 270mm
        # b) Loft insulation to 300mm
        for insulation_thickness in loft_starting_thicknesses:
            row = _pick_one(
                population[
                    (population["roof_insulation_thickness"] == insulation_thickness)
                    & (population["is_pitched"])
                ]
            )
            if row is None:
                # No pitched-roof example with this starting thickness in this configuration
                continue

            for depth, best_uvalue in loft_targets:
                simulation = _simulate(
                    row,
                    f"loft_insulation_{depth}mm",
                    "loft_insulation",
                    best_uvalue,
                    parts=[{"depth": depth}],
                )
                # Insert simulation specific configuration details at the front of the record
                loft_insulation_testing_data.append({
                    "simulation_ending_insulation_thickness": str(depth),
                    "simulation_starting_insulation_thickness": insulation_thickness,
                    **simulation,
                })

        # 2) Solid wall insulation
        solid_wall_sample = population[
            population["is_solid_brick"] & (population["walls_insulation_thickness"] == "none")
        ]

        # We take 1 sample for each value of walls_thermal_transmittance
        for uvalue in solid_wall_sample["walls_thermal_transmittance"].unique():
            row = _pick_one(solid_wall_sample[solid_wall_sample["walls_thermal_transmittance"] == uvalue])
            if row is None:
                # unique() can include NaN, which never compares equal to any row
                continue

            # The IWI/EWI simulations are appended next to each other, so we can see how they differ for the
            # same property
            solid_wall_testing_data.append(
                _simulate(row, "internal_wall_insulation", "internal_wall_insulation", best_internal_wall_uvalue)
            )
            solid_wall_testing_data.append(
                _simulate(row, "external_wall_insulation", "external_wall_insulation", best_external_wall_uvalue)
            )

        # 3) Cavity wall insulation
        cavity_wall_sample = population[
            population["is_cavity_wall"]
            & (~population["is_filled_cavity"])
            & (~population["external_insulation"])
            & (~population["internal_insulation"])
        ]

        # We take 1 sample for each value of walls_thermal_transmittance
        for uvalue in cavity_wall_sample["walls_thermal_transmittance"].unique():
            row = _pick_one(cavity_wall_sample[cavity_wall_sample["walls_thermal_transmittance"] == uvalue])
            if row is None:
                continue

            cavity_wall_testing_data.append(
                _simulate(row, "cavity_wall_insulation", "cavity_wall_insulation", best_cavity_wall_uvalue)
            )

        # 4) Solid floor insulation
        solid_floor_sample = population[
            population["is_solid"] & (population["floor_insulation_thickness"] == "none")
        ]

        # We have many different u-values for solid floors, so we take a sample at the 25%, 50% and 75% values,
        # snapped to a value that actually occurs in floor_thermal_transmittance
        for nearest_row in _rows_nearest_quantiles(solid_floor_sample, "floor_thermal_transmittance"):
            # new_u_value doesn't matter at the moment, so it is left as None
            solid_floor_testing_data.append(
                _simulate(nearest_row, "solid_floor_insulation", "solid_floor_insulation")
            )

        # 5) Suspended floor insulation
        suspended_floor_sample = population[
            population["is_suspended"] & (population["floor_insulation_thickness"] == "none")
        ]

        # We take the same approach as for solid floors
        for nearest_row in _rows_nearest_quantiles(suspended_floor_sample, "floor_thermal_transmittance"):
            suspended_floor_testing_data.append(
                _simulate(nearest_row, "suspended_floor_insulation", "suspended_floor_insulation")
            )

        # 6) Windows - single glazing
        single_glazing_sample = population[population["glazing_type"] == "single"]

        # We take multiple values for multi_glaze_proportion_starting. We want zero when it is present, and then
        # the 25%, 50% and 75% values. Interpolated quantiles rarely equal an observed proportion exactly, so each
        # one is snapped to the closest observed proportion; a set removes duplicates.
        proportions = single_glazing_sample[glaze_column].dropna()
        starting_proportions = set()
        if not proportions.empty:
            if (proportions == 0).any():
                starting_proportions.add(0)
            for target in proportions.quantile([0.25, 0.5, 0.75]).values:
                closest_position = proportions.sub(target).abs().values.argmin()
                starting_proportions.add(proportions.iloc[closest_position])

        for value in sorted(starting_proportions):
            row = _pick_one(single_glazing_sample[single_glazing_sample[glaze_column] == value])
            if row is None:
                continue

            # For single glazed windows, we can recommend double glazing or secondary glazing
            double_glazing_simulation = _simulate(
                row, "windows_glazing", "windows_glazing", is_secondary_glazing=False
            )
            secondary_glazing_simulation = _simulate(
                row, "windows_glazing", "windows_glazing", is_secondary_glazing=True
            )

            # Add simulation specific details to the beginning of each record
            single_glazed_testing_data.append({
                "simulation_ending_window_finish": "double",
                **double_glazing_simulation,
            })
            single_glazed_testing_data.append({
                "simulation_ending_window_finish": "secondary",
                **secondary_glazing_simulation,
            })

        # 7) Windows - partial double glazed
        partial_double_glazing_sample = population[
            (population["glazing_type"] == "double")
            & (population[glaze_column] > 0)
            & (population[glaze_column] < 100)
        ]

        # If we start with partial double glazing, we recommend completing the job
        for nearest_row in _rows_nearest_quantiles(partial_double_glazing_sample, glaze_column):
            partial_double_glazed_testing_data.append(
                _simulate(nearest_row, "windows_glazing", "windows_glazing", is_secondary_glazing=False)
            )

        # 8) Windows - partial secondary glazed
        partial_secondary_glazing_sample = population[
            (population["glazing_type"] == "secondary")
            & (population[glaze_column] > 0)
            & (population[glaze_column] < 100)
        ]

        # If we start with partial secondary glazing, we recommend completing the job
        for nearest_row in _rows_nearest_quantiles(partial_secondary_glazing_sample, glaze_column):
            partial_secondary_glazed_testing_data.append(
                _simulate(nearest_row, "windows_glazing", "windows_glazing", is_secondary_glazing=True)
            )