From 7a219285fc192145aedb337c54803fa4f442a142 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Feb 2024 19:16:10 +0000 Subject: [PATCH] looking into loft insulation data --- etl/testing_data/sap_model_simulation.py | 116 ++++++++++++++++++++--- 1 file changed, 102 insertions(+), 14 deletions(-) diff --git a/etl/testing_data/sap_model_simulation.py b/etl/testing_data/sap_model_simulation.py index 0a044201..467b50db 100644 --- a/etl/testing_data/sap_model_simulation.py +++ b/etl/testing_data/sap_model_simulation.py @@ -543,24 +543,112 @@ def app(): loft_insulation_comparison_matrix["measure_impact"] = loft_insulation_comparison_matrix["predictions"] - \ loft_insulation_comparison_matrix["sap_starting"] + + # We create a sap band grouping, for every 10 points of sap. So 1-10, 11-20, 21-30 etc + loft_insulation_comparison_matrix["sap_band"] = pd.cut( + loft_insulation_comparison_matrix["sap_starting"], + bins=range(0, 101, 10), + labels=range(1, 11) + ) + # Perform a group by describe loft_insulation_describe = loft_insulation_comparison_matrix.groupby( - ["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"] + ["sap_band", "simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"] )[["measure_impact"]].describe().reset_index() - z = loft_insulation_comparison_matrix[loft_insulation_comparison_matrix["measure_impact"] < 0] - z.head(1)[["uprn", "id"]] - error_row = loft_insulation_testing_df[ - (loft_insulation_testing_df["id"] == "100090292333+loft_insulation_150_270mm") - ] + for col in ["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"]: + loft_insulation_describe[col] = loft_insulation_describe[col].str.replace('none', "0") + loft_insulation_describe[col] = loft_insulation_describe[col].astype(int) - error_dataset = dataset[ - (dataset["uprn"] == "10070401239") & (dataset["roof_insulation_thickness"] == "250") + loft_insulation_describe = loft_insulation_describe.sort_values( + ["simulation_ending_insulation_thickness", "simulation_starting_insulation_thickness"], ascending=True + ) + + # In the training data, try and get just the rows that are loft insulation only + # Things that change: + # 1) roof_insulation_thickness + # 3) roof_thermal_transmittance + # 4) roof_energy_eff_ending + loft_insulation_training_data = dataset.copy() + loft_insulation_columns_we_need_the_same = [c for c in column_config.keys() if c not in [ + "roof_insulation_thickness_ending", "roof_thermal_transmittance_ending", "roof_energy_eff_ending", + "transaction_type_ending", "days_to_ending", "sap_ending", "heat_demand_ending", "carbon_ending", + "total_floor_area_ending", "floor_height_ending", "estimated_perimeter_ending" + ]] + + for ending_col in tqdm(loft_insulation_columns_we_need_the_same): + starting_col = column_config[ending_col] + loft_insulation_training_data = loft_insulation_training_data[ + loft_insulation_training_data[ending_col] == loft_insulation_training_data[starting_col] + ] + + # We get rows where the insulation starts at 200mm + insulation_200mm_starting = loft_insulation_training_data[ + (loft_insulation_training_data["roof_insulation_thickness"] == "200") & + (loft_insulation_training_data["roof_insulation_thickness_ending"] == "300") ] - changed_from_dataset = [] - for c in column_config: - ending_value = error_row[column_config[c]].values[0] - starting_value = error_row[column_config[c]].values[0] - error_dataset["roof_insulation_thickness"] - error_dataset["roof_insulation_thickness_ending"] + # Let's use the API to find exactly the record + from backend.SearchEpc import SearchEpc + searcher = SearchEpc( + address1="2 Darkfield Way", + postcode="TA7 8HY", + auth_token="a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=", + os_api_key="" + ) + searcher.uprn = "10009320092" + searcher.find_property(skip_os=True) + + newest_epc = searcher.newest_epc + older_epc = [epc for epc in searcher.older_epcs if + epc["lmk-key"] == "5ae2f073004839510f9eeb1886160776a05697f8518b8b3b63d45f65686c4757"][0] + # Iterate through the keys in the newest_epc and find the values in older epc that are different to the newest epc + + differences = {} + for k, v in newest_epc.items(): + if v != older_epc[k]: + differences[k] = (v, older_epc[k]) + + testing_model_api = ModelApi(portfolio_id="simulation-testing-loft-example", timestamp=created_at) + testing_model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + ############################################################################################################ + # TODO:! + # Findings: 1) For uprn 10009320092, the number of rooms and number of heated rooms has changed and can change from + # epc to epc. We should therefore include a starting and ending value for this + + # Investigation 1) + testing_row = insulation_200mm_starting[insulation_200mm_starting["uprn"] == "10009320092"].copy() + testing_row["id"] = "testing-200mm-loft-insulation-starting-baseline+recommendation_id_baseline" + testing_row["recommendation_id"] = "recommendation_id_baseline" + # The testing row has 4 rooms + # Score in the model to see what we get + + baseline_prediction = testing_model_api.predict_all( + df=testing_row, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + baseline_pred_df = baseline_prediction["sap_change_predictions"] + impact = baseline_pred_df["predictions"].values[0] - testing_row["sap_starting"].values[0] + + # Changing this from 4 rooms to 5 rooms has NO impact!! + testing_row_5_rooms = testing_row.copy() + testing_row_5_rooms["id"] = "testing-200mm-loft-insulation-starting-baseline+recommendation_id_5_rooms" + testing_row_5_rooms["recommendation_id"] = "recommendation_id_5_rooms" + testing_row_5_rooms["number_habitable_rooms"] = float(5) + testing_row_5_rooms["number_heated_rooms"] = float(5) + + prediction_5_rooms = testing_model_api.predict_all( + df=testing_row_5_rooms, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + pred_df_5_rooms = prediction_5_rooms["sap_change_predictions"] + impact_5_rooms = pred_df_5_rooms["predictions"].values[0] - testing_row_5_rooms["sap_starting"].values[0]