From 9c140dc0553c0357a4d5a5e24fe03cb22fb14096 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Fri, 22 Dec 2023 17:14:15 +0000
Subject: [PATCH] updated wall description to filled cavity

---
 etl/eligibility/ha_15_32/ha4_app.py           | 160 +++++++++++++++++-
 .../epc_attributes/WallAttributes.py          |   3 +
 .../test_data/test_wall_attributes_cases.py   |   4 +-
 3 files changed, 163 insertions(+), 4 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py
index 1d924347..cc64dfad 100644
--- a/etl/eligibility/ha_15_32/ha4_app.py
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@@ -64,6 +64,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
             postcode=property_meta["Post Code"],
             size=1000
         )
+
         searcher.search()
 
         if searcher.data is None:
@@ -108,7 +109,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
         ].to_dict("records")
 
         scoring_dictionary = prepare_model_data_row(
-            property_id=property_meta["row_id"],
+            property_id=eligibility.epc["uprn"],
             modelling_epc=eligibility.epc,
             cleaned=cleaned,
             cleaning_data=cleaning_data,
@@ -120,7 +121,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
 
         results.append(
             {
-                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
                 "Location Name": property_meta["Location Name"],
                 "Post Code": property_meta["Post Code"],
                 "gbis_eligible": eligibility.gbis_warmfront,
@@ -140,6 +141,144 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
             }
         )
 
+    scoring_df = pd.DataFrame(scoring_data)
+
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+                "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+
+    results_df = pd.DataFrame(results)
+
+    # Some properties appear more than once in results_df; join against one row per uprn so
+    # that duplicates do not multiply the prediction rows and inflate the summed uplift
+    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
+        results_df[["uprn", "sap"]].drop_duplicates(subset=["uprn"]), how="left", on="uprn"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "uprn"]],
+        how="left",
+        on="uprn"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    results_df = results_df[~pd.isnull(results_df["uprn"])]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+
+        # If the property is an F or G, it only needs to upgrade to a D
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+            # Any other property needs to upgrade to a C
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "uprn": row["uprn"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    # Name the columns explicitly so the frame still has a "uprn" column to merge on when
+    # no property was ECO4 eligible and the list above is empty
+    eligibility_assessment = pd.DataFrame(
+        eligibility_assessment, columns=["uprn", "eligibility_classification"]
+    )
+
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="uprn"
+    )
+    # We have some properties that are duplicated so we take just one instance
+    results_df = results_df.drop_duplicates(subset=["uprn"])
+
+    return results_df, scoring_data, nodata
+
+
+def analyse_ha_4(results_df, data):
+    """
+    Summarise the eligibility results for the socially rented properties.
+
+    :param results_df: results dataframe, as returned by get_ha_4_data
+    :param data: not used by the analysis; kept so existing callers keep working
+    :return: dictionary of headline counts, value-count breakdowns and the dataframes of
+        properties that are only flagged as eligible in future
+    """
+    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+    eligible_now = results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]
+    eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
+
+    # Across all tenures: not eligible for either scheme today
+    not_eligible_now = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])
+    # Explicit == True / == False comparisons are kept so a row with a missing flag matches neither
+    eco4_future = results_df["eco4_eligible_future"] == True
+    not_eco4_future = results_df["eco4_eligible_future"] == False
+    gbis_future = results_df["gbis_eligible_future"] == True
+
+    return {
+        "n_identified": eligible_now.sum(),
+        "n_eco4": results_df_social["eco4_eligible"].sum(),
+        # GBIS is only counted for properties that are not already ECO4 eligible
+        "n_gbis": results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum(),
+        "tenure_counts": results_df_social["tenure"].value_counts(),
+        "eligible_tenure_counts": results_df_social[eligible_now]["tenure"].value_counts(),
+        "eco4_wall_counts": eco_eligibile["walls"].value_counts(),
+        "eco4_roof_counts": eco_eligibile["roof"].value_counts(),
+        "eco4_insulated_cavity": eco_eligibile[eco_eligibile["walls"] == "Cavity wall, as built, insulated"],
+        "eligibility_classification_counts": results_df_social["eligibility_classification"].value_counts(),
+        "future_possibilities_eco": results_df[eco4_future & not_eligible_now].copy(),
+        "future_possibilities_gbis": results_df[gbis_future & not_eco4_future & not_eligible_now].copy(),
+    }
+
 
 def app():
     data = load_ha_4()
@@ -159,3 +298,20 @@ def app():
     )
 
     created_at = datetime.now().isoformat()
+
+    results_df, scoring_data, nodata = get_ha_4_data(
+        data=data,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at
+    )
+
+    # Store the data locally as a pickle
+    # import pickle
+    # with open("ha_4.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "results_df": results_df,
+    #             "scoring_data": scoring_data,
+    #             "nodata": nodata
+    #         }, f)
diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py
index bfe600d5..09eac215 100644
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@@ -152,4 +152,7 @@ class WallAttributes(Definitions):
             else:
                 result["insulation_thickness"] = "average"
 
+        if result["is_cavity_wall"] and result["is_as_built"] and (result["insulation_thickness"] == "average"):
+            result["is_filled_cavity"] = True
+
         return result
diff --git a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
index 300702a7..96c545c1 100644
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@@ -550,7 +550,7 @@ wall_cases = [
      'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
      'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
     {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
      'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False,
      'is_as_built': True, 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False,
      'insulation_thickness': 'average', 'external_insulation': False, 'internal_insulation': False},
@@ -727,7 +727,7 @@ wall_cases = [
      'external_insulation': False, 'internal_insulation': False},
     {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
      'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
      'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False,
      'is_as_built': True, 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False,
      'insulation_thickness': 'average', 'external_insulation': False, 'internal_insulation': False},