From d557653129a3aac4b8b45d1c5d6a14d01ac6ac8b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 24 Jan 2024 14:16:33 +0000 Subject: [PATCH] patched issue with cleaned lookup --- .../ha_15_32/ha_analysis_batch_3.py | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 9143df5f..85486e17 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -609,6 +609,75 @@ def get_epc_data( } ) + scoring_df = pd.DataFrame(scoring_data) + scoring_df = scoring_df.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) + + model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at) + + all_predictions = model_api.predict_all( + df=scoring_df, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" + } + ) + + results_df = pd.DataFrame(results) + + predictions = all_predictions["sap_change_predictions"].copy() + + predictions = predictions.rename(columns={"property_id": "row_id"}).merge( + results_df[["row_id", "sap"]], how="left", on="row_id" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "row_id"]], + how="left", + on="row_id" + ) + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to an % + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "row_id": row["row_id"], + "eligibility_classification": eligibility_classification + } + ) + def analyse_ha_data(): """ @@ -706,6 +775,23 @@ def app(): ] ) + # We treat unknown loft insulation as no insulation + cleaned["roof-description"].extend( + [ + {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation', + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'} + ] + ) + + # We patch this record because there is another property below + for x in cleaned["floor-description"]: + if x["original_description"] == '(Same dwelling below) insulated (assumed)': + x["another_property_below"] = True + x["thermal_transmittance"] = 0 + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", )