diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 28efadd0..3dc4d45f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1166,10 +1166,9 @@ def get_epc_data( ] # condition 1 - identified for gbis and not eligible - condition_1 = ( - identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[ - "eligible"] - ) & consider_penultimate_epc + condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront + and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc # condition 2 - identified for eco4 and not eligible condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[ @@ -1246,10 +1245,12 @@ def get_epc_data( "uprn": eligibility.epc["uprn"], "is_estimated": searcher.newest_epc.get("estimated") is not None, "property_type": eligibility.epc["property-type"], - "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], "eco4_message": eligibility.eco4_warmfront["message"], "eco4_strict": eligibility.eco4_warmfront["strict"], + "gbis_eligible": eligibility.gbis_warmfront["eligible"], + "gbis_message": eligibility.gbis_warmfront["message"], + "gbis_strict": eligibility.gbis_warmfront["strict"], "sap": float(eligibility.epc["current-energy-efficiency"]), # Property components "roof": eligibility.roof["clean_description"], @@ -1279,24 +1280,32 @@ def get_epc_data( ) model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] - all_predictions = model_api.predict_all( - df=scoring_df, - bucket="retrofit-data-dev", - prediction_buckets={ - "sap_change_predictions": "retrofit-sap-predictions-dev", - "heat_demand_predictions": "retrofit-heat-predictions-dev", - "carbon_change_predictions": "retrofit-carbon-predictions-dev" - } - ) + scoring_df["id"] = scoring_df["id"] + "phase=0" + # We split up the scoring_df and score + predictions = [] + to_loop_over = range(0, scoring_df.shape[0], 400) + for chunk in tqdm(to_loop_over, total=len(to_loop_over)): + predictions_dict = model_api.predict_all( + df=scoring_df.iloc[chunk:chunk + 400], + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) - predictions = all_predictions["sap_change_predictions"].copy() + predictions.append(predictions_dict["sap_change_predictions"]) + + predictions = pd.concat(predictions) + predictions_size = predictions.shape[0] predictions = predictions.rename(columns={"property_id": "row_id"}).merge( results_df[["row_id", "sap"]], how="left", on="row_id" ) + if predictions.shape[0] != predictions_size: + raise ValueError("Predictions size has changed") predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] - predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() results_df = results_df.merge( predictions[["sap_uplift", "row_id"]], @@ -1339,9 +1348,12 @@ def get_epc_data( eligibility_assessment = pd.DataFrame(eligibility_assessment) + # Make sure the results haven't changed in size results_df = results_df.merge( eligibility_assessment, how="left", on="row_id" ) + if results_df.shape[0] != len(results): + raise ValueError("results has changed size") # We store the results in S3 as a pickle save_pickle_to_s3( @@ -1809,6 +1821,8 @@ def app(): loader.load() loader.ha_facts_and_figures() + loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False) + # We load in the additional data required to perform the analysis cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson",