From 8dbd69eef9140efdb3feab6933f195c762a2ba8c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 28 Mar 2024 15:54:31 +0000 Subject: [PATCH] Updating router for chunked scoring --- backend/Property.py | 2 +- backend/app/plan/router.py | 36 ++++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f86e33dc..d97ce8cf 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -233,7 +233,7 @@ class Property: output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"] # Setting the insulation thickness here to above average should be tested further because we # don't see a high volume of instances for this - output["walls_insulation_thickness_ending"] = "above average" + output["walls_insulation_thickness_ending"] = "average" output["walls_energy_eff_ending"] = "Good" # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e25c04a5..bcbc4332 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation logger = setup_logger() BATCH_SIZE = 5 +SCORING_BATCH_SIZE = 400 def patch_epc(config, epc_records): @@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest): recommendations = {} recommendations_scoring_data = [] representative_recommendations = {} - for p in input_properties: + for p in tqdm(input_properties): # Property recommendations p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) @@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest): model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - all_predictions = model_api.predict_all( - df=recommendations_scoring_data, - bucket=get_settings().DATA_BUCKET, - prediction_buckets={ - "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, - "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, - "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET - } - ) + all_predictions = { + "sap_change_predictions": pd.DataFrame(), + "heat_demand_predictions": pd.DataFrame(), + "carbon_change_predictions": pd.DataFrame() + } + to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) + for chunk in tqdm(to_loop_over, total=len(to_loop_over)): + predictions_dict = model_api.predict_all( + df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], + bucket=get_settings().DATA_BUCKET, + prediction_buckets={ + "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, + "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, + "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET + } + ) + + # Append the predictions to the predictions dictionary + for key, scored in predictions_dict.items(): + all_predictions[key] = pd.concat([all_predictions[key], scored]) + + # TODO: TEMP + # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy() + # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy() # Insert the predictions into the recommendations and run the optimiser # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a