From 8dbd69eef9140efdb3feab6933f195c762a2ba8c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 28 Mar 2024 15:54:31 +0000
Subject: [PATCH] Updating router for chunked scoring

---
 backend/Property.py        |  2 +-
 backend/app/plan/router.py | 36 ++++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f86e33dc..d97ce8cf 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -233,7 +233,7 @@ class Property:
                 output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
                 # Setting the insulation thickness here to above average should be tested further because we
                 # don't see a high volume of instances for this
-                output["walls_insulation_thickness_ending"] = "above average"
+                output["walls_insulation_thickness_ending"] = "average"
                 output["walls_energy_eff_ending"] = "Good"
 
                 # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e25c04a5..bcbc4332 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation
 logger = setup_logger()
 
 BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400
 
 
 def patch_epc(config, epc_records):
@@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations = {}
         recommendations_scoring_data = []
         representative_recommendations = {}
-        for p in input_properties:
+        for p in tqdm(input_properties):
 
             # Property recommendations
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
@@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        all_predictions = model_api.predict_all(
-            df=recommendations_scoring_data,
-            bucket=get_settings().DATA_BUCKET,
-            prediction_buckets={
-                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-            }
-        )
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
+            )
+
+            # Append this chunk's predictions to the accumulated results for each prediction key
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        # TODO: TEMP - disabled override that reuses the SAP predictions for heat demand and carbon
+        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
+        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a