Updating router for chunked scoring

2026-07-27 23:35:01 +00:00 · 2024-03-28 15:54:31 +00:00 · 2024-03-28 15:54:31 +00:00 · 8dbd69eef9
commit 8dbd69eef9
parent 22a3e21f52
2 changed files with 27 additions and 11 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -233,7 +233,7 @@ class Property:
                output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
                # Setting the insulation thickness here to above average should be tested further because we
                # don't see a high volume of instances for this
-                output["walls_insulation_thickness_ending"] = "above average"
+                output["walls_insulation_thickness_ending"] = "average"
                output["walls_energy_eff_ending"] = "Good"

                 # Note: often when the wall is insulated, the internal/external insulation is not noted so we should
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -41,6 +41,7 @@ from backend.ml_models.Valuation import PropertyValuation
 logger = setup_logger()

 BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400


 def patch_epc(config, epc_records):
@@ -164,7 +165,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        recommendations = {}
        recommendations_scoring_data = []
        representative_recommendations = {}
-        for p in input_properties:
+        for p in tqdm(input_properties):

            # Property recommendations
            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
@@ -196,15 +197,30 @@ async def trigger_plan(body: PlanTriggerRequest):

        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)

-        all_predictions = model_api.predict_all(
-            df=recommendations_scoring_data,
-            bucket=get_settings().DATA_BUCKET,
-            prediction_buckets={
-                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-            }
-        )
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
+            )
+
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        # TODO: TEMP
+        # all_predictions["heat_demand_predictions"] = all_predictions["sap_change_predictions"].copy()
+        # all_predictions["carbon_change_predictions"] = all_predictions["sap_change_predictions"].copy()

        # Insert the predictions into the recommendations and run the optimiser
        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a