tidying up engine pipeline with rebaselining

2026-08-03 05:18:22 +00:00 · 2026-03-20 09:53:48 +00:00 · 2026-03-20 09:53:48 +00:00 · 7e253d500c
commit 7e253d500c
parent ed37059581
5 changed files with 22 additions and 19 deletions
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -54,11 +54,11 @@ class Settings(BaseSettings):
    SAP_PREDICTIONS_BUCKET: str = "changeme"
    CARBON_PREDICTIONS_BUCKET: str = "changeme"
    HEAT_PREDICTIONS_BUCKET: str = "changeme"
-    # LIGHTING_COST_PREDICTIONS_BUCKET: str
-    # HEATING_COST_PREDICTIONS_BUCKET: str
-    # HOT_WATER_COST_PREDICTIONS_BUCKET: str
    HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
    HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"
+    SAP_BASELINE_PREDICTIONS_BUCKET: str = "changeme"
+    CARBON_BASELINE_PREDICTIONS_BUCKET: str = "changeme"
+    HEAT_BASELINE_PREDICTIONS_BUCKET: str = "changeme"

    # Other S3 buckets
    ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
@ -89,4 +89,9 @@ def get_prediction_buckets():
        "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
        "heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
        "hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
+
+        # Score model - SAP re-baselining model
+        "retrofit-sap-baseline-predictions": get_settings().SAP_BASELINE_PREDICTIONS_BUCKET,
+        "retrofit-carbon-baseline-predictions": get_settings().CARBON_BASELINE_PREDICTIONS_BUCKET,
+        "retrofit-heat-baseline-predictions": get_settings().HEAT_BASELINE_PREDICTIONS_BUCKET,
    }
--- a/backend/app/db/functions/tasks/Tasks.py
+++ b/backend/app/db/functions/tasks/Tasks.py
@ -64,7 +64,7 @@ class SubTaskInterface:
        self,
        subtask_id: UUID,
        status: str,
-        outputs: Optional[Dict[str, str]] = None,
+        outputs: Optional[Dict[str, str] | str] = None,
        cloud_logs_url: Optional[str] = None,
    ) -> SubTask:
        """
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -64,7 +64,7 @@ def extract_property_request_data(
                x
                for x in patches
                if (x["address"] == address.address)
-                and (x["postcode"] == address.postcode)
+                   and (x["postcode"] == address.postcode)
            ),
            {},
        )
@ -92,7 +92,7 @@ def extract_property_request_data(
                x
                for x in non_invasive_recommendations
                if (x["address"] == address.address)
-                and (x["postcode"] == address.postcode)
+                   and (x["postcode"] == address.postcode)
            ),
            {},
        )
@ -134,7 +134,7 @@ def extract_property_request_data(
                float(x["valuation"])
                for x in valuation_data
                if (x["address"] == address.address)
-                and (x["postcode"] == address.postcode)
+                   and (x["postcode"] == address.postcode)
            ),
            None,
        )
@ -241,7 +241,7 @@ def parse_eco_packages(
    return measures, mapped["target_sap"], mapped["plan_type"], already_installed


-def build_cloudwatch_log_url(start_ms: int) -> str:
+def build_cloudwatch_log_url(start_ms: Optional[int]) -> str:
    """
    Build a CloudWatch Logs URL for the current Lambda invocation,
    including timestamp window from start_ms to end_ms (epoch ms).
@ -271,7 +271,7 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
 def handle_error(
    msg: str,
    exception: Exception,
-    subtask_id: str,
+    subtask_id: Optional[str],
    status_code: int = 500,
    start_ms: Optional[int] = None,
 ):
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -810,13 +810,9 @@ async def model_engine(body: PlanTriggerRequest):
        # TODO: MUST happen before setting features
        rebaselining_scoring_data = []
        for p in tqdm(input_properties):
-            # 1) EPC expired
-            # 2) Missing EPC
-            # 3) Materially different information from landlord vs EPC
-            # make the landlord remapping dictionar
+            # Re-baseline when: 1) EPC expired, 2) EPC missing, or 3) landlord information differs from the EPC
            needs_rebaselining = p.epc_is_expired | p.epc_is_estimated | (len(p.epc_record.landlord_differences) > 0)

-            # Need to adjust p.data and p.epc_record.df?
            if needs_rebaselining:
                p.create_base_difference_epc_record(cleaned_lookup=cleaned)
                scoring_data = p.base_difference_record.df.copy()
@ -826,9 +822,7 @@ async def model_engine(body: PlanTriggerRequest):

        # Trigger re-scoring
        rebaselining_scoring_data["is_post_sap10_starting"] = True
-        # Score model - SAP re-baselining model
-        model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel"
-        model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev"
+        
        rebaselining_response = model_api.predict_all(
            df=rebaselining_scoring_data,
            bucket=get_settings().DATA_BUCKET,
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -1,7 +1,7 @@
 import aiohttp
 import asyncio
 import pandas as pd
-from typing import List
+from typing import List, Dict
 from tqdm import tqdm
 import requests
 from requests.exceptions import RequestException
@ -22,12 +22,16 @@ class ModelApi:

    KWH_MODEL_PREFIXES = ["heating_kwh_predictions", "hotwater_kwh_predictions"]

-    MODEL_URLS = {
+    MODEL_URLS: Dict[str, str] = {
        "sap_change_predictions": "sapmodel",
        "heat_demand_predictions": "heatmodel",
        "carbon_change_predictions": "carbonmodel",
        "hotwater_kwh_predictions": "hotwaterkwhmodel",
        "heating_kwh_predictions": "heatingkwhmodel",
+        # Baseline prediction models
+        "retrofit-sap-baseline-predictions": "sapbaselinemodel",
+        "retrofit-heat-baseline-predictions": "heatbaselinemodel",
+        "retrofit-carbon-baseline-predictions": "carbonbaselinemodel",
    }

    def __init__(