tidying up engine pipeline with rebaselining

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-20 09:53:48 +00:00
parent ed37059581
commit 7e253d500c
5 changed files with 22 additions and 19 deletions

View file

@ -54,11 +54,11 @@ class Settings(BaseSettings):
SAP_PREDICTIONS_BUCKET: str = "changeme"
CARBON_PREDICTIONS_BUCKET: str = "changeme"
HEAT_PREDICTIONS_BUCKET: str = "changeme"
# LIGHTING_COST_PREDICTIONS_BUCKET: str
# HEATING_COST_PREDICTIONS_BUCKET: str
# HOT_WATER_COST_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"
SAP_BASELINE_PREDICTIONS_BUCKET: str = "changeme"
CARBON_BASELINE_PREDICTIONS_BUCKET: str = "changeme"
HEAT_BASELINE_PREDICTIONS_BUCKET: str = "changeme"
# Other S3 buckets
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
@ -89,4 +89,9 @@ def get_prediction_buckets():
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
"heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
"hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
# Baseline prediction models (SAP, carbon and heat re-baselining)
"retrofit-sap-baseline-predictions": get_settings().SAP_BASELINE_PREDICTIONS_BUCKET,
"retrofit-carbon-baseline-predictions": get_settings().CARBON_BASELINE_PREDICTIONS_BUCKET,
"retrofit-heat-baseline-predictions": get_settings().HEAT_BASELINE_PREDICTIONS_BUCKET,
}

View file

@ -64,7 +64,7 @@ class SubTaskInterface:
self,
subtask_id: UUID,
status: str,
outputs: Optional[Dict[str, str]] = None,
outputs: Optional[Dict[str, str] | str] = None,
cloud_logs_url: Optional[str] = None,
) -> SubTask:
"""

View file

@ -64,7 +64,7 @@ def extract_property_request_data(
x
for x in patches
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
and (x["postcode"] == address.postcode)
),
{},
)
@ -92,7 +92,7 @@ def extract_property_request_data(
x
for x in non_invasive_recommendations
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
and (x["postcode"] == address.postcode)
),
{},
)
@ -134,7 +134,7 @@ def extract_property_request_data(
float(x["valuation"])
for x in valuation_data
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
and (x["postcode"] == address.postcode)
),
None,
)
@ -241,7 +241,7 @@ def parse_eco_packages(
return measures, mapped["target_sap"], mapped["plan_type"], already_installed
def build_cloudwatch_log_url(start_ms: int) -> str:
def build_cloudwatch_log_url(start_ms: Optional[int]) -> str:
"""
Build a CloudWatch Logs URL for the current Lambda invocation,
including timestamp window from start_ms to end_ms (epoch ms).
@ -271,7 +271,7 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
def handle_error(
msg: str,
exception: Exception,
subtask_id: str,
subtask_id: Optional[str],
status_code: int = 500,
start_ms: Optional[int] = None,
):

View file

@ -810,13 +810,9 @@ async def model_engine(body: PlanTriggerRequest):
# TODO: MUST happen before setting features
rebaselining_scoring_data = []
for p in tqdm(input_properties):
# 1) EPC expired
# 2) Missing EPC
# 3) Materially different information from landlord vs EPC
# make the landlord remapping dictionary
# 1) EPC expired 2) Missing EPC 3) Different information from landlord vs EPC
needs_rebaselining = p.epc_is_expired | p.epc_is_estimated | (len(p.epc_record.landlord_differences) > 0)
# Need to adjust p.data and p.epc_record.df?
if needs_rebaselining:
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
scoring_data = p.base_difference_record.df.copy()
@ -826,9 +822,7 @@ async def model_engine(body: PlanTriggerRequest):
# Trigger re-scoring
rebaselining_scoring_data["is_post_sap10_starting"] = True
# Score model - SAP re-baselining model
model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel"
model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev"
rebaselining_response = model_api.predict_all(
df=rebaselining_scoring_data,
bucket=get_settings().DATA_BUCKET,

View file

@ -1,7 +1,7 @@
import aiohttp
import asyncio
import pandas as pd
from typing import List
from typing import List, Dict
from tqdm import tqdm
import requests
from requests.exceptions import RequestException
@ -22,12 +22,16 @@ class ModelApi:
KWH_MODEL_PREFIXES = ["heating_kwh_predictions", "hotwater_kwh_predictions"]
MODEL_URLS = {
MODEL_URLS: Dict[str, str] = {
"sap_change_predictions": "sapmodel",
"heat_demand_predictions": "heatmodel",
"carbon_change_predictions": "carbonmodel",
"hotwater_kwh_predictions": "hotwaterkwhmodel",
"heating_kwh_predictions": "heatingkwhmodel",
# Baseline prediction models
"retrofit-sap-baseline-predictions": "sapbaselinemodel",
"retrofit-heat-baseline-predictions": "heatbaselinemodel",
"retrofit-carbon-baseline-predictions": "carbonbaselinemodel",
}
def __init__(