integrating rebaselining

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-24 22:56:53 +00:00
parent b22c7ac6e8
commit 28b39407d0

View file

@ -2,18 +2,17 @@ import time
import json
from copy import deepcopy
from datetime import datetime
from typing import Dict
from tqdm import tqdm
import pandas as pd
import numpy as np
from uuid import UUID
from tqdm import tqdm
from sqlalchemy.exc import IntegrityError, OperationalError
from starlette.responses import Response
from backend.SearchEpc import SearchEpc
from etl.epc.Record import EPCRecord
from sqlalchemy.exc import IntegrityError, OperationalError
from starlette.responses import Response
from backend.app.BatterySapScorer import BatterySAPScorer
from backend.app.config import get_settings, get_prediction_buckets
@ -122,14 +121,25 @@ def extract_portfolio_aggregation_data(
cost = sum([r["total"] for r in default_recommendations])
sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]):
# Fix ambiguous Series/DataFrame truth value for current_value
current_value = property_value_increase_ranges[p.id]["current_value"]
if isinstance(current_value, (pd.Series, pd.DataFrame)):
# Reduce to scalar
is_null = bool(
current_value.isnull().all().item() if
hasattr(current_value.isnull().all(), 'item') else current_value.isnull().all().all()
)
else:
is_null = bool(pd.isnull(current_value))
if (not is_null) and (current_value is not None):
lower_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
current_value
)
upper_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
current_value
)
else:
lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0
@ -419,11 +429,14 @@ def extract_address_data(config, body):
Simple helper to grab address data from the config
:return:
"""
uprn = config.get("uprn", None)
if pd.isnull(uprn):
try:
uprn = config.get("uprn", None)
if uprn is not None and pd.notnull(uprn):
uprn = int(float(uprn))
else:
uprn = None
except Exception:
uprn = None
if uprn:
uprn = int(float(uprn))
address1 = config.get("address", None)
# Handle domna address list format
@ -706,8 +719,8 @@ async def model_engine(body: PlanTriggerRequest):
# Otherwise, we use the newest EPC
# energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that
# has been publically lodged
epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
epc_searcher, energy_assessment
epc_records, energy_assessment_is_newer = create_epc_records(
epc_searcher, energy_assessment if energy_assessment is not None else {"epc": None}
)
req_data = extract_property_request_data(
@ -806,6 +819,7 @@ async def model_engine(body: PlanTriggerRequest):
# Rebaselining
# TODO: MUST happen before setting features
logger.info("Preparing rebaselining")
rebaselining_scoring_data = []
for p in tqdm(input_properties):
# 1) EPC expired 2) Missing EPC 3) Different information from landlord vs EPC
@ -817,6 +831,8 @@ async def model_engine(body: PlanTriggerRequest):
rebaselining_scoring_data.append(scoring_data)
rebaselining_scoring_data = pd.concat(rebaselining_scoring_data)
if not rebaselining_scoring_data.empty:
logger.info(f"{rebaselining_scoring_data.shape[0]} properties require re-baselineing")
# Trigger re-scoring
rebaselining_scoring_data["is_post_sap10_starting"] = True
@ -829,46 +845,100 @@ async def model_engine(body: PlanTriggerRequest):
extract_uprn=True
)
# TODO - Pull out predictions!!!
# TODO: TEMP: Compare values
# TODO: TEMP: Compare values - and summarise the differences
compare_scores = []
for x in rebaselining_scoring_data["uprn"].unique():
record = [p for p in input_properties if p.uprn == x][0].epc_record
original_sap = record.current_energy_efficiency
new_sap = rebaselining_response["retrofit-sap-baseline-predictions"][
rebaselining_response["retrofit-sap-baseline-predictions"]["uprn"] == x
new_sap = rebaselining_response["retrofit_sap_baseline_predictions"][
rebaselining_response["retrofit_sap_baseline_predictions"]["uprn"] == x
]["predictions"].values[0]
lodgement_date = record.lodgement_date
compare_scores.append({
ll_differences = record.landlord_differences
# 🔑 Normalise original keys to match LL format
original = {
k.replace("-", "_"): v
for k, v in record.original_epc.items()
if k.replace("-", "_") in ll_differences
}
row = {
"uprn": x,
"original_sap": original_sap,
"new_sap": new_sap,
"lodgement_date": lodgement_date
})
compare_scores = pd.DataFrame(compare_scores)
for uprn in rebaselining_scoring_data["uprn"].unique():
# Get the predictions
models = [
"retrofit-sap-baseline-predictions",
"retrofit-carbon-baseline-predictions",
"retrofit-heat-baseline-predictions",
]
property_prediction: Dict[str, float] = {
model: rebaselining_response[model][
rebaselining_response[model]["uprn"] == uprn
]["predictions"].values[0] for model in models
"differences": ll_differences,
"lodgement_date": lodgement_date,
}
# We now need to insert the new values into the epc_record
property_instance = next(p for p in input_properties if p.uprn == int(uprn))
property_instance.epc_record.insert_new_performance_values(
new_sap=property_prediction["retrofit-sap-baseline-predictions"],
new_epc=sap_to_epc(property_prediction["retrofit-sap-baseline-predictions"]),
new_carbon=property_prediction["retrofit-carbon-baseline-predictions"],
new_heat_demand=property_prediction["retrofit-heat-baseline-predictions"],
)
# 🔑 Add paired columns in order
for key in ll_differences.keys():
row[f"{key}_ori"] = original.get(key)
row[f"{key}_ll"] = ll_differences.get(key)
compare_scores.append(row)
compare_scores = pd.DataFrame(compare_scores)
df = compare_scores.copy()
ori_cols = [c for c in df.columns if c.endswith("_ori")]
for ori_col in ori_cols:
ll_col = ori_col.replace("_ori", "_ll")
if ll_col in df.columns:
# Handle NaNs properly
same = (
df[ori_col].fillna("NULL")
== df[ll_col].fillna("NULL")
)
df.loc[same, [ori_col, ll_col]] = None
# --- Refactored: Efficiently update EPC records with new model predictions ---
# Pre-index input_properties by UPRN for fast lookup
input_properties_by_uprn = {int(p.uprn): p for p in input_properties if p.uprn is not None}
# Pre-index predictions for each model by UPRN
model_names = [
"retrofit_sap_baseline_predictions",
"retrofit_carbon_baseline_predictions",
"retrofit_heat_baseline_predictions",
]
predictions_by_model_and_uprn = {}
for model in model_names:
df = rebaselining_response[model]
predictions_by_model_and_uprn[model] = dict(zip(df["uprn"].astype(int), df["predictions"]))
for uprn in rebaselining_scoring_data["uprn"].unique():
try:
uprn_int = int(uprn)
property_instance = input_properties_by_uprn.get(uprn_int)
if property_instance is None:
logger.warning(f"No property found for UPRN {uprn_int} during rebaselining update.")
continue
# Gather predictions for this UPRN
try:
new_sap = predictions_by_model_and_uprn["retrofit_sap_baseline_predictions"][uprn_int]
new_carbon = predictions_by_model_and_uprn["retrofit_carbon_baseline_predictions"][uprn_int]
new_heat_demand = predictions_by_model_and_uprn["retrofit_heat_baseline_predictions"][uprn_int]
except KeyError as e:
logger.warning(f"Missing prediction for UPRN {uprn_int}: {e}")
continue
# Update EPC record
property_instance.epc_record.insert_new_performance_values(
new_sap=new_sap,
new_epc=sap_to_epc(new_sap),
new_carbon=new_carbon,
new_heat_demand=new_heat_demand,
)
logger.info(f"Updated EPC record for UPRN {uprn_int} with new model predictions.")
except Exception as e:
logger.error(f"Error updating EPC record for UPRN {uprn}: {e}")
# --- End refactor ---
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
@ -1135,8 +1205,9 @@ async def model_engine(body: PlanTriggerRequest):
# We optimise and then we determine eligibility for funding, based on the measures selected
optimiser = (
GainOptimiser(
input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False
) if body.budget else CostOptimiser(input_measures, min_gain=gain)
input_measures, max_cost=body.budget, max_gain=float(gain) if gain is not None else 0,
allow_slack=False
) if body.budget else CostOptimiser(input_measures, min_gain=float(gain) if gain is not None else 0)
)
optimiser.setup()
optimiser.solve()
@ -1149,7 +1220,7 @@ async def model_engine(body: PlanTriggerRequest):
)
battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size)
# We add the defauly already installed measures to the solution
# We add the defaulty already installed measures to the solution
selected = {r["id"] for r in solution + default_already_installed}
if property_required_measures: