diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 063e38d9..4454a709 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -2,18 +2,17 @@ import time import json from copy import deepcopy from datetime import datetime -from typing import Dict - -from tqdm import tqdm import pandas as pd import numpy as np from uuid import UUID +from tqdm import tqdm +from sqlalchemy.exc import IntegrityError, OperationalError +from starlette.responses import Response + from backend.SearchEpc import SearchEpc from etl.epc.Record import EPCRecord -from sqlalchemy.exc import IntegrityError, OperationalError -from starlette.responses import Response from backend.app.BatterySapScorer import BatterySAPScorer from backend.app.config import get_settings, get_prediction_buckets @@ -122,14 +121,25 @@ def extract_portfolio_aggregation_data( cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) - if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]): + # Fix ambiguous Series/DataFrame truth value for current_value + current_value = property_value_increase_ranges[p.id]["current_value"] + if isinstance(current_value, (pd.Series, pd.DataFrame)): + # Reduce to scalar + is_null = bool( + current_value.isnull().all().item() if + hasattr(current_value.isnull().all(), 'item') else current_value.isnull().all().all() + ) + else: + is_null = bool(pd.isnull(current_value)) + + if (not is_null) and (current_value is not None): lower_bound_valuation_uplift = ( property_value_increase_ranges[p.id]["lower_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] + current_value ) upper_bound_valuation_uplift = ( property_value_increase_ranges[p.id]["upper_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] + current_value ) else: lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0 @@ -419,11 +429,14 @@ def extract_address_data(config, body): Simple helper to grab address data from the config :return: """ - uprn = config.get("uprn", None) - if pd.isnull(uprn): + try: + uprn = config.get("uprn", None) + if uprn is not None and pd.notnull(uprn): + uprn = int(float(uprn)) + else: + uprn = None + except Exception: uprn = None - if uprn: - uprn = int(float(uprn)) address1 = config.get("address", None) # Handle domna address list format @@ -706,8 +719,8 @@ async def model_engine(body: PlanTriggerRequest): # Otherwise, we use the newest EPC # energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that # has been publically lodged - epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records( - epc_searcher, energy_assessment + epc_records, energy_assessment_is_newer = create_epc_records( + epc_searcher, energy_assessment if energy_assessment is not None else {"epc": None} ) req_data = extract_property_request_data( @@ -806,6 +819,7 @@ async def model_engine(body: PlanTriggerRequest): # Rebaselining # TODO: MUST happen before setting features + logger.info("Preparing rebaselining") rebaselining_scoring_data = [] for p in tqdm(input_properties): # 1) EPC expired 2) Missing EPC 3) Different information from landlord vs EPC @@ -817,6 +831,8 @@ async def model_engine(body: PlanTriggerRequest): rebaselining_scoring_data.append(scoring_data) rebaselining_scoring_data = pd.concat(rebaselining_scoring_data) + if not rebaselining_scoring_data.empty: + logger.info(f"{rebaselining_scoring_data.shape[0]} properties require re-baselineing") # Trigger re-scoring rebaselining_scoring_data["is_post_sap10_starting"] = True @@ -829,46 +845,100 @@ async def model_engine(body: PlanTriggerRequest): extract_uprn=True ) - # TODO - Pull out predictions!!! - - # TODO: TEMP: Compare values + # TODO: TEMP: Compare values - and summarise the differences compare_scores = [] + for x in rebaselining_scoring_data["uprn"].unique(): record = [p for p in input_properties if p.uprn == x][0].epc_record + original_sap = record.current_energy_efficiency - new_sap = rebaselining_response["retrofit-sap-baseline-predictions"][ - rebaselining_response["retrofit-sap-baseline-predictions"]["uprn"] == x + new_sap = rebaselining_response["retrofit_sap_baseline_predictions"][ + rebaselining_response["retrofit_sap_baseline_predictions"]["uprn"] == x ]["predictions"].values[0] + lodgement_date = record.lodgement_date - compare_scores.append({ + ll_differences = record.landlord_differences + + # 🔑 Normalise original keys to match LL format + original = { + k.replace("-", "_"): v + for k, v in record.original_epc.items() + if k.replace("-", "_") in ll_differences + } + + row = { "uprn": x, "original_sap": original_sap, "new_sap": new_sap, - "lodgement_date": lodgement_date - }) - compare_scores = pd.DataFrame(compare_scores) - - for uprn in rebaselining_scoring_data["uprn"].unique(): - # Get the predictions - models = [ - "retrofit-sap-baseline-predictions", - "retrofit-carbon-baseline-predictions", - "retrofit-heat-baseline-predictions", - ] - property_prediction: Dict[str, float] = { - model: rebaselining_response[model][ - rebaselining_response[model]["uprn"] == uprn - ]["predictions"].values[0] for model in models + "differences": ll_differences, + "lodgement_date": lodgement_date, } - # We now need to insert the new values into the epc_record - property_instance = next(p for p in input_properties if p.uprn == int(uprn)) - property_instance.epc_record.insert_new_performance_values( - new_sap=property_prediction["retrofit-sap-baseline-predictions"], - new_epc=sap_to_epc(property_prediction["retrofit-sap-baseline-predictions"]), - new_carbon=property_prediction["retrofit-carbon-baseline-predictions"], - new_heat_demand=property_prediction["retrofit-heat-baseline-predictions"], - ) + # 🔑 Add paired columns in order + for key in ll_differences.keys(): + row[f"{key}_ori"] = original.get(key) + row[f"{key}_ll"] = ll_differences.get(key) + + compare_scores.append(row) + + compare_scores = pd.DataFrame(compare_scores) + df = compare_scores.copy() + + ori_cols = [c for c in df.columns if c.endswith("_ori")] + + for ori_col in ori_cols: + ll_col = ori_col.replace("_ori", "_ll") + + if ll_col in df.columns: + # Handle NaNs properly + same = ( + df[ori_col].fillna("NULL") + == df[ll_col].fillna("NULL") + ) + + df.loc[same, [ori_col, ll_col]] = None + + # --- Refactored: Efficiently update EPC records with new model predictions --- + # Pre-index input_properties by UPRN for fast lookup + input_properties_by_uprn = {int(p.uprn): p for p in input_properties if p.uprn is not None} + + # Pre-index predictions for each model by UPRN + model_names = [ + "retrofit_sap_baseline_predictions", + "retrofit_carbon_baseline_predictions", + "retrofit_heat_baseline_predictions", + ] + predictions_by_model_and_uprn = {} + for model in model_names: + df = rebaselining_response[model] + predictions_by_model_and_uprn[model] = dict(zip(df["uprn"].astype(int), df["predictions"])) + + for uprn in rebaselining_scoring_data["uprn"].unique(): + try: + uprn_int = int(uprn) + property_instance = input_properties_by_uprn.get(uprn_int) + if property_instance is None: + logger.warning(f"No property found for UPRN {uprn_int} during rebaselining update.") + continue + # Gather predictions for this UPRN + try: + new_sap = predictions_by_model_and_uprn["retrofit_sap_baseline_predictions"][uprn_int] + new_carbon = predictions_by_model_and_uprn["retrofit_carbon_baseline_predictions"][uprn_int] + new_heat_demand = predictions_by_model_and_uprn["retrofit_heat_baseline_predictions"][uprn_int] + except KeyError as e: + logger.warning(f"Missing prediction for UPRN {uprn_int}: {e}") + continue + # Update EPC record + property_instance.epc_record.insert_new_performance_values( + new_sap=new_sap, + new_epc=sap_to_epc(new_sap), + new_carbon=new_carbon, + new_heat_demand=new_heat_demand, + ) + logger.info(f"Updated EPC record for UPRN {uprn_int} with new model predictions.") + except Exception as e: + logger.error(f"Error updating EPC record for UPRN {uprn}: {e}") + # --- End refactor --- kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True) @@ -1135,8 +1205,9 @@ async def model_engine(body: PlanTriggerRequest): # We optimise and then we determine eligibility for funding, based on the measures selected optimiser = ( GainOptimiser( - input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False - ) if body.budget else CostOptimiser(input_measures, min_gain=gain) + input_measures, max_cost=body.budget, max_gain=float(gain) if gain is not None else 0, + allow_slack=False + ) if body.budget else CostOptimiser(input_measures, min_gain=float(gain) if gain is not None else 0) ) optimiser.setup() optimiser.solve() @@ -1149,7 +1220,7 @@ async def model_engine(body: PlanTriggerRequest): ) battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size) - # We add the defauly already installed measures to the solution + # We add the defaulty already installed measures to the solution selected = {r["id"] for r in solution + default_already_installed} if property_required_measures: