mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
integrating rebaselining
This commit is contained in:
parent
b22c7ac6e8
commit
28b39407d0
1 changed files with 117 additions and 46 deletions
|
|
@ -2,18 +2,17 @@ import time
|
|||
import json
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from typing import Dict
|
||||
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from uuid import UUID
|
||||
|
||||
from tqdm import tqdm
|
||||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from starlette.responses import Response
|
||||
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
from etl.epc.Record import EPCRecord
|
||||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from starlette.responses import Response
|
||||
from backend.app.BatterySapScorer import BatterySAPScorer
|
||||
|
||||
from backend.app.config import get_settings, get_prediction_buckets
|
||||
|
|
@ -122,14 +121,25 @@ def extract_portfolio_aggregation_data(
|
|||
cost = sum([r["total"] for r in default_recommendations])
|
||||
sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
|
||||
|
||||
if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]):
|
||||
# Fix ambiguous Series/DataFrame truth value for current_value
|
||||
current_value = property_value_increase_ranges[p.id]["current_value"]
|
||||
if isinstance(current_value, (pd.Series, pd.DataFrame)):
|
||||
# Reduce to scalar
|
||||
is_null = bool(
|
||||
current_value.isnull().all().item() if
|
||||
hasattr(current_value.isnull().all(), 'item') else current_value.isnull().all().all()
|
||||
)
|
||||
else:
|
||||
is_null = bool(pd.isnull(current_value))
|
||||
|
||||
if (not is_null) and (current_value is not None):
|
||||
lower_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
current_value
|
||||
)
|
||||
upper_bound_valuation_uplift = (
|
||||
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
|
||||
property_value_increase_ranges[p.id]["current_value"]
|
||||
current_value
|
||||
)
|
||||
else:
|
||||
lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0
|
||||
|
|
@ -419,11 +429,14 @@ def extract_address_data(config, body):
|
|||
Simple helper to grab address data from the config
|
||||
:return:
|
||||
"""
|
||||
uprn = config.get("uprn", None)
|
||||
if pd.isnull(uprn):
|
||||
try:
|
||||
uprn = config.get("uprn", None)
|
||||
if uprn is not None and pd.notnull(uprn):
|
||||
uprn = int(float(uprn))
|
||||
else:
|
||||
uprn = None
|
||||
except Exception:
|
||||
uprn = None
|
||||
if uprn:
|
||||
uprn = int(float(uprn))
|
||||
|
||||
address1 = config.get("address", None)
|
||||
# Handle domna address list format
|
||||
|
|
@ -706,8 +719,8 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
# Otherwise, we use the newest EPC
|
||||
# energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that
|
||||
# has been publically lodged
|
||||
epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
|
||||
epc_searcher, energy_assessment
|
||||
epc_records, energy_assessment_is_newer = create_epc_records(
|
||||
epc_searcher, energy_assessment if energy_assessment is not None else {"epc": None}
|
||||
)
|
||||
|
||||
req_data = extract_property_request_data(
|
||||
|
|
@ -806,6 +819,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
|
||||
# Rebaselining
|
||||
# TODO: MUST happen before setting features
|
||||
logger.info("Preparing rebaselining")
|
||||
rebaselining_scoring_data = []
|
||||
for p in tqdm(input_properties):
|
||||
# 1) EPC expired 2) Missing EPC 3) Different information from landlord vs EPC
|
||||
|
|
@ -817,6 +831,8 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
rebaselining_scoring_data.append(scoring_data)
|
||||
|
||||
rebaselining_scoring_data = pd.concat(rebaselining_scoring_data)
|
||||
if not rebaselining_scoring_data.empty:
|
||||
logger.info(f"{rebaselining_scoring_data.shape[0]} properties require re-baselineing")
|
||||
|
||||
# Trigger re-scoring
|
||||
rebaselining_scoring_data["is_post_sap10_starting"] = True
|
||||
|
|
@ -829,46 +845,100 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
extract_uprn=True
|
||||
)
|
||||
|
||||
# TODO - Pull out predictions!!!
|
||||
|
||||
# TODO: TEMP: Compare values
|
||||
# TODO: TEMP: Compare values - and summarise the differences
|
||||
compare_scores = []
|
||||
|
||||
for x in rebaselining_scoring_data["uprn"].unique():
|
||||
record = [p for p in input_properties if p.uprn == x][0].epc_record
|
||||
|
||||
original_sap = record.current_energy_efficiency
|
||||
new_sap = rebaselining_response["retrofit-sap-baseline-predictions"][
|
||||
rebaselining_response["retrofit-sap-baseline-predictions"]["uprn"] == x
|
||||
new_sap = rebaselining_response["retrofit_sap_baseline_predictions"][
|
||||
rebaselining_response["retrofit_sap_baseline_predictions"]["uprn"] == x
|
||||
]["predictions"].values[0]
|
||||
|
||||
lodgement_date = record.lodgement_date
|
||||
compare_scores.append({
|
||||
ll_differences = record.landlord_differences
|
||||
|
||||
# 🔑 Normalise original keys to match LL format
|
||||
original = {
|
||||
k.replace("-", "_"): v
|
||||
for k, v in record.original_epc.items()
|
||||
if k.replace("-", "_") in ll_differences
|
||||
}
|
||||
|
||||
row = {
|
||||
"uprn": x,
|
||||
"original_sap": original_sap,
|
||||
"new_sap": new_sap,
|
||||
"lodgement_date": lodgement_date
|
||||
})
|
||||
compare_scores = pd.DataFrame(compare_scores)
|
||||
|
||||
for uprn in rebaselining_scoring_data["uprn"].unique():
|
||||
# Get the predictions
|
||||
models = [
|
||||
"retrofit-sap-baseline-predictions",
|
||||
"retrofit-carbon-baseline-predictions",
|
||||
"retrofit-heat-baseline-predictions",
|
||||
]
|
||||
property_prediction: Dict[str, float] = {
|
||||
model: rebaselining_response[model][
|
||||
rebaselining_response[model]["uprn"] == uprn
|
||||
]["predictions"].values[0] for model in models
|
||||
"differences": ll_differences,
|
||||
"lodgement_date": lodgement_date,
|
||||
}
|
||||
|
||||
# We now need to insert the new values into the epc_record
|
||||
property_instance = next(p for p in input_properties if p.uprn == int(uprn))
|
||||
property_instance.epc_record.insert_new_performance_values(
|
||||
new_sap=property_prediction["retrofit-sap-baseline-predictions"],
|
||||
new_epc=sap_to_epc(property_prediction["retrofit-sap-baseline-predictions"]),
|
||||
new_carbon=property_prediction["retrofit-carbon-baseline-predictions"],
|
||||
new_heat_demand=property_prediction["retrofit-heat-baseline-predictions"],
|
||||
)
|
||||
# 🔑 Add paired columns in order
|
||||
for key in ll_differences.keys():
|
||||
row[f"{key}_ori"] = original.get(key)
|
||||
row[f"{key}_ll"] = ll_differences.get(key)
|
||||
|
||||
compare_scores.append(row)
|
||||
|
||||
compare_scores = pd.DataFrame(compare_scores)
|
||||
df = compare_scores.copy()
|
||||
|
||||
ori_cols = [c for c in df.columns if c.endswith("_ori")]
|
||||
|
||||
for ori_col in ori_cols:
|
||||
ll_col = ori_col.replace("_ori", "_ll")
|
||||
|
||||
if ll_col in df.columns:
|
||||
# Handle NaNs properly
|
||||
same = (
|
||||
df[ori_col].fillna("NULL")
|
||||
== df[ll_col].fillna("NULL")
|
||||
)
|
||||
|
||||
df.loc[same, [ori_col, ll_col]] = None
|
||||
|
||||
# --- Refactored: Efficiently update EPC records with new model predictions ---
|
||||
# Pre-index input_properties by UPRN for fast lookup
|
||||
input_properties_by_uprn = {int(p.uprn): p for p in input_properties if p.uprn is not None}
|
||||
|
||||
# Pre-index predictions for each model by UPRN
|
||||
model_names = [
|
||||
"retrofit_sap_baseline_predictions",
|
||||
"retrofit_carbon_baseline_predictions",
|
||||
"retrofit_heat_baseline_predictions",
|
||||
]
|
||||
predictions_by_model_and_uprn = {}
|
||||
for model in model_names:
|
||||
df = rebaselining_response[model]
|
||||
predictions_by_model_and_uprn[model] = dict(zip(df["uprn"].astype(int), df["predictions"]))
|
||||
|
||||
for uprn in rebaselining_scoring_data["uprn"].unique():
|
||||
try:
|
||||
uprn_int = int(uprn)
|
||||
property_instance = input_properties_by_uprn.get(uprn_int)
|
||||
if property_instance is None:
|
||||
logger.warning(f"No property found for UPRN {uprn_int} during rebaselining update.")
|
||||
continue
|
||||
# Gather predictions for this UPRN
|
||||
try:
|
||||
new_sap = predictions_by_model_and_uprn["retrofit_sap_baseline_predictions"][uprn_int]
|
||||
new_carbon = predictions_by_model_and_uprn["retrofit_carbon_baseline_predictions"][uprn_int]
|
||||
new_heat_demand = predictions_by_model_and_uprn["retrofit_heat_baseline_predictions"][uprn_int]
|
||||
except KeyError as e:
|
||||
logger.warning(f"Missing prediction for UPRN {uprn_int}: {e}")
|
||||
continue
|
||||
# Update EPC record
|
||||
property_instance.epc_record.insert_new_performance_values(
|
||||
new_sap=new_sap,
|
||||
new_epc=sap_to_epc(new_sap),
|
||||
new_carbon=new_carbon,
|
||||
new_heat_demand=new_heat_demand,
|
||||
)
|
||||
logger.info(f"Updated EPC record for UPRN {uprn_int} with new model predictions.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating EPC record for UPRN {uprn}: {e}")
|
||||
# --- End refactor ---
|
||||
|
||||
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
|
||||
|
||||
|
|
@ -1135,8 +1205,9 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
# We optimise and then we determine eligibility for funding, based on the measures selected
|
||||
optimiser = (
|
||||
GainOptimiser(
|
||||
input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False
|
||||
) if body.budget else CostOptimiser(input_measures, min_gain=gain)
|
||||
input_measures, max_cost=body.budget, max_gain=float(gain) if gain is not None else 0,
|
||||
allow_slack=False
|
||||
) if body.budget else CostOptimiser(input_measures, min_gain=float(gain) if gain is not None else 0)
|
||||
)
|
||||
optimiser.setup()
|
||||
optimiser.solve()
|
||||
|
|
@ -1149,7 +1220,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
)
|
||||
battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size)
|
||||
|
||||
# We add the defauly already installed measures to the solution
|
||||
# We add the defaulty already installed measures to the solution
|
||||
selected = {r["id"] for r in solution + default_already_installed}
|
||||
|
||||
if property_required_measures:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue