default landlord differences to empty dict, adding prediction matrix for inspection predictions

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-26 18:58:40 +00:00
parent a3081214ca
commit 5c94ecf3fb
5 changed files with 166 additions and 20 deletions

View file

@ -73,25 +73,59 @@ def app():
Property UPRN
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/E.ON/202603 modelling project"
# # data_filename = "For Modelling - Final - reviewed.xlsx"
# data_filename = "eon - 20260323 address sanitisation.xlsx"
# sheet_name = "in"
# postcode_column = "postcode"
# address1_column = "Address 1"
# address1_method = None
# fulladdress_column = "Address 1"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = "address2uprn_uprn"
# landlord_property_type = "PropertyType"
# landlord_built_form = "BuiltForm"
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "UPRN"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/SMS"
# data_filename = "For Modelling - Final - reviewed.xlsx"
data_filename = "Missed Properties - with address.xlsx"
sheet_name = "Sheet1"
data_filename = "SMS Data sample to sense check before WHLG deploy.xlsx"
sheet_name = "All Darlaston Properties"
postcode_column = "Postcode"
address1_column = "address1"
address1_column = "House Number"
address1_method = None
fulladdress_column = "address1"
address_cols_to_concat = []
fulladdress_column = None
address_cols_to_concat = ["House Number", "Street name"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "UPRN"
landlord_property_type = "Type"
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Reference"
landlord_property_id = "id"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None

View file

@ -631,4 +631,6 @@ BUILT_FORM_MAPPINGS = {
'First & Second Floor Flat': 'mid-floor',
'First Floor Purpose Built': 'mid-floor',
'Purpose built First Floor': 'mid-floor',
'Mid-Terrace': 'mid-terrace'
}

View file

@ -14,6 +14,7 @@ from backend.SearchEpc import SearchEpc
from etl.epc.Record import EPCRecord
from backend.app.BatterySapScorer import BatterySAPScorer
from etl.epc.PredictionMatrix import PredictionMatrix
from backend.app.config import get_settings, get_prediction_buckets
from backend.app.db.connection import db_session, db_read_session
@ -575,7 +576,7 @@ async def model_engine(body: PlanTriggerRequest):
property_already_installed = list(already_installed_by_uprn[addr.uprn])
epc_searcher = SearchEpc(
address1=addr.address1,
address1=addr.address_1,
postcode=addr.postcode,
uprn=addr.uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
@ -584,8 +585,8 @@ async def model_engine(body: PlanTriggerRequest):
heating_system=addr.landlord_heating_system,
associated_uprns=associated_uprns
)
epc_searcher.ordnance_survey_client.built_form = addr.built_form
epc_searcher.ordnance_survey_client.property_type = addr.property_type
epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form
epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True)
@ -634,7 +635,7 @@ async def model_engine(body: PlanTriggerRequest):
epc_page=epc_page,
rrn=rrn,
cleaned_address=epc_searcher.address_clean,
config_address=addr.address,
config_address=addr.address_1,
address_postal_town=epc_searcher.address_postal_town
)
)
@ -651,7 +652,7 @@ async def model_engine(body: PlanTriggerRequest):
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
already_installed=property_already_installed + eco_packages.get(property_id)[3],
already_installed=property_already_installed,
find_my_epc_components=find_my_epc_components,
property_valuation=req_data.valuation,
non_invasive_recommendations=property_non_invasive_recommendations,
@ -706,8 +707,6 @@ async def model_engine(body: PlanTriggerRequest):
with db_read_session() as session:
materials = db_funcs.materials_functions.get_materials(session)
# Rebaselining
# TODO: MUST happen before setting features
logger.info("Preparing rebaselining")
rebaselining_scoring_data = []
for p in tqdm(input_properties):
@ -872,7 +871,6 @@ async def model_engine(body: PlanTriggerRequest):
"carbon_ending"
]
)
# TODO: Temp putting this here
recommendations_scoring_data["is_post_sap10_ending"] = True
all_predictions = await model_api.async_paginated_predictions(
@ -928,6 +926,8 @@ async def model_engine(body: PlanTriggerRequest):
)
p.current_energy_bill = property_current_energy_bill
# Create matrix of all predictions for debug: - any rebaselining and measure level predictions
# Insert the predictions into the recommendations and run the optimiser
logger.info("Optimising measures")
for p in input_properties:
@ -1269,4 +1269,35 @@ async def model_engine(body: PlanTriggerRequest):
logger.info("Model Engine completed successfully")
prediction_matrix = PredictionMatrix()
# --- Add rebaselining and measure-level predictions to PredictionMatrix ---
for p in input_properties:
# Add rebaselined predictions if available
uprn = p.uprn
if uprn is None:
continue
# Rebaselined SAP prediction
rebaselined_sap = None
if uprn in predictions_by_model_and_uprn.get("retrofit_sap_baseline_predictions", {}):
rebaselined_sap = predictions_by_model_and_uprn["retrofit_sap_baseline_predictions"][uprn]
# Add original EPC and landlord differences for comparison
prediction_matrix.set_original_epc(
uprn=uprn,
original_epc=p.epc_record.original_epc,
landlord_differences=p.epc_record.landlord_differences,
lodgement_date=p.epc_record.lodgement_date,
)
prediction_matrix.set_rebaselined_prediction(uprn, rebaselined_sap)
# Add measure-level predictions
property_recommendations = recommendations.get(p.id, [])
for rec in property_recommendations:
prediction_matrix.add_recommendation(
uprn=uprn,
measure_id=rec.get("recommendation_id", rec.get("id", rec.get("type", "unknown"))),
prediction=rec.get("sap_points"),
metadata={k: v for k, v in rec.items() if k not in ("sap_points", "recommendation_id", "id")}
)
# --- End PredictionMatrix population ---
return Response(status_code=200)

View file

@ -0,0 +1,80 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
import pandas as pd
@dataclass
class RecommendationPrediction:
    """One prediction recorded against a single measure.

    Instances are created by ``PredictionMatrix.add_recommendation`` and
    stored on the owning ``PredictionEntry``.
    """

    # Identifier of the measure the prediction belongs to.
    measure_id: str
    # Predicted value; deliberately untyped (``Any``).
    prediction: Any
    # Free-form extra context. ``default_factory`` gives every instance its
    # own dict, so metadata is never shared between instances.
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class PredictionEntry:
    """Everything recorded for one property, identified by its UPRN.

    Only ``uprn`` is required; every other field starts out empty and is
    filled in later through the ``PredictionMatrix`` setter methods.
    """

    # Key under which the entry is stored in ``PredictionMatrix.entries``.
    uprn: int
    # Rebaselined prediction for the property; ``None`` until set.
    rebaselined_prediction: Any = None
    # Measure-level predictions, in the order they were added. A fresh list
    # is created per instance.
    recommendation_predictions: List[RecommendationPrediction] = field(default_factory=list)
    # Original EPC values, looked up by the keys of ``landlord_differences``
    # when the matrix is flattened to a DataFrame.
    original_epc: Optional[Dict[str, Any]] = None
    # Landlord-supplied values to compare against the original EPC.
    landlord_differences: Optional[Dict[str, Any]] = None
    # Lodgement date associated with the EPC; type is left open.
    lodgement_date: Optional[Any] = None
class PredictionMatrix:
    """Accumulates prediction data per property (keyed by UPRN) for inspection.

    Entries are created lazily: any setter called with an unknown UPRN first
    creates an empty ``PredictionEntry`` for it. The collected data can be
    flattened with ``to_dataframe`` and reduced to genuine differences with
    ``summarise_differences``.
    """

    # Column-name suffixes for the original-EPC value and the landlord value
    # of the same key in the flattened DataFrame.
    _ORI_SUFFIX = "_ori"
    _LL_SUFFIX = "_ll"

    def __init__(self):
        self.entries: Dict[int, PredictionEntry] = {}

    def _entry_for(self, uprn: int) -> "PredictionEntry":
        """Return the entry for ``uprn``, creating an empty one if needed."""
        entry = self.entries.get(uprn)
        if entry is None:
            entry = self.entries[uprn] = PredictionEntry(uprn=uprn)
        return entry

    def add_entry(self, entry: "PredictionEntry"):
        """Store ``entry`` under its UPRN, replacing any existing entry."""
        self.entries[entry.uprn] = entry

    def add_recommendation(self, uprn: int, measure_id: str, prediction: Any, metadata: Optional[Dict[str, Any]] = None):
        """Append a measure-level prediction to the entry for ``uprn``.

        Args:
            uprn: Property identifier; an entry is created if none exists.
            measure_id: Identifier of the measure.
            prediction: Predicted value for the measure.
            metadata: Optional extra context; a falsy value is stored as ``{}``.
        """
        rec = RecommendationPrediction(measure_id=measure_id, prediction=prediction, metadata=metadata or {})
        self._entry_for(uprn).recommendation_predictions.append(rec)

    def set_rebaselined_prediction(self, uprn: int, prediction: Any):
        """Set (or overwrite) the rebaselined prediction for ``uprn``."""
        self._entry_for(uprn).rebaselined_prediction = prediction

    def set_original_epc(self, uprn: int, original_epc: Dict[str, Any], landlord_differences: Dict[str, Any], lodgement_date: Any = None):
        """Set (or overwrite) the EPC comparison data for ``uprn``.

        Args:
            uprn: Property identifier; an entry is created if none exists.
            original_epc: Original EPC values.
            landlord_differences: Landlord-supplied values, keyed like
                ``original_epc``.
            lodgement_date: Optional lodgement date stored alongside.
        """
        entry = self._entry_for(uprn)
        entry.original_epc = original_epc
        entry.landlord_differences = landlord_differences
        entry.lodgement_date = lodgement_date

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten all entries into a DataFrame.

        Each entry yields one row per recommendation prediction, or a single
        row without the ``measure_*`` columns when it has none. When both
        ``original_epc`` and ``landlord_differences`` are non-empty, every key
        of ``landlord_differences`` adds a ``<key>_ori`` / ``<key>_ll`` pair.

        Returns:
            A new DataFrame; empty when no entries have been recorded.
        """
        rows = []
        for entry in self.entries.values():
            base = {
                "uprn": entry.uprn,
                "rebaselined_prediction": entry.rebaselined_prediction,
                "lodgement_date": entry.lodgement_date,
                "landlord_differences": entry.landlord_differences,
            }
            # Pair each landlord value with the original EPC value of the same key.
            if entry.original_epc and entry.landlord_differences:
                for key, landlord_value in entry.landlord_differences.items():
                    base[f"{key}{self._ORI_SUFFIX}"] = entry.original_epc.get(key)
                    base[f"{key}{self._LL_SUFFIX}"] = landlord_value
            if not entry.recommendation_predictions:
                rows.append(base)
                continue
            rows.extend(
                {
                    **base,
                    "measure_id": rec.measure_id,
                    "measure_prediction": rec.prediction,
                    "measure_metadata": rec.metadata,
                }
                for rec in entry.recommendation_predictions
            )
        return pd.DataFrame(rows)

    def summarise_differences(self, df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Blank out ``_ori`` / ``_ll`` pairs whose values do not differ.

        For every ``<key>_ori`` column with a matching ``<key>_ll`` column,
        rows where the two values are equal, or both missing, have both cells
        set to ``None`` so that only real differences remain visible.

        Args:
            df: Frame to summarise; defaults to ``self.to_dataframe()``.
                A frame passed in is modified in place.

        Returns:
            The same DataFrame object that was summarised.
        """
        if df is None:
            df = self.to_dataframe()
        ori_cols = [c for c in df.columns if isinstance(c, str) and c.endswith(self._ORI_SUFFIX)]
        for ori_col in ori_cols:
            # Swap only the trailing suffix: str.replace would also rewrite an
            # "_ori" inside the key itself (e.g. "floor_orientation_ori").
            ll_col = ori_col[: -len(self._ORI_SUFFIX)] + self._LL_SUFFIX
            if ll_col not in df.columns:
                continue
            ori_values = df[ori_col]
            ll_values = df[ll_col]
            # Missing values never compare equal, so "both missing" is checked
            # explicitly rather than via a placeholder string.
            same = (ori_values == ll_values) | (ori_values.isna() & ll_values.isna())
            df.loc[same, [ori_col, ll_col]] = None
        return df

View file

@ -1,9 +1,8 @@
import warnings
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal, Callable
from backend.addresses.Address import Address
from dataclasses import fields
from dataclasses import fields, dataclass, field
from datetime import datetime
from dataclasses import dataclass
from etl.epc.ValidationConfiguration import (
EPCRecordValidationConfiguration,
EPCDifferenceRecordValidationConfiguration,
@ -331,7 +330,7 @@ class EPCRecord:
# Working dictionary that gets cleaned
_prepared_epc: Optional[PreparedEpcRow] = None
# Record of differences applied by landlord data
landlord_differences: Optional[dict[str, PreparedEpcValue]] = None
landlord_differences: dict[str, PreparedEpcValue] = field(default_factory=dict)
# Supporting
full_sap_epc: Optional[RawEpcRow] = None