mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
default landlord differences to empty dict, adding prediction matrix for inspection predictions
This commit is contained in:
parent
a3081214ca
commit
5c94ecf3fb
5 changed files with 166 additions and 20 deletions
|
|
@ -73,25 +73,59 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/E.ON/202603 modelling project"
|
||||
# # data_filename = "For Modelling - Final - reviewed.xlsx"
|
||||
# data_filename = "eon - 20260323 address sanitisation.xlsx"
|
||||
# sheet_name = "in"
|
||||
# postcode_column = "postcode"
|
||||
# address1_column = "Address 1"
|
||||
# address1_method = None
|
||||
# fulladdress_column = "Address 1"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = "address2uprn_uprn"
|
||||
# landlord_property_type = "PropertyType"
|
||||
# landlord_built_form = "BuiltForm"
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "UPRN"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_id_colnames = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/SMS"
|
||||
# data_filename = "For Modelling - Final - reviewed.xlsx"
|
||||
data_filename = "Missed Properties - with address.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
data_filename = "SMS Data sample to sense check before WHLG deploy.xlsx"
|
||||
sheet_name = "All Darlaston Properties"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "address1"
|
||||
address1_column = "House Number"
|
||||
address1_method = None
|
||||
fulladdress_column = "address1"
|
||||
address_cols_to_concat = []
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["House Number", "Street name"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Type"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Reference"
|
||||
landlord_property_id = "id"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
|
|||
|
|
@ -631,4 +631,6 @@ BUILT_FORM_MAPPINGS = {
|
|||
'First & Second Floor Flat': 'mid-floor',
|
||||
'First Floor Purpose Built': 'mid-floor',
|
||||
'Purpose built First Floor': 'mid-floor',
|
||||
|
||||
'Mid-Terrace': 'mid-terrace'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from backend.SearchEpc import SearchEpc
|
|||
|
||||
from etl.epc.Record import EPCRecord
|
||||
from backend.app.BatterySapScorer import BatterySAPScorer
|
||||
from etl.epc.PredictionMatrix import PredictionMatrix
|
||||
|
||||
from backend.app.config import get_settings, get_prediction_buckets
|
||||
from backend.app.db.connection import db_session, db_read_session
|
||||
|
|
@ -575,7 +576,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
property_already_installed = list(already_installed_by_uprn[addr.uprn])
|
||||
|
||||
epc_searcher = SearchEpc(
|
||||
address1=addr.address1,
|
||||
address1=addr.address_1,
|
||||
postcode=addr.postcode,
|
||||
uprn=addr.uprn,
|
||||
auth_token=get_settings().EPC_AUTH_TOKEN,
|
||||
|
|
@ -584,8 +585,8 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
heating_system=addr.landlord_heating_system,
|
||||
associated_uprns=associated_uprns
|
||||
)
|
||||
epc_searcher.ordnance_survey_client.built_form = addr.built_form
|
||||
epc_searcher.ordnance_survey_client.property_type = addr.property_type
|
||||
epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form
|
||||
epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type
|
||||
# For the moment, our OS API access is unavailable, so we skip and interpolate
|
||||
|
||||
epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True)
|
||||
|
|
@ -634,7 +635,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
epc_page=epc_page,
|
||||
rrn=rrn,
|
||||
cleaned_address=epc_searcher.address_clean,
|
||||
config_address=addr.address,
|
||||
config_address=addr.address_1,
|
||||
address_postal_town=epc_searcher.address_postal_town
|
||||
)
|
||||
)
|
||||
|
|
@ -651,7 +652,7 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
address=epc_searcher.address_clean,
|
||||
postcode=epc_searcher.postcode_clean,
|
||||
epc_record=prepared_epc,
|
||||
already_installed=property_already_installed + eco_packages.get(property_id)[3],
|
||||
already_installed=property_already_installed,
|
||||
find_my_epc_components=find_my_epc_components,
|
||||
property_valuation=req_data.valuation,
|
||||
non_invasive_recommendations=property_non_invasive_recommendations,
|
||||
|
|
@ -706,8 +707,6 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
with db_read_session() as session:
|
||||
materials = db_funcs.materials_functions.get_materials(session)
|
||||
|
||||
# Rebaselining
|
||||
# TODO: MUST happen before setting features
|
||||
logger.info("Preparing rebaselining")
|
||||
rebaselining_scoring_data = []
|
||||
for p in tqdm(input_properties):
|
||||
|
|
@ -872,7 +871,6 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
"carbon_ending"
|
||||
]
|
||||
)
|
||||
# TODO: Temp putting this here
|
||||
recommendations_scoring_data["is_post_sap10_ending"] = True
|
||||
|
||||
all_predictions = await model_api.async_paginated_predictions(
|
||||
|
|
@ -928,6 +926,8 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
)
|
||||
p.current_energy_bill = property_current_energy_bill
|
||||
|
||||
# Create matrix of all predictions for debug: - any rebaselining and measure level predictions
|
||||
|
||||
# Insert the predictions into the recommendations and run the optimiser
|
||||
logger.info("Optimising measures")
|
||||
for p in input_properties:
|
||||
|
|
@ -1269,4 +1269,35 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
|
||||
logger.info("Model Engine completed successfully")
|
||||
|
||||
prediction_matrix = PredictionMatrix()
|
||||
|
||||
# --- Add rebaselining and measure-level predictions to PredictionMatrix ---
|
||||
for p in input_properties:
|
||||
# Add rebaselined predictions if available
|
||||
uprn = p.uprn
|
||||
if uprn is None:
|
||||
continue
|
||||
# Rebaselined SAP prediction
|
||||
rebaselined_sap = None
|
||||
if uprn in predictions_by_model_and_uprn.get("retrofit_sap_baseline_predictions", {}):
|
||||
rebaselined_sap = predictions_by_model_and_uprn["retrofit_sap_baseline_predictions"][uprn]
|
||||
# Add original EPC and landlord differences for comparison
|
||||
prediction_matrix.set_original_epc(
|
||||
uprn=uprn,
|
||||
original_epc=p.epc_record.original_epc,
|
||||
landlord_differences=p.epc_record.landlord_differences,
|
||||
lodgement_date=p.epc_record.lodgement_date,
|
||||
)
|
||||
prediction_matrix.set_rebaselined_prediction(uprn, rebaselined_sap)
|
||||
# Add measure-level predictions
|
||||
property_recommendations = recommendations.get(p.id, [])
|
||||
for rec in property_recommendations:
|
||||
prediction_matrix.add_recommendation(
|
||||
uprn=uprn,
|
||||
measure_id=rec.get("recommendation_id", rec.get("id", rec.get("type", "unknown"))),
|
||||
prediction=rec.get("sap_points"),
|
||||
metadata={k: v for k, v in rec.items() if k not in ("sap_points", "recommendation_id", "id")}
|
||||
)
|
||||
# --- End PredictionMatrix population ---
|
||||
|
||||
return Response(status_code=200)
|
||||
|
|
|
|||
80
etl/epc/PredictionMatrix.py
Normal file
80
etl/epc/PredictionMatrix.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
import pandas as pd
|
||||
|
||||
@dataclass
class RecommendationPrediction:
    """Prediction recorded for a single measure on one property."""

    # Identifier of the measure the prediction belongs to.
    measure_id: str
    # Predicted value exactly as supplied by the caller.
    prediction: Any
    # Extra caller-supplied fields kept alongside the prediction.
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class PredictionEntry:
    """Everything the matrix holds for one UPRN."""

    # Key of the entry inside ``PredictionMatrix.entries``.
    uprn: int
    # Rebaselined prediction for the property; ``None`` until one is set.
    rebaselined_prediction: Any = None
    # Measure-level predictions, in the order they were added.
    recommendation_predictions: List[RecommendationPrediction] = field(default_factory=list)
    # Original EPC values, looked up by the keys of ``landlord_differences``.
    original_epc: Optional[Dict[str, Any]] = None
    # Landlord-supplied values keyed by EPC field name.
    landlord_differences: Optional[Dict[str, Any]] = None
    # Lodgement date as supplied by the caller; stored untouched.
    lodgement_date: Optional[Any] = None


class PredictionMatrix:
    """Collects per-UPRN predictions and flattens them into a DataFrame.

    Entries are created lazily: every ``add_*`` / ``set_*`` method creates the
    ``PredictionEntry`` for a UPRN the first time that UPRN is seen.
    """

    # Suffixes of the paired comparison columns emitted by ``to_dataframe``.
    _ORIGINAL_SUFFIX = "_ori"
    _LANDLORD_SUFFIX = "_ll"

    def __init__(self):
        self.entries: Dict[int, PredictionEntry] = {}

    def _entry_for(self, uprn: int) -> PredictionEntry:
        """Return the entry stored for ``uprn``, creating an empty one if absent."""
        if uprn not in self.entries:
            self.entries[uprn] = PredictionEntry(uprn=uprn)
        return self.entries[uprn]

    def add_entry(self, entry: PredictionEntry):
        """Store ``entry`` under its own UPRN, replacing any existing entry."""
        self.entries[entry.uprn] = entry

    def add_recommendation(self, uprn: int, measure_id: str, prediction: Any, metadata: Optional[Dict[str, Any]] = None):
        """Append one measure-level prediction to the entry for ``uprn``.

        A missing or empty ``metadata`` is stored as a fresh empty dict.
        """
        rec = RecommendationPrediction(measure_id=measure_id, prediction=prediction, metadata=metadata or {})
        self._entry_for(uprn).recommendation_predictions.append(rec)

    def set_rebaselined_prediction(self, uprn: int, prediction: Any):
        """Set (or overwrite) the rebaselined prediction for ``uprn``."""
        self._entry_for(uprn).rebaselined_prediction = prediction

    def set_original_epc(self, uprn: int, original_epc: Dict[str, Any], landlord_differences: Dict[str, Any], lodgement_date: Any = None):
        """Set (or overwrite) the EPC comparison data for ``uprn``."""
        entry = self._entry_for(uprn)
        entry.original_epc = original_epc
        entry.landlord_differences = landlord_differences
        entry.lodgement_date = lodgement_date

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten the matrix into one row per (UPRN, measure).

        A UPRN without any measure-level prediction still contributes a single
        row, which carries no ``measure_*`` values. For every key of
        ``landlord_differences`` two columns are emitted: ``<key>_ori`` with the
        original EPC value and ``<key>_ll`` with the landlord value. They are
        only emitted when both dicts are non-empty.
        """
        rows = []
        for entry in self.entries.values():
            base = {
                "uprn": entry.uprn,
                "rebaselined_prediction": entry.rebaselined_prediction,
                "lodgement_date": entry.lodgement_date,
                "landlord_differences": entry.landlord_differences,
            }
            if entry.original_epc and entry.landlord_differences:
                for key, landlord_value in entry.landlord_differences.items():
                    base[f"{key}{self._ORIGINAL_SUFFIX}"] = entry.original_epc.get(key)
                    base[f"{key}{self._LANDLORD_SUFFIX}"] = landlord_value

            if not entry.recommendation_predictions:
                rows.append(base)
                continue

            for rec in entry.recommendation_predictions:
                rows.append(
                    {
                        **base,
                        "measure_id": rec.measure_id,
                        "measure_prediction": rec.prediction,
                        "measure_metadata": rec.metadata,
                    }
                )
        return pd.DataFrame(rows)

    def summarise_differences(self, df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Blank out ``_ori`` / ``_ll`` pairs whose values are identical.

        When ``df`` is omitted the frame from ``to_dataframe`` is used. A frame
        passed in is modified in place and returned. Two missing values count
        as identical.
        """
        if df is None:
            df = self.to_dataframe()

        ori_suffix = self._ORIGINAL_SUFFIX
        # Snapshot the labels first; non-string labels cannot be comparison columns.
        ori_cols = [c for c in df.columns if isinstance(c, str) and c.endswith(ori_suffix)]
        for ori_col in ori_cols:
            # Swap only the trailing suffix: str.replace would also rewrite an
            # "_ori" inside the field name (e.g. "roof_orientation_ori").
            ll_col = ori_col[: -len(ori_suffix)] + self._LANDLORD_SUFFIX
            if ll_col not in df.columns:
                continue
            same = df[ori_col].fillna("NULL") == df[ll_col].fillna("NULL")
            df.loc[same, [ori_col, ll_col]] = None
        return df
|
||||
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
import warnings
|
||||
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal, Callable
|
||||
from backend.addresses.Address import Address
|
||||
from dataclasses import fields
|
||||
from dataclasses import fields, dataclass, field
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
from etl.epc.ValidationConfiguration import (
|
||||
EPCRecordValidationConfiguration,
|
||||
EPCDifferenceRecordValidationConfiguration,
|
||||
|
|
@ -331,7 +330,7 @@ class EPCRecord:
|
|||
# Working dictionary that gets cleaned
|
||||
_prepared_epc: Optional[PreparedEpcRow] = None
|
||||
# Record of differences applied by landlord data
|
||||
landlord_differences: Optional[dict[str, PreparedEpcValue]] = None
|
||||
landlord_differences: dict[str, PreparedEpcValue] = field(default_factory=dict)
|
||||
|
||||
# Supporting
|
||||
full_sap_epc: Optional[RawEpcRow] = None
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue