diff --git a/asset_list/app.py b/asset_list/app.py index a97bb8e0..5e821bb9 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -73,25 +73,59 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed" + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/E.ON/202603 modelling project" + # # data_filename = "For Modelling - Final - reviewed.xlsx" + # data_filename = "eon - 20260323 address sanitisation.xlsx" + # sheet_name = "in" + # postcode_column = "postcode" + # address1_column = "Address 1" + # address1_method = None + # fulladdress_column = "Address 1" + # address_cols_to_concat = [] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = "address2uprn_uprn" + # landlord_property_type = "PropertyType" + # landlord_built_form = "BuiltForm" + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "UPRN" + # landlord_sap = None + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = None + + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/SMS" # data_filename = "For Modelling - Final - reviewed.xlsx" - data_filename = "Missed Properties - with address.xlsx" - sheet_name = "Sheet1" + data_filename = "SMS Data sample to sense check before WHLG deploy.xlsx" + sheet_name = "All Darlaston Properties" postcode_column = "Postcode" - address1_column = "address1" + address1_column = "House Number" address1_method = None - fulladdress_column = "address1" - address_cols_to_concat = [] + fulladdress_column = 
None + address_cols_to_concat = ["House Number", "Street name"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = "UPRN" - landlord_property_type = "Type" + landlord_os_uprn = None + landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "Reference" + landlord_property_id = "id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 4842450d..e65c0b9a 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -631,4 +631,6 @@ BUILT_FORM_MAPPINGS = { 'First & Second Floor Flat': 'mid-floor', 'First Floor Purpose Built': 'mid-floor', 'Purpose built First Floor': 'mid-floor', + + 'Mid-Terrace': 'mid-terrace' } diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 5fd5eaf4..43362935 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -14,6 +14,7 @@ from backend.SearchEpc import SearchEpc from etl.epc.Record import EPCRecord from backend.app.BatterySapScorer import BatterySAPScorer +from etl.epc.PredictionMatrix import PredictionMatrix from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_session, db_read_session @@ -575,7 +576,7 @@ async def model_engine(body: PlanTriggerRequest): property_already_installed = list(already_installed_by_uprn[addr.uprn]) epc_searcher = SearchEpc( - address1=addr.address1, + address1=addr.address_1, postcode=addr.postcode, uprn=addr.uprn, auth_token=get_settings().EPC_AUTH_TOKEN, @@ -584,8 +585,8 @@ async def model_engine(body: PlanTriggerRequest): heating_system=addr.landlord_heating_system, associated_uprns=associated_uprns ) - epc_searcher.ordnance_survey_client.built_form = addr.built_form - 
epc_searcher.ordnance_survey_client.property_type = addr.property_type + epc_searcher.ordnance_survey_client.built_form = addr.landlord_built_form + epc_searcher.ordnance_survey_client.property_type = addr.landlord_property_type # For the moment, our OS API access is unavailable, so we skip and interpolate epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) @@ -634,7 +635,7 @@ async def model_engine(body: PlanTriggerRequest): epc_page=epc_page, rrn=rrn, cleaned_address=epc_searcher.address_clean, - config_address=addr.address, + config_address=addr.address_1, address_postal_town=epc_searcher.address_postal_town ) ) @@ -651,7 +652,7 @@ async def model_engine(body: PlanTriggerRequest): address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=property_already_installed + eco_packages.get(property_id)[3], + already_installed=property_already_installed, find_my_epc_components=find_my_epc_components, property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, @@ -706,8 +707,6 @@ async def model_engine(body: PlanTriggerRequest): with db_read_session() as session: materials = db_funcs.materials_functions.get_materials(session) - # Rebaselining - # TODO: MUST happen before setting features logger.info("Preparing rebaselining") rebaselining_scoring_data = [] for p in tqdm(input_properties): @@ -872,7 +871,6 @@ async def model_engine(body: PlanTriggerRequest): "carbon_ending" ] ) - # TODO: Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True all_predictions = await model_api.async_paginated_predictions( @@ -928,6 +926,8 @@ async def model_engine(body: PlanTriggerRequest): ) p.current_energy_bill = property_current_energy_bill + # Create matrix of all predictions for debug: - any rebaselining and measure level predictions + # Insert the predictions into the recommendations and run the optimiser 
"""Containers that collect per-UPRN predictions and flatten them for debugging.

A ``PredictionMatrix`` stores, for every UPRN, an optional rebaselined
prediction, the original EPC values alongside the landlord-supplied
differences, and any number of measure-level predictions. The collected data
can be flattened to a :class:`pandas.DataFrame` with one row per measure.
"""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import pandas as pd

# Column suffixes used by ``to_dataframe`` for the original-EPC value and the
# landlord-supplied value of the same field.
_ORIGINAL_SUFFIX = "_ori"
_LANDLORD_SUFFIX = "_ll"


@dataclass
class RecommendationPrediction:
    """A single measure-level prediction attached to a property."""

    # Identifier of the measure the prediction belongs to.
    measure_id: str
    # Predicted value for the measure; stored as given.
    prediction: Any
    # Free-form extra data about the measure; defaults to an empty dict.
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class PredictionEntry:
    """Everything recorded in the matrix for one UPRN."""

    uprn: int
    rebaselined_prediction: Any = None
    recommendation_predictions: List[RecommendationPrediction] = field(default_factory=list)
    original_epc: Optional[Dict[str, Any]] = None
    landlord_differences: Optional[Dict[str, Any]] = None
    lodgement_date: Optional[Any] = None


class PredictionMatrix:
    """Accumulates ``PredictionEntry`` objects keyed by UPRN."""

    def __init__(self):
        self.entries: Dict[int, PredictionEntry] = {}

    def _entry_for(self, uprn: int) -> PredictionEntry:
        """Return the entry for *uprn*, creating an empty one if none exists."""
        entry = self.entries.get(uprn)
        if entry is None:
            entry = PredictionEntry(uprn=uprn)
            self.entries[uprn] = entry
        return entry

    def add_entry(self, entry: PredictionEntry):
        """Store *entry*, replacing any existing entry with the same UPRN."""
        self.entries[entry.uprn] = entry

    def add_recommendation(
        self,
        uprn: int,
        measure_id: str,
        prediction: Any,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Append a measure-level prediction to the entry for *uprn*.

        A missing or empty *metadata* is stored as a fresh empty dict.
        """
        rec = RecommendationPrediction(
            measure_id=measure_id,
            prediction=prediction,
            metadata=metadata or {},
        )
        self._entry_for(uprn).recommendation_predictions.append(rec)

    def set_rebaselined_prediction(self, uprn: int, prediction: Any):
        """Set (or overwrite) the rebaselined prediction for *uprn*."""
        self._entry_for(uprn).rebaselined_prediction = prediction

    def set_original_epc(
        self,
        uprn: int,
        original_epc: Optional[Dict[str, Any]],
        landlord_differences: Optional[Dict[str, Any]],
        lodgement_date: Any = None,
    ):
        """Record the original EPC, landlord differences and lodgement date.

        ``None`` is accepted for both mappings; ``to_dataframe`` then emits no
        ``*_ori`` / ``*_ll`` columns for this entry.
        """
        entry = self._entry_for(uprn)
        entry.original_epc = original_epc
        entry.landlord_differences = landlord_differences
        entry.lodgement_date = lodgement_date

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten the matrix to one row per (UPRN, measure).

        Entries without any measure-level prediction contribute a single row
        carrying only the entry-level columns. For every key present in an
        entry's landlord differences, ``<key>_ori`` holds the original EPC
        value and ``<key>_ll`` the landlord value.
        """
        rows = []
        for entry in self.entries.values():
            base = {
                "uprn": entry.uprn,
                "rebaselined_prediction": entry.rebaselined_prediction,
                "lodgement_date": entry.lodgement_date,
                "landlord_differences": entry.landlord_differences,
            }
            # Comparison columns are only emitted when both mappings are non-empty.
            if entry.original_epc and entry.landlord_differences:
                for key, landlord_value in entry.landlord_differences.items():
                    base[f"{key}{_ORIGINAL_SUFFIX}"] = entry.original_epc.get(key)
                    base[f"{key}{_LANDLORD_SUFFIX}"] = landlord_value
            if not entry.recommendation_predictions:
                rows.append(base)
                continue
            for rec in entry.recommendation_predictions:
                row = base.copy()
                row["measure_id"] = rec.measure_id
                row["measure_prediction"] = rec.prediction
                row["measure_metadata"] = rec.metadata
                rows.append(row)
        return pd.DataFrame(rows)

    def summarise_differences(self, df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Blank out ``*_ori`` / ``*_ll`` pairs whose values are identical.

        Args:
            df: Frame shaped like the output of ``to_dataframe``. When omitted,
                ``to_dataframe()`` is called. The argument is never modified;
                a copy is returned.

        Returns:
            A DataFrame where, for every row and every ``<key>_ori`` /
            ``<key>_ll`` pair, both cells are set to ``None`` when they hold
            equal values or are both missing.
        """
        df = self.to_dataframe() if df is None else df.copy()
        ori_cols = [
            c for c in df.columns
            if isinstance(c, str) and c.endswith(_ORIGINAL_SUFFIX)
        ]
        for ori_col in ori_cols:
            # Swap only the trailing suffix: str.replace would also rewrite an
            # "_ori" inside the field name (e.g. "floor_orientation_ori").
            ll_col = ori_col[: -len(_ORIGINAL_SUFFIX)] + _LANDLORD_SUFFIX
            if ll_col not in df.columns:
                continue
            original, landlord = df[ori_col], df[ll_col]
            # Two missing values count as "same"; a missing value never equals
            # a real one (no sentinel string involved).
            same = (original == landlord) | (original.isna() & landlord.isna())
            df.loc[same, [ori_col, ll_col]] = None
        return df