mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
80 lines
3.4 KiB
Python
80 lines
3.4 KiB
Python
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List, Optional
|
|
import pandas as pd
|
|
|
|
@dataclass
class RecommendationPrediction:
    """A prediction recorded against a single measure.

    Instances are plain value holders: equality, ``repr`` and the
    constructor are generated by ``@dataclass``.
    """

    # Identifier of the measure this prediction belongs to.
    measure_id: str
    # The predicted value; no particular type is enforced here.
    prediction: Any
    # Free-form extra information. A fresh dict is created per instance so
    # that instances never share one mutable default.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
@dataclass
class PredictionEntry:
    """Everything recorded for one property, identified by its ``uprn``.

    Only ``uprn`` is required; every other field starts empty and can be
    filled in later.
    """

    # Key identifying the property this entry describes.
    uprn: int
    # Prediction for the rebaselined property; ``None`` until one is set.
    rebaselined_prediction: Any = None
    # Per-measure predictions, in insertion order. A fresh list is created
    # per instance so entries never share one mutable default.
    recommendation_predictions: List[RecommendationPrediction] = field(default_factory=list)
    # Original EPC field values, keyed by field name (``None`` if unknown).
    original_epc: Optional[Dict[str, Any]] = None
    # Landlord-supplied values, keyed by field name (``None`` if unknown).
    landlord_differences: Optional[Dict[str, Any]] = None
    # Lodgement date in whatever representation the caller supplies.
    lodgement_date: Optional[Any] = None
|
|
|
|
class PredictionMatrix:
    """Accumulate ``PredictionEntry`` objects by ``uprn`` and flatten them.

    Entries are stored in ``self.entries`` keyed by ``uprn``. The setter
    methods create a missing entry on demand, so they can be called in any
    order. ``to_dataframe`` turns the collection into a pandas frame and
    ``summarise_differences`` blanks out original/landlord column pairs
    whose values agree.
    """

    # Suffixes of the paired columns emitted by ``to_dataframe`` and matched
    # again by ``summarise_differences``.
    _ORIGINAL_SUFFIX = "_ori"
    _LANDLORD_SUFFIX = "_ll"

    def __init__(self):
        """Start with no entries."""
        self.entries: Dict[int, PredictionEntry] = {}

    def _entry_for(self, uprn: int) -> "PredictionEntry":
        """Return the entry stored for ``uprn``, creating an empty one if absent."""
        if uprn not in self.entries:
            self.entries[uprn] = PredictionEntry(uprn=uprn)
        return self.entries[uprn]

    def add_entry(self, entry: "PredictionEntry"):
        """Store ``entry`` under its ``uprn``, replacing any existing entry."""
        self.entries[entry.uprn] = entry

    def add_recommendation(self, uprn: int, measure_id: str, prediction: Any, metadata: Optional[Dict[str, Any]] = None):
        """Append a measure-level prediction to the entry for ``uprn``.

        Args:
            uprn: Key of the entry to extend; created if missing.
            measure_id: Identifier of the measure.
            prediction: Predicted value for that measure.
            metadata: Optional extra information; a falsy value (``None`` or
                an empty dict) is stored as a new empty dict.
        """
        rec = RecommendationPrediction(
            measure_id=measure_id,
            prediction=prediction,
            metadata=metadata or {},
        )
        self._entry_for(uprn).recommendation_predictions.append(rec)

    def set_rebaselined_prediction(self, uprn: int, prediction: Any):
        """Set the rebaselined prediction on the entry for ``uprn`` (created if missing)."""
        self._entry_for(uprn).rebaselined_prediction = prediction

    def set_original_epc(self, uprn: int, original_epc: Dict[str, Any], landlord_differences: Dict[str, Any], lodgement_date: Any = None):
        """Record the original EPC data on the entry for ``uprn`` (created if missing).

        All three values are overwritten on every call, so omitting
        ``lodgement_date`` resets a previously stored date to ``None``.
        """
        entry = self._entry_for(uprn)
        entry.original_epc = original_epc
        entry.landlord_differences = landlord_differences
        entry.lodgement_date = lodgement_date

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten all entries into a DataFrame.

        Each entry yields one row per recommendation, carrying the
        ``measure_id``, ``measure_prediction`` and ``measure_metadata``
        columns. An entry with no recommendations yields a single row
        without those columns.

        When an entry has both a non-empty ``original_epc`` and a non-empty
        ``landlord_differences``, every key ``k`` of ``landlord_differences``
        adds a ``{k}_ori`` column (value looked up in ``original_epc``,
        ``None`` if absent) and a ``{k}_ll`` column (the landlord value).

        Returns:
            A new DataFrame; it has no rows or columns when there are no
            entries.
        """
        rows = []
        for entry in self.entries.values():
            base = {
                "uprn": entry.uprn,
                "rebaselined_prediction": entry.rebaselined_prediction,
                "lodgement_date": entry.lodgement_date,
                "landlord_differences": entry.landlord_differences,
            }

            # Paired columns are only produced when both sides are present.
            if entry.original_epc and entry.landlord_differences:
                for key, landlord_value in entry.landlord_differences.items():
                    base[f"{key}{self._ORIGINAL_SUFFIX}"] = entry.original_epc.get(key)
                    base[f"{key}{self._LANDLORD_SUFFIX}"] = landlord_value

            if not entry.recommendation_predictions:
                rows.append(base)
                continue

            # One row per measure; the per-entry values are repeated on each
            # row (shallow copy, so dict values are shared between rows).
            for rec in entry.recommendation_predictions:
                row = base.copy()
                row["measure_id"] = rec.measure_id
                row["measure_prediction"] = rec.prediction
                row["measure_metadata"] = rec.metadata
                rows.append(row)

        return pd.DataFrame(rows)

    def summarise_differences(self, df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Blank out ``*_ori`` / ``*_ll`` pairs whose values are equal.

        For every column ending in ``_ori`` that has a matching ``_ll``
        column, rows where the two values agree (two missing values count
        as agreeing) have both cells set to ``None``, so only real
        differences remain visible.

        Args:
            df: Frame to process. When omitted, ``to_dataframe()`` is used.
                A frame passed in is modified in place.

        Returns:
            The same DataFrame object that was processed.
        """
        if df is None:
            df = self.to_dataframe()

        ori_suffix = self._ORIGINAL_SUFFIX
        ll_suffix = self._LANDLORD_SUFFIX

        # Non-string labels cannot carry the suffix, so they are skipped
        # rather than raising on ``.endswith``.
        ori_cols = [
            col for col in df.columns
            if isinstance(col, str) and col.endswith(ori_suffix)
        ]

        for ori_col in ori_cols:
            # Swap only the trailing suffix. ``str.replace`` would also
            # rewrite an ``_ori`` inside the field name (for example
            # ``roof_orientation_ori``) and miss the partner column.
            ll_col = ori_col[: -len(ori_suffix)] + ll_suffix
            if ll_col not in df.columns:
                continue
            # The "NULL" sentinel makes two missing values compare equal.
            same = df[ori_col].fillna("NULL") == df[ll_col].fillna("NULL")
            df.loc[same, [ori_col, ll_col]] = None

        return df
|
|
|