from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

import pandas as pd


@dataclass
class RecommendationPrediction:
    """A single prediction attached to one measure."""

    measure_id: str
    prediction: Any
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class PredictionEntry:
    """All prediction data collected for one ``uprn``."""

    uprn: int
    rebaselined_prediction: Any = None
    recommendation_predictions: List[RecommendationPrediction] = field(
        default_factory=list
    )
    original_epc: Optional[Dict[str, Any]] = None
    landlord_differences: Optional[Dict[str, Any]] = None
    lodgement_date: Optional[Any] = None


class PredictionMatrix:
    """Collection of :class:`PredictionEntry` objects keyed by ``uprn``.

    Entries are created lazily by the ``add_*`` / ``set_*`` methods and can
    be flattened into a :class:`pandas.DataFrame` with :meth:`to_dataframe`.
    """

    # Column-name suffixes used for the paired comparison columns.
    _ORI_SUFFIX = "_ori"
    _LL_SUFFIX = "_ll"

    def __init__(self):
        self.entries: Dict[int, PredictionEntry] = {}

    def _get_or_create(self, uprn: int) -> PredictionEntry:
        """Return the entry for *uprn*, creating an empty one if missing."""
        if uprn not in self.entries:
            self.entries[uprn] = PredictionEntry(uprn=uprn)
        return self.entries[uprn]

    def add_entry(self, entry: PredictionEntry):
        """Store *entry* under its ``uprn``, replacing any existing entry."""
        self.entries[entry.uprn] = entry

    def add_recommendation(
        self,
        uprn: int,
        measure_id: str,
        prediction: Any,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Append a measure-level prediction to the entry for *uprn*.

        The entry is created if it does not exist yet. A falsy *metadata*
        (``None`` or an empty dict) is stored as a fresh empty dict.
        """
        rec = RecommendationPrediction(
            measure_id=measure_id,
            prediction=prediction,
            metadata=metadata or {},
        )
        self._get_or_create(uprn).recommendation_predictions.append(rec)

    def set_rebaselined_prediction(self, uprn: int, prediction: Any):
        """Set the rebaselined prediction for *uprn*, creating the entry if needed."""
        self._get_or_create(uprn).rebaselined_prediction = prediction

    def set_original_epc(
        self,
        uprn: int,
        original_epc: Dict[str, Any],
        landlord_differences: Dict[str, Any],
        lodgement_date: Any = None,
    ):
        """Record the original EPC data, landlord differences and lodgement date.

        The entry for *uprn* is created if needed; all three fields are
        overwritten (``lodgement_date`` is reset to ``None`` when omitted).
        """
        entry = self._get_or_create(uprn)
        entry.original_epc = original_epc
        entry.landlord_differences = landlord_differences
        entry.lodgement_date = lodgement_date

    def to_dataframe(self) -> pd.DataFrame:
        """Flatten all entries into a DataFrame.

        Each entry yields one row per recommendation prediction, or a single
        row without the ``measure_*`` columns when it has none. Every row
        carries ``uprn``, ``rebaselined_prediction``, ``lodgement_date`` and
        ``landlord_differences``. When both ``original_epc`` and
        ``landlord_differences`` are non-empty, each key ``k`` of
        ``landlord_differences`` adds the columns ``{k}_ori`` (value from
        ``original_epc``, ``None`` if absent) and ``{k}_ll``.

        Row dicts are shallow copies, so dict values such as
        ``landlord_differences`` and ``measure_metadata`` are shared with
        the stored entries.
        """
        rows = []
        for entry in self.entries.values():
            base = {
                "uprn": entry.uprn,
                "rebaselined_prediction": entry.rebaselined_prediction,
                "lodgement_date": entry.lodgement_date,
                "landlord_differences": entry.landlord_differences,
            }

            # Add original EPC fields if present
            if entry.original_epc and entry.landlord_differences:
                for k in entry.landlord_differences:
                    base[f"{k}{self._ORI_SUFFIX}"] = entry.original_epc.get(k)
                    base[f"{k}{self._LL_SUFFIX}"] = entry.landlord_differences.get(k)

            # Add measure-level predictions
            for rec in entry.recommendation_predictions:
                row = base.copy()
                row["measure_id"] = rec.measure_id
                row["measure_prediction"] = rec.prediction
                row["measure_metadata"] = rec.metadata
                rows.append(row)

            if not entry.recommendation_predictions:
                rows.append(base)

        return pd.DataFrame(rows)

    def summarise_differences(self, df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Blank out ``*_ori`` / ``*_ll`` pairs whose values agree.

        For every column ending in ``_ori`` that has a matching ``_ll``
        column, rows where both values are equal (missing values compare
        equal to each other) get both cells set to ``None``, leaving only
        the genuine differences.

        Args:
            df: Frame to process; defaults to :meth:`to_dataframe`. A frame
                passed in is modified in place.

        Returns:
            The same DataFrame object that was processed.
        """
        if df is None:
            df = self.to_dataframe()

        ori_cols = [c for c in df.columns if c.endswith(self._ORI_SUFFIX)]
        for ori_col in ori_cols:
            # Swap only the trailing suffix: str.replace would also rewrite
            # an "_ori" occurring inside the key itself (e.g. "x_orig_ori").
            ll_col = ori_col[: -len(self._ORI_SUFFIX)] + self._LL_SUFFIX
            if ll_col in df.columns:
                # The "NULL" sentinel makes two missing values compare equal.
                same = df[ori_col].fillna("NULL") == df[ll_col].fillna("NULL")
                df.loc[same, [ori_col, ll_col]] = None

        return df