implemented simple scoring model for battery SAP improvement

2026-06-08 11:17:27 +00:00 · 2025-12-09 23:17:36 +00:00 · 2025-12-09 23:17:36 +00:00 · 8ed1d3b9bd
commit 8ed1d3b9bd
parent 0b026c0c4c
3 changed files with 119 additions and 6 deletions
--- a/backend/app/BatterySapScorer.py
+++ b/backend/app/BatterySapScorer.py
@ -0,0 +1,29 @@
+import numpy as np
+
+
+class BatterySAPScorer:
+    """
+    Lightweight production scorer — no sklearn dependency.
+
+    Evaluates a linear model (intercept + one coefficient per input) whose coefficients are
+    hard-coded below. The coefficients were discovered offline; the code for discovering
+    them can be found in etl/battery_model/train.py
+    """
+
+    # Linear model terms, copied from the output of the offline training script
+    INTERCEPT = 10.310168559226678
+    COEF_STARTING_SAP = -0.16120648633993315
+    COEF_PV_SIZE = 1.0500492005420736
+
+    # Inclusive bounds applied to the predicted uplift before it is returned
+    MIN_UPLIFT = 0
+    MAX_UPLIFT = 5
+
+    @classmethod
+    def score(cls, starting_sap: float, pv_size: float) -> int:
+        """
+        Return the predicted SAP uplift as a whole number in [MIN_UPLIFT, MAX_UPLIFT].
+
+        starting_sap: SAP score before the improvement, multiplied by COEF_STARTING_SAP
+        pv_size: PV array size, multiplied by COEF_PV_SIZE
+            (NOTE(review): presumably in the same units as the training data - confirm)
+        """
+
+        raw_uplift = (
+            cls.INTERCEPT
+            + cls.COEF_STARTING_SAP * starting_sap
+            + cls.COEF_PV_SIZE * pv_size
+        )
+
+        # Clamp to the bounds first, then round to a whole number of points.
+        # np.round rounds exact halves to the nearest even integer.
+        bounded_uplift = np.clip(raw_uplift, cls.MIN_UPLIFT, cls.MAX_UPLIFT)
+        return int(np.round(bounded_uplift))
--- a/etl/battery_model/train.py
+++ b/etl/battery_model/train.py
@ -0,0 +1,62 @@
+import pandas as pd
+from sklearn.linear_model import Ridge
+
+
+class SAPUpliftTrainer:
+    """
+    Offline trainer: fits a Ridge regression and exposes its coefficients so they can be
+    hard-coded into the runtime scorer.
+    """
+
+    def __init__(self, alpha=1.0):
+        # Columns of the training dataframe used as model inputs, in coefficient order
+        self.feature_names = ["starting SAP", "PV Array size"]
+        self.alpha = alpha
+        self.model = Ridge(alpha=self.alpha)
+
+    def prepare_data(self, df):
+        """Return (features, target) taken from a copy of df."""
+        frame = df.copy()
+        # NOTE: an "is_electric" flag (case-insensitive "Electric" match on the "heating"
+        # column) is currently not used as a feature.
+        features = frame[self.feature_names]
+        target = frame["SAP points"]
+        return features, target
+
+    def fit(self, df):
+        """Fit the model on the feature columns and the "SAP points" target of df."""
+        features, target = self.prepare_data(df)
+        self.model.fit(features, target)
+
+    def coefficients(self):
+        """Return the fitted intercept plus one float coefficient per feature name."""
+        fitted = {"intercept": float(self.model.intercept_)}
+        for name, coef in zip(self.feature_names, self.model.coef_):
+            fitted[name] = float(coef)
+        return fitted
+
+    def export_runtime_config(self):
+        """
+        Return the fitted values re-keyed for copy-pasting into the runtime scoring class.
+        """
+        fitted = self.coefficients()
+        # runtime key -> key used by coefficients(); "coef_is_electric" is not exported
+        runtime_keys = {
+            "intercept": "intercept",
+            "coef_starting_sap": "starting SAP",
+            "coef_pv_size": "PV Array size",
+        }
+        return {
+            runtime_key: fitted[source_key]
+            for runtime_key, source_key in runtime_keys.items()
+        }
+
+
+# Training data lives in the Domna sharepoint: Product Development > Solar Battery Recommendations
+# NOTE(review): the path below is an absolute path on one developer's machine.
+df = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv"
+)
+
+trainer = SAPUpliftTrainer(alpha=1.0)
+trainer.fit(df)
+
+# Print the fitted coefficients first, then the same values keyed for the runtime scorer
+for build_report in (trainer.coefficients, trainer.export_runtime_config):
+    print(build_report())
+
+# Last updated: 9th December 2025
+# Coefficients:
+# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736}
+# The code for scoring with this model can be found in backend/app/BatterySapScorer.py
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 221
-SCENARIOS = [427]
+PORTFOLIO_ID = 388
+SCENARIOS = [803]


 def get_data(portfolio_id, scenario_ids):
@ -95,6 +95,18 @@ post_install_sap = post_install_sap[post_install_sap["default"]]
 # Sum up the sap points by property id
 post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()

+# Find dupes by property id and measure type; keep=False flags every row of a duplicated
+# group, not just the repeats
+dupes = recommended_measures_df.duplicated(
+    subset=["property_id", "measure_type"], keep=False
+)
+dupe_df = recommended_measures_df[dupes]
+
+# DataFrame.shape is a (rows, columns) tuple and so is always truthy; test emptiness
+# explicitly so de-duplication only runs when duplicates were actually found
+if not dupe_df.empty:
+    # Drop dupes - happened due to a funny bug
+    recommended_measures_df = recommended_measures_df.drop_duplicates(
+        subset=["property_id", "measure_type"], keep='first'
+    )
+
 recommendations_measures_pivot = recommended_measures_df.pivot(
    index='property_id',
    columns='measure_type',
@ -131,10 +143,19 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3

 # asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
 asset_list = read_excel_from_s3(
-    bucket_name="retrofit-plan-inputs-dev", file_key='8/221/20250722T202328736Z/asset_list.xlsx',
-    header_row=0, sheet_name="320 - edited"
+    bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx',
+    header_row=0, sheet_name="Standardised Asset List"
 )
 asset_list = pd.DataFrame(asset_list)
+asset_list = asset_list.rename(
+    columns={
+        "postcode": "domna_postcode"
+    }
+)
+# A bare string literal is always truthy, so check for the column itself: only fall back
+# to the first address line when the asset list has no full-address column
+if "domna_full_address" not in asset_list.columns:
+    # For Peabody
+    asset_list["domna_full_address"] = asset_list["domna_address_1"]
+
 asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
 asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
 df["uprn"] = df["uprn"].astype(str)
@ -179,9 +200,10 @@ asset_list = asset_list.merge(
    on="uprn"
 )

-# For exporting NCHA
+# For exporting
 asset_list.to_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/320 Portfolio/asset_list_epc_b.xlsx",
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+    "Project/20251209_sample_package_data.xlsx",
    index=False
 )