implemented simple scoring model for battery SAP improvement

This commit is contained in:
Khalim Conn-Kowlessar 2025-12-09 23:17:36 +00:00
parent 0b026c0c4c
commit 8ed1d3b9bd
3 changed files with 119 additions and 6 deletions

View file

@ -0,0 +1,29 @@
import numpy as np
class BatterySAPScorer:
    """
    Lightweight production scorer with no sklearn dependency.

    Uses hard-coded coefficients discovered offline. The code for discovering
    the coefficients can be found in etl/battery_model/train.py
    """

    # Linear model: uplift = INTERCEPT + COEF_STARTING_SAP * starting_sap
    #                                  + COEF_PV_SIZE * pv_size
    INTERCEPT = 10.310168559226678
    COEF_STARTING_SAP = -0.16120648633993315
    COEF_PV_SIZE = 1.0500492005420736

    # Bounds applied to the raw prediction before it is rounded.
    MIN_UPLIFT = 0
    MAX_UPLIFT = 5

    @classmethod
    def score(cls, starting_sap, pv_size):
        """
        Predict the SAP uplift from the starting SAP score and the PV size.

        Args:
            starting_sap: numeric SAP score before the improvement.
            pv_size: numeric PV array size, expressed in the same units that
                were used when the coefficients were fitted.

        Returns:
            int: predicted SAP uplift, between MIN_UPLIFT and MAX_UPLIFT
            inclusive.
        """
        raw_uplift = (
            cls.INTERCEPT
            + cls.COEF_STARTING_SAP * starting_sap
            + cls.COEF_PV_SIZE * pv_size
        )
        # Clamp to [MIN_UPLIFT, MAX_UPLIFT] first, then round to the nearest
        # integer (np.round rounds exact halves to the nearest even value).
        clamped = np.clip(raw_uplift, cls.MIN_UPLIFT, cls.MAX_UPLIFT)
        return int(np.round(clamped))

View file

@ -0,0 +1,62 @@
import pandas as pd
from sklearn.linear_model import Ridge
class SAPUpliftTrainer:
    """
    Offline trainer: fits a Ridge regression and exposes the fitted
    intercept/coefficients of the SAP uplift model.
    """

    def __init__(self, alpha=1.0):
        """Create an unfitted Ridge model with regularisation strength *alpha*."""
        self.alpha = alpha
        self.model = Ridge(alpha=self.alpha)
        self.feature_names = ["starting SAP", "PV Array size"]

    def prepare_data(self, df):
        """
        Split *df* into the feature frame and the target series.

        Works on a copy of *df*. An `is_electric` feature derived from the
        heating description is currently disabled and not part of the model.
        """
        frame = df.copy()
        features = frame[self.feature_names]
        target = frame["SAP points"]
        return features, target

    def fit(self, df):
        """Fit the underlying model on the columns selected by prepare_data."""
        features, target = self.prepare_data(df)
        self.model.fit(features, target)

    def coefficients(self):
        """Return the intercept and per-feature coefficients as plain floats."""
        fitted = {"intercept": float(self.model.intercept_)}
        for feature, weight in zip(self.feature_names, self.model.coef_):
            fitted[feature] = float(weight)
        return fitted

    def export_runtime_config(self):
        """
        Returns a dict suitable for copy-pasting into the runtime scoring class.
        """
        fitted = self.coefficients()
        # (runtime key, key in the fitted-coefficient dict)
        key_pairs = (
            ("intercept", "intercept"),
            ("coef_starting_sap", "starting SAP"),
            ("coef_pv_size", "PV Array size"),
        )
        return {runtime_key: fitted[source_key] for runtime_key, source_key in key_pairs}
# Script section: fit the trainer on the exported training sheet and print the
# fitted coefficients so they can be copied into the runtime scorer.
# The training data can be found in the Domna sharepoint in Product Development > Solar Battery Recommendations
# NOTE(review): the path below is an absolute path on one developer's machine;
# download the sheet and adjust the path before running this elsewhere.
# NOTE(review): these statements run at import time (no `__main__` guard), so
# importing this module also triggers the read and the fit.
df = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv")
trainer = SAPUpliftTrainer(alpha=1.0)
trainer.fit(df)
# Printed twice: once keyed by training column name, once keyed for the runtime scorer.
print(trainer.coefficients())
print(trainer.export_runtime_config())
# Last updated: 9th December 2025
# Coefficients:
# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736}
# The code for scoring with this model can be found in backend/app/BatterySapScorer.py

View file

@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 221
SCENARIOS = [427]
PORTFOLIO_ID = 388
SCENARIOS = [803]
def get_data(portfolio_id, scenario_ids):
@ -95,6 +95,18 @@ post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
# Find dupes by property id and measure type. keep=False flags every member of
# a duplicated (property_id, measure_type) group, which makes dupe_df useful
# for inspecting what is about to be dropped.
dupes = recommended_measures_df.duplicated(
    subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]
# `DataFrame.shape` is a non-empty tuple and therefore always truthy, so test
# `.empty` to find out whether any duplicates were actually found.
if not dupe_df.empty:
    # Drop dupes - happened due to a funny bug. Keeps the first row of each
    # group so the pivot on (property_id, measure_type) has unique keys.
    recommended_measures_df = recommended_measures_df.drop_duplicates(
        subset=["property_id", "measure_type"], keep="first"
    )
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
@ -131,10 +143,19 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
asset_list = read_excel_from_s3(
bucket_name="retrofit-plan-inputs-dev", file_key='8/221/20250722T202328736Z/asset_list.xlsx',
header_row=0, sheet_name="320 - edited"
bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx',
header_row=0, sheet_name="Standardised Asset List"
)
# Normalise the asset list down to the three columns needed for the UPRN merge.
asset_list = pd.DataFrame(asset_list)
asset_list = asset_list.rename(columns={"postcode": "domna_postcode"})
# A bare `if "domna_full_address":` tests a non-empty string literal and is
# always true, which would overwrite an existing full-address column. Only
# fall back to the first address line when the column is actually missing.
if "domna_full_address" not in asset_list.columns:
    # For Peabody
    asset_list["domna_full_address"] = asset_list["domna_address_1"]
asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn"]].copy()
asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
df["uprn"] = df["uprn"].astype(str)
@ -179,9 +200,10 @@ asset_list = asset_list.merge(
on="uprn"
)
# For exporting NCHA
# For exporting
asset_list.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/320 Portfolio/asset_list_epc_b.xlsx",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/20251209_sample_package_data.xlsx",
index=False
)