Model/etl/battery_model/train.py
2025-12-09 23:17:36 +00:00

62 lines
1.9 KiB
Python

import pandas as pd
from sklearn.linear_model import Ridge
class SAPUpliftTrainer:
    """Offline training class that discovers SAP uplift model coefficients.

    Fits a ridge regression that predicts the ``"SAP points"`` column from
    the ``"starting SAP"`` and ``"PV Array size"`` columns of a training
    frame, and exposes the fitted coefficients in a form that can be copied
    into the runtime scoring class.
    """

    # Name of the column holding the regression target in the training data.
    TARGET_COLUMN = "SAP points"

    def __init__(self, alpha: float = 1.0):
        """Create an unfitted trainer.

        Args:
            alpha: Regularisation strength passed straight to ``Ridge``.
        """
        self.alpha = alpha
        self.model = Ridge(alpha=self.alpha)
        self.feature_names = ["starting SAP", "PV Array size"]

    def prepare_data(self, df: pd.DataFrame):
        """Split a training frame into the feature matrix and target vector.

        Args:
            df: Frame containing every column in ``self.feature_names`` plus
                the ``"SAP points"`` target column. Extra columns are ignored.

        Returns:
            A ``(X, y)`` tuple: ``X`` is a DataFrame restricted to the feature
            columns (in ``self.feature_names`` order) and ``y`` is the target
            Series. Both are copies, so the caller's frame is never aliased.

        Raises:
            KeyError: If any required column is absent from ``df``.
        """
        required = [*self.feature_names, self.TARGET_COLUMN]
        missing = [column for column in required if column not in df.columns]
        if missing:
            # Fail early with every missing column named, instead of the
            # first pandas indexing error encountered.
            raise KeyError(
                f"Training data is missing required column(s): {missing}"
            )

        # Copy only the columns that are used rather than the whole frame.
        X = df[self.feature_names].copy()
        y = df[self.TARGET_COLUMN].copy()
        return X, y

    def fit(self, df: pd.DataFrame):
        """Fit the ridge model on ``df``.

        Args:
            df: Training frame; see ``prepare_data`` for the required columns.

        Returns:
            This trainer, so calls can be chained
            (``SAPUpliftTrainer().fit(df).coefficients()``).
        """
        X, y = self.prepare_data(df)
        self.model.fit(X, y)
        return self

    def coefficients(self):
        """Return the fitted intercept and per-feature coefficients.

        Returns:
            A dict with an ``"intercept"`` entry followed by one entry per
            name in ``self.feature_names``; all values are plain floats.

        Raises:
            AttributeError: If the model has not been fitted yet.
        """
        if not (hasattr(self.model, "coef_") and hasattr(self.model, "intercept_")):
            raise AttributeError(
                "Model has not been fitted yet; call fit() before reading "
                "coefficients."
            )

        result = {"intercept": float(self.model.intercept_)}
        for name, coef in zip(self.feature_names, self.model.coef_):
            result[name] = float(coef)
        return result

    def export_runtime_config(self):
        """Return a dict suitable for copy-pasting into the runtime scoring class.

        Returns:
            A dict with the keys ``"intercept"``, ``"coef_starting_sap"`` and
            ``"coef_pv_size"`` taken from the fitted model.

        Raises:
            AttributeError: If the model has not been fitted yet.
        """
        coefs = self.coefficients()
        return {
            "intercept": coefs["intercept"],
            "coef_starting_sap": coefs["starting SAP"],
            "coef_pv_size": coefs["PV Array size"],
        }
# The training data can be found in the Domna SharePoint in
# Product Development > Solar Battery Recommendations.
# Default location used when no path is given on the command line.
DEFAULT_TRAINING_CSV = "/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv"


def main(argv=None):
    """Train the SAP uplift model from a CSV and print its coefficients.

    Args:
        argv: Optional list of command-line arguments (excluding the program
            name). The first argument, if present, is the path to the
            training CSV; otherwise ``DEFAULT_TRAINING_CSV`` is used. When
            ``None``, ``sys.argv[1:]`` is read.
    """
    import sys  # local import: only the script entry point needs it

    args = sys.argv[1:] if argv is None else list(argv)
    csv_path = args[0] if args else DEFAULT_TRAINING_CSV

    df = pd.read_csv(csv_path)
    trainer = SAPUpliftTrainer(alpha=1.0)
    trainer.fit(df)
    print(trainer.coefficients())
    print(trainer.export_runtime_config())


# Guarded so that importing this module (e.g. to reuse SAPUpliftTrainer) does
# not read the CSV or retrain the model.
if __name__ == "__main__":
    main()

# Last updated: 9th December 2025
# Coefficients:
# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736}
# The code for scoring with this model can be found in backend/app/BatterySapScorer.py