mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
implemented simple scoring model for battery SAP improvement
This commit is contained in:
parent
0b026c0c4c
commit
8ed1d3b9bd
3 changed files with 119 additions and 6 deletions
29
backend/app/BatterySapScorer.py
Normal file
29
backend/app/BatterySapScorer.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class BatterySAPScorer:
    """
    Runtime scorer for the battery SAP uplift model — plain arithmetic, no sklearn.

    The constants below are the fitted values of a linear model that is trained
    offline; the training code lives in etl/battery_model/train.py and the numbers
    are copied across by hand.
    """

    # Fitted model parameters (see etl/battery_model/train.py)
    INTERCEPT = 10.310168559226678
    COEF_STARTING_SAP = -0.16120648633993315
    COEF_PV_SIZE = 1.0500492005420736

    @classmethod
    def score(cls, starting_sap, pv_size):
        """
        Predict the SAP uplift as a whole number between 0 and 5.

        starting_sap: value multiplied by COEF_STARTING_SAP
            (the "starting SAP" feature of the offline model)
        pv_size: value multiplied by COEF_PV_SIZE
            (the "PV Array size" feature of the offline model)
        """
        # Accumulate the linear prediction term by term
        raw_uplift = cls.INTERCEPT
        raw_uplift += cls.COEF_STARTING_SAP * starting_sap
        raw_uplift += cls.COEF_PV_SIZE * pv_size

        # Keep the prediction inside [0, 5] before rounding to an integer
        bounded_uplift = np.clip(raw_uplift, 0, 5)
        return int(np.round(bounded_uplift))
|
||||
62
etl/battery_model/train.py
Normal file
62
etl/battery_model/train.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import pandas as pd
|
||||
from sklearn.linear_model import Ridge
|
||||
|
||||
|
||||
class SAPUpliftTrainer:
    """
    Offline trainer: fits a Ridge regression and reports the SAP uplift coefficients.
    """

    def __init__(self, alpha=1.0):
        # alpha is handed straight to Ridge
        self.alpha = alpha
        self.model = Ridge(alpha=self.alpha)
        # Training-frame columns used as features, in model order
        self.feature_names = ["starting SAP", "PV Array size"]

    def prepare_data(self, df):
        """
        Return (features, target) taken from a copy of ``df``.

        Features are the columns listed in ``self.feature_names``; the target is
        the "SAP points" column.
        """
        frame = df.copy()
        # An "is_electric" flag (heating description containing "Electric") is
        # currently not included as a feature.
        features = frame[self.feature_names]
        target = frame["SAP points"]
        return features, target

    def fit(self, df):
        """Fit the underlying model on the features/target extracted from ``df``."""
        features, target = self.prepare_data(df)
        self.model.fit(features, target)

    def coefficients(self):
        """
        Return the fitted parameters as plain floats: "intercept" first, then one
        entry per feature name.
        """
        fitted = {"intercept": float(self.model.intercept_)}
        for feature_name, weight in zip(self.feature_names, self.model.coef_):
            fitted[feature_name] = float(weight)
        return fitted

    def export_runtime_config(self):
        """
        Returns a dict suitable for copy-pasting into the runtime scoring class.
        """
        fitted = self.coefficients()
        # (runtime key, training column) pairs; "is_electric" is not exported
        key_pairs = (
            ("intercept", "intercept"),
            ("coef_starting_sap", "starting SAP"),
            ("coef_pv_size", "PV Array size"),
        )
        return {runtime_key: fitted[source_key] for runtime_key, source_key in key_pairs}
|
||||
|
||||
|
||||
# Training data location: Domna sharepoint, under
# Product Development > Solar Battery Recommendations
TRAINING_CSV_PATH = "/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv"
df = pd.read_csv(TRAINING_CSV_PATH)

# Fit the model on the downloaded export
trainer = SAPUpliftTrainer(alpha=1.0)
trainer.fit(df)

# Print the raw coefficients first, then the runtime-ready mapping
fitted_coefficients = trainer.coefficients()
print(fitted_coefficients)
runtime_config = trainer.export_runtime_config()
print(runtime_config)

# Last updated: 9th December 2025
# Coefficients:
# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736}
# The scoring code that uses these values is in backend/app/BatterySapScorer.py
|
||||
|
|
@ -11,8 +11,8 @@ from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcMod
|
|||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 221
|
||||
SCENARIOS = [427]
|
||||
PORTFOLIO_ID = 388
|
||||
SCENARIOS = [803]
|
||||
|
||||
|
||||
def get_data(portfolio_id, scenario_ids):
|
||||
|
|
@ -95,6 +95,18 @@ post_install_sap = post_install_sap[post_install_sap["default"]]
|
|||
# Sum up the sap points by property id
|
||||
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
|
||||
|
||||
# Find dupes by property id and measure type: flag every row whose
# (property_id, measure_type) pair occurs more than once.
dupes = recommended_measures_df.duplicated(
    subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]

# Test for rows explicitly: a DataFrame's .shape is a tuple and is always truthy.
if not dupe_df.empty:
    # Drop dupes - happened due to a funny bug
    recommended_measures_df = recommended_measures_df.drop_duplicates(
        subset=["property_id", "measure_type"], keep='first'
    )
|
||||
|
||||
recommendations_measures_pivot = recommended_measures_df.pivot(
|
||||
index='property_id',
|
||||
columns='measure_type',
|
||||
|
|
@ -131,10 +143,19 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
|
|||
|
||||
# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv')
|
||||
asset_list = read_excel_from_s3(
|
||||
bucket_name="retrofit-plan-inputs-dev", file_key='8/221/20250722T202328736Z/asset_list.xlsx',
|
||||
header_row=0, sheet_name="320 - edited"
|
||||
bucket_name="retrofit-plan-inputs-dev", file_key='2/388/20251208T203603925Z/asset_list.xlsx',
|
||||
header_row=0, sheet_name="Standardised Asset List"
|
||||
)
|
||||
# Normalise the asset list to the three columns used downstream:
# domna_full_address, domna_postcode and uprn.
asset_list = pd.DataFrame(asset_list)
asset_list = asset_list.rename(
    columns={
        "postcode": "domna_postcode"
    }
)
# NOTE(review): the condition used to be the bare string "domna_full_address",
# which is always true. Assumed intent: only fall back to domna_address_1 when
# the full-address column is missing — confirm.
if "domna_full_address" not in asset_list.columns:
    # For Peabody
    asset_list["domna_full_address"] = asset_list["domna_address_1"]

asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn"]].copy()
asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# Cast uprn to str on df
df["uprn"] = df["uprn"].astype(str)
|
||||
|
|
@ -179,9 +200,10 @@ asset_list = asset_list.merge(
|
|||
on="uprn"
|
||||
)
|
||||
|
||||
# For exporting NCHA
|
||||
# For exporting
|
||||
asset_list.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/320 Portfolio/asset_list_epc_b.xlsx",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/20251209_sample_package_data.xlsx",
|
||||
index=False
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue