investigating dropping variables

This commit is contained in:
Khalim Conn-Kowlessar 2023-07-04 16:13:40 +01:00
parent ccfdb7cc8c
commit 58edd9a255

View file

@@ -282,9 +282,13 @@ class SapModel:
train_x = train_x.drop(columns=["idx"])
test_x = test_x.drop(columns=["idx"])
importance_df = self.make_importance(train_x)
# importance_df = self.make_importance(train_x)
# Test dropping the least important features
to_drop = importance_df.tail(1)["Feature"].values
# to_drop = importance_df.tail(2)["Feature"].values
# Dropping this is a good idea
to_drop = [
"hotwater-description_Electric immersion, off-peak",
]
train_x = train_x.drop(columns=to_drop)
test_x = test_x[train_x.columns]
@@ -353,6 +357,8 @@ class SapModel:
fit_success = check_successes(self.fit_error, best_fit)
predict_success = check_successes(self.predict_error, best_predict)
print(self.results.summary())
self.model_data['fit'] = self.results.fittedvalues
# The worst errors over-index heavily for flats
self.worst["x"] = self.model_data[self.model_data.index.isin(self.worst["errors"].index)]