Removed outlier testing, but got some decent results from binning some variables

This commit is contained in:
Khalim Conn-Kowlessar 2023-07-04 20:11:05 +01:00
parent 0b41057db8
commit df8bfd7d02

View file

@ -180,7 +180,8 @@ class SapModel:
)
return df
bucket_variables = []
bucket_variables = ["number-open-fireplaces", "fixed-lighting-outlets-count", 'extension-count',
'multi-glaze-proportion', 'floor-height']
remaining_numericals = [x for x in self.NUMERICAL_COLUMNS if x not in bucket_variables]
for col in bucket_variables:
@ -337,7 +338,8 @@ class SapModel:
def fit_model(self):
# Dummy out the categorical variables
binned = []
binned = ["number-open-fireplaces", "fixed-lighting-outlets-count", 'extension-count', 'multi-glaze-proportion',
'floor-height']
x = pd.get_dummies(self.model_data, columns=self.CATEGORICAL_COLS + binned, drop_first=True)
@ -420,13 +422,13 @@ class SapModel:
).sort_values("actual", ascending=True).merge(self.model_data[["idx", "property-type"]], on="idx")
# temp hardcoded values
best_fit = {'MAPE': 0.04617542805587113, 'Mean Squared Error': 18.62306128026334,
'Mean Absolute Error': 2.865262003625814, 'R2 Score': 0.8008316762496143,
'Explained Variance Score': 0.8008316762496143, 'Median Absolute Error': 1.911197425417548}
best_fit = {'MAPE': 0.04646530042225876, 'Mean Squared Error': 18.635209563729763,
'Mean Absolute Error': 2.856347408023325, 'R2 Score': 0.800701753826118,
'Explained Variance Score': 0.800701753826118, 'Median Absolute Error': 1.9026758012120197}
best_predict = {'MAPE': 0.04358926901734807, 'Mean Squared Error': 21.197491698961528,
'Mean Absolute Error': 3.046853690257838, 'R2 Score': 0.7215087343364782,
'Explained Variance Score': 0.7215726927575035, 'Median Absolute Error': 1.921094388694634}
best_predict = {'MAPE': 0.04346083528432316, 'Mean Squared Error': 21.16036509335514,
'Mean Absolute Error': 3.0440540802375833, 'R2 Score': 0.7219965012634312,
'Explained Variance Score': 0.7220620137390414, 'Median Absolute Error': 1.9031967986967828}
def check_successes(experiment_error, best_error):
@ -456,8 +458,8 @@ class SapModel:
predict_success = check_successes(self.predict_error, best_predict)
print(self.results.summary())
self.model_data['fit'] = self.results.fittedvalues
# The worst errors over-index heavily for flats
self.worst["x"] = self.model_data[self.model_data.index.isin(self.worst["errors"].index)]