From df8bfd7d02481d00d016a7600b22a0e85a16cdd7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 4 Jul 2023 20:11:05 +0100 Subject: [PATCH] removed outlier testing but got some decent results binning some variables --- model_data/analysis/SapModel.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/model_data/analysis/SapModel.py b/model_data/analysis/SapModel.py index 190a4c36..e0911780 100644 --- a/model_data/analysis/SapModel.py +++ b/model_data/analysis/SapModel.py @@ -180,7 +180,8 @@ class SapModel: ) return df - bucket_variables = [] + bucket_variables = ["number-open-fireplaces", "fixed-lighting-outlets-count", 'extension-count', + 'multi-glaze-proportion', 'floor-height'] remaining_numericals = [x for x in self.NUMERICAL_COLUMNS if x not in bucket_variables] for col in bucket_variables: @@ -337,7 +338,8 @@ class SapModel: def fit_model(self): # Dummy out the categorical variables - binned = [] + binned = ["number-open-fireplaces", "fixed-lighting-outlets-count", 'extension-count', 'multi-glaze-proportion', + 'floor-height'] x = pd.get_dummies(self.model_data, columns=self.CATEGORICAL_COLS + binned, drop_first=True) @@ -420,13 +422,13 @@ class SapModel: ).sort_values("actual", ascending=True).merge(self.model_data[["idx", "property-type"]], on="idx") # temp hardcoded values - best_fit = {'MAPE': 0.04617542805587113, 'Mean Squared Error': 18.62306128026334, - 'Mean Absolute Error': 2.865262003625814, 'R2 Score': 0.8008316762496143, - 'Explained Variance Score': 0.8008316762496143, 'Median Absolute Error': 1.911197425417548} + best_fit = {'MAPE': 0.04646530042225876, 'Mean Squared Error': 18.635209563729763, + 'Mean Absolute Error': 2.856347408023325, 'R2 Score': 0.800701753826118, + 'Explained Variance Score': 0.800701753826118, 'Median Absolute Error': 1.9026758012120197} - best_predict = {'MAPE': 0.04358926901734807, 'Mean Squared Error': 21.197491698961528, - 'Mean Absolute Error': 3.046853690257838, 'R2 Score': 0.7215087343364782, - 'Explained Variance Score': 0.7215726927575035, 'Median Absolute Error': 1.921094388694634} + best_predict = {'MAPE': 0.04346083528432316, 'Mean Squared Error': 21.16036509335514, + 'Mean Absolute Error': 3.0440540802375833, 'R2 Score': 0.7219965012634312, + 'Explained Variance Score': 0.7220620137390414, 'Median Absolute Error': 1.9031967986967828} def check_successes(experiment_error, best_error): @@ -456,8 +458,8 @@ class SapModel: predict_success = check_successes(self.predict_error, best_predict) print(self.results.summary()) - self.model_data['fit'] = self.results.fittedvalues + # The worst errors over index heavily for flats self.worst["x"] = self.model_data[self.model_data.index.isin(self.worst["errors"].index)]