Review notes (free text ahead of the "diff --git" header; patch tools skip it):
* This patch had been flattened onto two physical lines. It is laid out one
  diff line per row again. Indentation and the position of blank context
  lines were lost in the flattening and are reconstructed here to fit the
  hunk line counts; check them against the target file before applying.
* Final hunk: the feature-importance loop now pairs each importance with
  X.columns, the frame passed to rf.fit, instead of train_x, which this
  patch never defines.
* The "index" blob ids are carried over unchanged, so the second id no
  longer describes the amended post-image.

diff --git a/model_data/analysis/SapModel.py b/model_data/analysis/SapModel.py
index 87b71cf6..45ffd530 100644
--- a/model_data/analysis/SapModel.py
+++ b/model_data/analysis/SapModel.py
@@ -9,7 +9,6 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, e
     median_absolute_error, mean_absolute_percentage_error
 from sklearn.linear_model import Lasso
 from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model import LinearRegression
 import xgboost as xgb
 
 with open("all_data.pkl", "rb") as f:
@@ -151,7 +150,7 @@ class SapModel:
 
         for col in ["photo-supply", "multi-glaze-proportion", "low-energy-lighting", "number-open-fireplaces"]:
             model_data[col] = np.where(
-                model_data[col] == "", "0", model_data["photo-supply"]
+                model_data[col] == "", "0", model_data[col]
             ).astype(float)
 
         return model_data
@@ -343,13 +342,13 @@ class SapModel:
         ).sort_values("actual", ascending=True).merge(self.model_data[["idx", "property-type"]], on="idx")
 
         # temp hardcoded values
-        best_fit = {'MAPE': 0.042768242654695386, 'Mean Squared Error': 21.606875710236896,
-                    'Mean Absolute Error': 3.293776606279645, 'R2 Score': 0.7930242722318233,
-                    'Explained Variance Score': 0.7930242722318233, 'Median Absolute Error': 2.47686604239054}
+        best_fit = {'MAPE': 0.042824355225087686, 'Mean Squared Error': 21.49263731368226,
+                    'Mean Absolute Error': 3.298755911054327, 'R2 Score': 0.794118580154128,
+                    'Explained Variance Score': 0.794118580154128, 'Median Absolute Error': 2.426789554039914}
 
-        best_predict = {'MAPE': 0.04397538047202114, 'Mean Squared Error': 22.582856696398935,
-                        'Mean Absolute Error': 3.384549163877968, 'R2 Score': 0.7515887251149801,
-                        'Explained Variance Score': 0.7516508219403573, 'Median Absolute Error': 2.4624472128668344}
+        best_predict = {'MAPE': 0.04413439429441669, 'Mean Squared Error': 22.700373062051142,
+                        'Mean Absolute Error': 3.3961241443022008, 'R2 Score': 0.750296045867001,
+                        'Explained Variance Score': 0.7503518147827141, 'Median Absolute Error': 2.4442017110145855}
 
 
         def check_successes(experiment_error, best_error):
@@ -641,3 +640,25 @@ self = SapModel(
     data=all_data["data"],
     cleaner=all_data["cleaner"]
 )
+
+from sklearn.ensemble import RandomForestRegressor
+
+rf = RandomForestRegressor(random_state=self.random_state)
+X = self.df.drop(columns=self.RESPONSE)
+for col in ["photo-supply", "multi-glaze-proportion", "low-energy-lighting", "number-open-fireplaces"]:
+    X[col] = np.where(
+        X[col] == "", "0", X[col]
+    ).astype(float)
+
+Y = self.df[self.RESPONSE]
+rf.fit(X, Y)
+
+# Print the name and importance of each feature
+importance_df = []
+for feature, importance in zip(X.columns, rf.feature_importances_):
+    importance_df.append(
+        {
+            "Feature": feature,
+            "rf_importance": importance
+        }
+    )