mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
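Fixed a silly bug in the cleaning code: blank-value replacement filled every cleaned column with values from "photo-supply" instead of the column's own values.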
This commit is contained in:
parent
c59aff412c
commit
68e903e492
1 changed files with 29 additions and 8 deletions
|
|
@ -9,7 +9,6 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, e
|
|||
median_absolute_error, mean_absolute_percentage_error
|
||||
from sklearn.linear_model import Lasso
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.linear_model import LinearRegression
|
||||
import xgboost as xgb
|
||||
|
||||
with open("all_data.pkl", "rb") as f:
|
||||
|
|
@ -151,7 +150,7 @@ class SapModel:
|
|||
|
||||
for col in ["photo-supply", "multi-glaze-proportion", "low-energy-lighting", "number-open-fireplaces"]:
|
||||
model_data[col] = np.where(
|
||||
model_data[col] == "", "0", model_data["photo-supply"]
|
||||
model_data[col] == "", "0", model_data[col]
|
||||
).astype(float)
|
||||
|
||||
return model_data
|
||||
|
|
@ -343,13 +342,13 @@ class SapModel:
|
|||
).sort_values("actual", ascending=True).merge(self.model_data[["idx", "property-type"]], on="idx")
|
||||
|
||||
# temp hardcoded values
|
||||
best_fit = {'MAPE': 0.042768242654695386, 'Mean Squared Error': 21.606875710236896,
|
||||
'Mean Absolute Error': 3.293776606279645, 'R2 Score': 0.7930242722318233,
|
||||
'Explained Variance Score': 0.7930242722318233, 'Median Absolute Error': 2.47686604239054}
|
||||
best_fit = {'MAPE': 0.042824355225087686, 'Mean Squared Error': 21.49263731368226,
|
||||
'Mean Absolute Error': 3.298755911054327, 'R2 Score': 0.794118580154128,
|
||||
'Explained Variance Score': 0.794118580154128, 'Median Absolute Error': 2.426789554039914}
|
||||
|
||||
best_predict = {'MAPE': 0.04397538047202114, 'Mean Squared Error': 22.582856696398935,
|
||||
'Mean Absolute Error': 3.384549163877968, 'R2 Score': 0.7515887251149801,
|
||||
'Explained Variance Score': 0.7516508219403573, 'Median Absolute Error': 2.4624472128668344}
|
||||
best_predict = {'MAPE': 0.04413439429441669, 'Mean Squared Error': 22.700373062051142,
|
||||
'Mean Absolute Error': 3.3961241443022008, 'R2 Score': 0.750296045867001,
|
||||
'Explained Variance Score': 0.7503518147827141, 'Median Absolute Error': 2.4442017110145855}
|
||||
|
||||
def check_successes(experiment_error, best_error):
|
||||
|
||||
|
|
@ -641,3 +640,25 @@ self = SapModel(
|
|||
data=all_data["data"],
|
||||
cleaner=all_data["cleaner"]
|
||||
)
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest on every non-response column of self.df so that the
# fitted model's feature_importances_ can be read per input column.
rf = RandomForestRegressor(random_state=self.random_state)
X = self.df.drop(columns=self.RESPONSE)

# Replace empty-string entries with "0" in these columns, then cast to float
# so the regressor receives numeric input.
for col in ["photo-supply", "multi-glaze-proportion", "low-energy-lighting", "number-open-fireplaces"]:
    X[col] = np.where(X[col] == "", "0", X[col]).astype(float)

Y = self.df[self.RESPONSE]
rf.fit(X, Y)

# Collect the name and importance of each feature as a list of records.
# The importances are paired with the columns of X, the frame the forest was
# fitted on, so names and values stay aligned.
# NOTE(review): the original zipped against `train_x.columns`; `train_x` is
# not defined in the visible code -- confirm nothing relied on that frame.
importance_df = [
    {
        "Feature": feature,
        "rf_importance": importance,
    }
    for feature, importance in zip(X.columns, rf.feature_importances_)
]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue