mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
testing rf importance and permutation importance
This commit is contained in:
parent
58edd9a255
commit
c59aff412c
1 changed file with 53 additions and 32 deletions
|
|
@ -156,6 +156,32 @@ class SapModel:
|
|||
|
||||
return model_data
|
||||
|
||||
@staticmethod
|
||||
def clean_missings(model_data):
    """Replace empty-string entries in selected columns with explicit sentinels.

    The columns "mechanical-ventilation", "glazed-type" and "glazed-area"
    have "" replaced by "NO DATA!"; "solar-water-heating-flag" has ""
    replaced by "N". Every non-empty value is left as it is.

    Args:
        model_data: pandas DataFrame containing the four columns above.

    Returns:
        The same DataFrame object, with the columns overwritten in place.
    """
    # Cleaning of energy-tariff and construction-age-band hurt prediction
    # performance, indicating there is potentially a notable difference
    # between a "" missing and a "NO DATA!" missing that is worth
    # differentiating, so those two columns are deliberately not touched.
    fill_values = {
        "mechanical-ventilation": "NO DATA!",
        "solar-water-heating-flag": "N",
        "glazed-type": "NO DATA!",
        "glazed-area": "NO DATA!",
    }

    for column, fill_value in fill_values.items():
        # Each column falls back to its OWN values. The previous code used
        # "glazed-type" as the fallback for "glazed-area", which overwrote
        # every non-missing glazed-area value.
        model_data[column] = np.where(
            model_data[column] == "", fill_value, model_data[column]
        )

    return model_data
|
||||
|
||||
def create_dataset(self):
|
||||
model_data = self.df[[self.RESPONSE] + self.COMPONENT_FEATURES + self.BASE_FEATURES]
|
||||
model_data = model_data.reset_index(drop=True)
|
||||
|
|
@ -164,38 +190,7 @@ class SapModel:
|
|||
# Append on u-values
|
||||
model_data = self._append_cleaned_data(model_data)
|
||||
|
||||
def clean_missings(model_data):
    """Fill empty-string values in a fixed set of columns with sentinel labels.

    "" becomes "NO DATA!" in "mechanical-ventilation", "glazed-type" and
    "glazed-area", and "N" in "solar-water-heating-flag". Non-empty values
    are kept unchanged.

    Args:
        model_data: pandas DataFrame holding the columns listed above.

    Returns:
        The DataFrame that was passed in; its columns are reassigned in place.
    """
    # NOTE(review): cleaning of "energy-tariff" (to "Unknown") and of
    # "construction-age-band" (to "NO DATA!") was disabled pending review,
    # so neither column is modified here.
    sentinels = {
        "mechanical-ventilation": "NO DATA!",
        "solar-water-heating-flag": "N",
        "glazed-type": "NO DATA!",
        "glazed-area": "NO DATA!",
    }

    for name, sentinel in sentinels.items():
        is_missing = model_data[name] == ""
        # Fall back to the column's own values; "glazed-area" previously fell
        # back to "glazed-type", clobbering its real values.
        model_data[name] = np.where(is_missing, sentinel, model_data[name])

    return model_data
|
||||
|
||||
model_data = clean_missings(model_data)
|
||||
model_data = self.clean_missings(model_data)
|
||||
|
||||
# Convert transaction_type
|
||||
model_data = self._convert_transaction_type(model_data)
|
||||
|
|
@ -292,6 +287,32 @@ class SapModel:
|
|||
train_x = train_x.drop(columns=to_drop)
|
||||
test_x = test_x[train_x.columns]
|
||||
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.inspection import permutation_importance
|
||||
|
||||
rf = RandomForestRegressor(random_state=self.random_state)
|
||||
rf.fit(train_x, self.train_y)
|
||||
|
||||
# Print the name and importance of each feature
|
||||
importance_df = []
|
||||
for feature, importance in zip(train_x.columns, rf.feature_importances_):
|
||||
importance_df.append(
|
||||
{
|
||||
"Feature": feature,
|
||||
"rf_importance": importance
|
||||
}
|
||||
)
|
||||
importance_df = pd.DataFrame(importance_df)
|
||||
importance_df = importance_df.sort_values(by="rf_importance", ascending=False)
|
||||
|
||||
perm_importance = permutation_importance(rf, test_x, self.test_y, scoring='neg_mean_squared_error')
|
||||
perm_importance_df = pd.DataFrame(
|
||||
{
|
||||
"Feature": test_x.columns,
|
||||
"perm_importance": perm_importance.importances_mean
|
||||
}
|
||||
).sort_values(by="perm_importance", ascending=False)
|
||||
|
||||
# make regression model
|
||||
model = sm.OLS(self.train_y, train_x)
|
||||
# fit model and print results
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue