mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
additional cleanup
This commit is contained in:
parent
308eb99afb
commit
a79c2852cc
1 changed files with 7 additions and 2 deletions
|
|
@ -12,6 +12,9 @@ from sklearn.inspection import permutation_importance
|
|||
|
||||
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
||||
from tqdm import tqdm
|
||||
from model_data.utils import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
with open("all_data.pkl", "rb") as f:
|
||||
all_data = pickle.load(f)
|
||||
|
|
@ -271,6 +274,7 @@ class SapModel:
|
|||
return model_data
|
||||
|
||||
def create_dataset(self):
|
||||
logger.info("Creating modelling dataset")
|
||||
model_data = self.df[[self.RESPONSE] + self.COMPONENT_FEATURES + self.BASE_FEATURES]
|
||||
model_data = model_data.reset_index(drop=True)
|
||||
model_data["idx"] = model_data.index.copy()
|
||||
|
|
@ -353,7 +357,6 @@ class SapModel:
|
|||
def fit_model(self):
|
||||
"""
|
||||
Main function to fit the model and produce accuracy metrics
|
||||
:return:
|
||||
"""
|
||||
|
||||
x = pd.get_dummies(self.model_data, columns=self.CATEGORICAL_COLS + self.BUCKET_VARIABLES, drop_first=True)
|
||||
|
|
@ -369,6 +372,7 @@ class SapModel:
|
|||
# Create the training and test sets for each run
|
||||
self.make_training_test(x)
|
||||
self.train_x, self.test_x = self.remove_zero_std_cols(self.train_x, self.test_x)
|
||||
logger.info("Detecting multi-collinearity in training dataset")
|
||||
self.detect_multi_collinearity()
|
||||
|
||||
# Add a constant to the independent variables
|
||||
|
|
@ -379,6 +383,7 @@ class SapModel:
|
|||
train_x = train_x.drop(columns=["idx"])
|
||||
test_x = test_x.drop(columns=["idx"])
|
||||
|
||||
logger.info("Fitting testing model")
|
||||
# make regression model
|
||||
model = sm.OLS(self.train_y, train_x)
|
||||
# fit model and print results
|
||||
|
|
@ -429,7 +434,7 @@ class SapModel:
|
|||
# We're now ready to fit the final model
|
||||
# For the moment, the pre-processing at the top of this function merely removes columns, so we
|
||||
# just need to remove the columns that were removed from the training data from the final model
|
||||
|
||||
logger.info("Fitting final model")
|
||||
x = sm.add_constant(x)
|
||||
y = x[self.RESPONSE]
|
||||
x = x[self.train_x.columns]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue