diff --git a/model_data/analysis/SapModel.py b/model_data/analysis/SapModel.py index 0336125a..3b177fa9 100644 --- a/model_data/analysis/SapModel.py +++ b/model_data/analysis/SapModel.py @@ -12,6 +12,9 @@ from sklearn.inspection import permutation_importance from statsmodels.stats.outliers_influence import variance_inflation_factor from tqdm import tqdm +from model_data.utils import setup_logger + +logger = setup_logger() with open("all_data.pkl", "rb") as f: all_data = pickle.load(f) @@ -271,6 +274,7 @@ class SapModel: return model_data def create_dataset(self): + logger.info("Creating modelling dataset") model_data = self.df[[self.RESPONSE] + self.COMPONENT_FEATURES + self.BASE_FEATURES] model_data = model_data.reset_index(drop=True) model_data["idx"] = model_data.index.copy() @@ -353,7 +357,6 @@ class SapModel: def fit_model(self): """ Main function to fit the model and produce accuracy metrics - :return: """ x = pd.get_dummies(self.model_data, columns=self.CATEGORICAL_COLS + self.BUCKET_VARIABLES, drop_first=True) @@ -369,6 +372,7 @@ class SapModel: # Create the training and test sets for each run self.make_training_test(x) self.train_x, self.test_x = self.remove_zero_std_cols(self.train_x, self.test_x) + logger.info("Detecting multi-collinearity in training dataset") self.detect_multi_collinearity() # Add a constant to the independent value @@ -379,6 +383,7 @@ class SapModel: train_x = train_x.drop(columns=["idx"]) test_x = test_x.drop(columns=["idx"]) + logger.info("Fitting testing model") # make regression model model = sm.OLS(self.train_y, train_x) # fit model and print results @@ -429,7 +434,7 @@ class SapModel: # We're now ready to fit the final model # For the moment, the pre-processing at the top of this function merely removes columns, so we # just need to remove the columns that were removed from the training data from the final model - + logger.info("Fitting final model") x = sm.add_constant(x) y = x[self.RESPONSE] x = x[self.train_x.columns]