diff --git a/modules/ml-pipeline/src/pipeline/build_model.py b/modules/ml-pipeline/src/pipeline/build_model.py index 9f88dbd..519acd5 100644 --- a/modules/ml-pipeline/src/pipeline/build_model.py +++ b/modules/ml-pipeline/src/pipeline/build_model.py @@ -15,6 +15,7 @@ from core.interface.InterfaceDataClient import DataClient from core.DataClient import dataclient_factory from core.MLModels import model_factory from core.MLMetrics import metrics_factory +from configs.post_prediction_logic import post_prediction_logic RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local") @@ -74,7 +75,9 @@ def build_model( prediction_data = train_data.drop(columns=target) - fit_predictions = model.predict(data=prediction_data) + fit_predictions = model.predict( + data=prediction_data, post_prediction_logic=post_prediction_logic + ) logger.info("------------------------------") logger.info("--- Generating fit metrics ---") diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 5e870b8..e0cc3d3 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -11,6 +11,6 @@ AutogluonAutoML: output_filepath: ./data/model/autogluonmodel/ problem_type: regression eval_metric: mean_absolute_error - time_limit: 60 + time_limit: 600 presets: medium_quality excluded_model_types: ['KNN'] diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml b/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml index ac75080..74d1823 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml @@ -3,7 +3,7 @@ feature_processor_config: subsample_amount: null subsample_seed: 0 target: SAP_ENDING - drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE"] + drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", 
"CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"] # retain_features: ["TOTAL_FLOOR_AREA_STARTING", "SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "FIXED_LIGHTING_OUTLETS_COUNT", "PHOTO_SUPPLY_STARTING", "MULTI_GLAZE_PROPORTION_STARTING", "LOW_ENERGY_LIGHTING_STARTING", "NUMBER_OPEN_FIREPLACES_STARTING", "EXTENSION_COUNT_STARTING", "FLOOR_HEIGHT_STARTING", "PHOTO_SUPPLY_ENDING", "MULTI_GLAZE_PROPORTION_ENDING", "LOW_ENERGY_LIGHTING_ENDING", "NUMBER_OPEN_FIREPLACES_ENDING", "EXTENSION_COUNT_ENDING", "TOTAL_FLOOR_AREA_ENDING", "FLOOR_HEIGHT_ENDING", "DAYS_TO_STARTING", "DAYS_TO_ENDING"] # retain_features: null # retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS', @@ -32,29 +32,30 @@ feature_processor_config: # 'DAYS_TO_STARTING', # 'WALLS_DESCRIPTION_STARTING', # 'FLOOR_DESCRIPTION_STARTING'] - retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS', - 'NUMBER_HEATED_ROOMS', - 'FIXED_LIGHTING_OUTLETS_COUNT', - 'CONSTRUCTION_AGE_BAND', - 'TRANSACTION_TYPE_ENDING', - 'LIGHTING_DESCRIPTION_ENDING', - 'MAINHEAT_DESCRIPTION_ENDING', - 'HOTWATER_DESCRIPTION_ENDING', - 'MAIN_FUEL_ENDING', - 'MECHANICAL_VENTILATION_ENDING', - 'SECONDHEAT_DESCRIPTION_ENDING', - 'ENERGY_TARIFF_ENDING', - 'SOLAR_WATER_HEATING_FLAG_ENDING', - 'PHOTO_SUPPLY_ENDING', - 'WINDOWS_DESCRIPTION_ENDING', - 'GLAZED_TYPE_ENDING', - 'MULTI_GLAZE_PROPORTION_ENDING', - 'LOW_ENERGY_LIGHTING_ENDING', - 'NUMBER_OPEN_FIREPLACES_ENDING', - 'MAINHEATCONT_DESCRIPTION_ENDING', - 'EXTENSION_COUNT_ENDING', - 'TOTAL_FLOOR_AREA_ENDING', - 'FLOOR_HEIGHT_ENDING', - 'DAYS_TO_ENDING', -'WALLS_DESCRIPTION_ENDING', -'FLOOR_DESCRIPTION_ENDING'] +# retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS', +# 'NUMBER_HEATED_ROOMS', +# 'FIXED_LIGHTING_OUTLETS_COUNT', +# 'CONSTRUCTION_AGE_BAND', +# 
'TRANSACTION_TYPE_ENDING', +# 'LIGHTING_DESCRIPTION_ENDING', +# 'MAINHEAT_DESCRIPTION_ENDING', +# 'HOTWATER_DESCRIPTION_ENDING', +# 'MAIN_FUEL_ENDING', +# 'MECHANICAL_VENTILATION_ENDING', +# 'SECONDHEAT_DESCRIPTION_ENDING', +# 'ENERGY_TARIFF_ENDING', +# 'SOLAR_WATER_HEATING_FLAG_ENDING', +# 'PHOTO_SUPPLY_ENDING', +# 'WINDOWS_DESCRIPTION_ENDING', +# 'GLAZED_TYPE_ENDING', +# 'MULTI_GLAZE_PROPORTION_ENDING', +# 'LOW_ENERGY_LIGHTING_ENDING', +# 'NUMBER_OPEN_FIREPLACES_ENDING', +# 'MAINHEATCONT_DESCRIPTION_ENDING', +# 'EXTENSION_COUNT_ENDING', +# 'TOTAL_FLOOR_AREA_ENDING', +# 'FLOOR_HEIGHT_ENDING', +# 'DAYS_TO_ENDING', +# 'WALLS_DESCRIPTION_ENDING', +# 'FLOOR_DESCRIPTION_ENDING'] + retain_features: null diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py index 180d3a9..1f97005 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py @@ -5,16 +5,44 @@ During the feature processor step, we can apply additional business logic and fe """ Business Logic dict + functions """ -business_logic = {} + + +def remove_starting_columns(df): + """ + Drop every column whose name ends with "_STARTING", then re-select + "SAP_STARTING" as the last column. Raises KeyError if df has no "SAP_STARTING". + """ + keep_column_index = [ + not col_name.endswith("_STARTING") + for col_name in df.columns + ] + keep_columns = df.columns[keep_column_index].to_list() + keep_columns.append("SAP_STARTING") + df = df[keep_columns] + return df + + +# def keep_ending_columns(df): +# ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)] +# keep_columns = df.columns[ending_column_index].to_list() +# keep_columns.append("SAP_STARTING") +# print(keep_columns) +# df = df[keep_columns] +# return df + +business_logic = { + "remove_starting_columns": remove_starting_columns + # "keep_ENDING_COLUMNS": keep_ending_columns +} """ New features dict + function """ -def SAP_ENDING(df): - return df["SAP_STARTING"] + df["RDSAP_CHANGE"]
+# def SAP_ENDING(df): +# return df["SAP_STARTING"] + df["RDSAP_CHANGE"] -new_feature_funcs = {"SAP_ENDING": SAP_ENDING} -# new_feature_funcs = {} +# new_feature_funcs = {"SAP_ENDING": SAP_ENDING} +new_feature_funcs = {} diff --git a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py new file mode 100644 index 0000000..95cb293 --- /dev/null +++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py @@ -0,0 +1,37 @@ +""" +After predictions, we may want to apply some post processing to the predictions +""" +import pandas as pd + + +def clip_predictions_to_minimum_value( + data: pd.DataFrame, predictions: pd.Series, minimum_value: int = 1 +) -> pd.Series: + """ + Where a row's SAP_STARTING exceeds its prediction, replace the prediction with + SAP_STARTING plus minimum_value. Returns a new series carrying the input's name; + the input series itself is not modified. + """ + series_name = predictions.name + # rename() returns a new Series, so the caller's object keeps its own name + predictions = predictions.rename("predictions") + predictions_df = pd.concat([data, predictions], axis=1) + replace_index = predictions_df["SAP_STARTING"] > predictions_df["predictions"] + predictions_df.loc[replace_index, "predictions"] = ( + predictions_df.loc[replace_index, "SAP_STARTING"] + minimum_value + ) + + predictions_new = predictions_df["predictions"] + predictions_new.name = series_name + return predictions_new + + +# def round_predictions(data: pd.DataFrame, predictions: pd.Series) -> pd.Series: + +# return predictions.round() + + +post_prediction_logic = { + "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, + # "round_predictions": round_predictions +} diff --git a/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml b/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml index bed35d9..52fd5fc 100644 --- a/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml @@ -1 +1,4 @@ dataclient_type: local +nshap_samples: 100 # how many samples to use to approximate each Shapley value, larger values will be slower +n_val: 30 # how many datapoints from validation data
should we interpret predictions for, larger values will be slower +row_index: [0, 10, 20] # index of an example datapoint diff --git a/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml index b7a5670..bbcf3f8 100644 --- a/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml @@ -1,6 +1,7 @@ input_dataclient_type: aws-s3 output_dataclient_type: local -data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet +# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet +data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py b/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py index c8c9a4e..dd020a4 100644 --- a/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py +++ b/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py @@ -109,7 +109,9 @@ class DataFrameFeatureProcessor: # TODO: to test for key, value in new_feature_funcs.items(): - df[key] = value(df) + key_column = value(df) + key_column.name = key + df = pd.concat([df, key_column], axis=1) return df diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py index 2c237ba..4d9a9e9 100644 --- a/modules/ml-pipeline/src/pipeline/core/MLModels.py +++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py @@ -75,7 +75,9 @@ class SKLearnLinearRegression: y_train = data[target] self.model.fit(x_train, y_train) - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict( + self, data: pd.DataFrame, post_prediction_logic: dict | None = None + ) -> pd.Series: """ Method to predict """ @@ -128,7 +130,9 @@ class SKLearnSVMRegression: y_train = 
data[target] self.model.fit(x_train, y_train) - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict( + self, data: pd.DataFrame, post_prediction_logic: dict | None = None + ) -> pd.Series: """ Method to predict """ @@ -197,15 +201,40 @@ class AutogluonAutoML: excluded_model_types=model_hyperparameters["excluded_model_types"], ) - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict( + self, data: pd.DataFrame, post_prediction_logic: dict | None = None + ) -> pd.Series: """ Method to predict """ + if post_prediction_logic is None: + post_prediction_logic = {} + if self.model is None: print("No model loaded/ trained") exit(1) predictions = pd.Series(self.model.predict(data)) + if post_prediction_logic: + predictions = self._apply_post_prediction_logic( + data=data, + predictions=predictions, + post_prediction_logic=post_prediction_logic, + ) + + return predictions + + def _apply_post_prediction_logic( + self, data: pd.DataFrame, predictions: pd.Series, post_prediction_logic: dict + ) -> pd.Series: + """ + Run each post-processing callable in post_prediction_logic, in dict order, + feeding the output of one callable into the next. + """ + + for post_processor in post_prediction_logic.values(): + predictions = post_processor(data, predictions) + return predictions diff --git a/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py b/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py index aae3689..ecbc24a 100644 --- a/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py +++ b/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py @@ -32,7 +32,9 @@ class MLModel(Protocol): """ ...
- def predict(self, data: pd.DataFrame) -> pd.Series: + def predict( + self, data: pd.DataFrame, post_prediction_logic: dict | None + ) -> pd.Series: """ Method to predict """ diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 5f143c3..6197fe7 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -15,20 +15,20 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: c183712d22ab739e0be016724f44ee1c.dir - size: 12203729 + md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir + size: 21102167 nfiles: 2 build_model: cmd: python build_model.py deps: - path: build_model.py hash: md5 - md5: f9fa2a66d908b42ae196ce6f0f782258 - size: 5134 + md5: 84b86e829cb164fb2a202033f39e66e8 + size: 5243 - path: data/prepared_data hash: md5 - md5: c183712d22ab739e0be016724f44ee1c.dir - size: 12203729 + md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir + size: 21102167 nfiles: 2 params: configs/build_model.yaml: @@ -36,7 +36,7 @@ stages: output_filepath: ./data/model/autogluonmodel/ problem_type: regression eval_metric: mean_absolute_error - time_limit: 60 + time_limit: 600 presets: medium_quality excluded_model_types: - KNN @@ -49,30 +49,30 @@ stages: outs: - path: data/model/ hash: md5 - md5: cb03448b572cb167bf281ee8d43dccd9.dir - size: 99423757 - nfiles: 14 + md5: d9b051bb9cc626b4fc4b77873838f029.dir + size: 242877007 + nfiles: 18 - path: metrics/fit_metrics.json hash: md5 - md5: 48d9cc86c22c1ac0da8903a32a7d10c3 - size: 183 + md5: bbf8a1bb90cd8d9fea447ca97fe8eea3 + size: 180 generate_predictions: cmd: python generate_predictions.py deps: - path: data/model hash: md5 - md5: cb03448b572cb167bf281ee8d43dccd9.dir - size: 99423757 - nfiles: 14 + md5: d9b051bb9cc626b4fc4b77873838f029.dir + size: 242877007 + nfiles: 18 - path: data/prepared_data hash: md5 - md5: c183712d22ab739e0be016724f44ee1c.dir - size: 12203729 + md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir + size: 21102167 nfiles: 2 - path: 
generate_predictions.py hash: md5 - md5: a25c4611ff467cdc1c921918112a30fe - size: 4311 + md5: 20c4657f5872cb8b60b69344600251b8 + size: 4420 params: configs/generate_predictions.yaml: input_dataclient_type: local @@ -83,21 +83,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir - size: 383878 + md5: 81f707df70bc0d9f7b305427e0034ed1.dir + size: 383598 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir - size: 383878 + md5: 81f707df70bc0d9f7b305427e0034ed1.dir + size: 383598 nfiles: 1 - path: data/prepared_data hash: md5 - md5: c183712d22ab739e0be016724f44ee1c.dir - size: 12203729 + md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir + size: 21102167 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -111,8 +111,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 08a81d2e5cecf360043498526bc98314 - size: 183 + md5: 75baa77d94386c9a567afdac48384435 + size: 185 startup_cleanup: cmd: python startup_cleanup.py deps: diff --git a/modules/ml-pipeline/src/pipeline/generate_predictions.py b/modules/ml-pipeline/src/pipeline/generate_predictions.py index 490d7e9..0e0bd9b 100644 --- a/modules/ml-pipeline/src/pipeline/generate_predictions.py +++ b/modules/ml-pipeline/src/pipeline/generate_predictions.py @@ -12,6 +12,7 @@ from core.interface.InterfaceDataClient import DataClient from core.DataClient import dataclient_factory from core.MLModels import model_factory from core.Logger import logger +from configs.post_prediction_logic import post_prediction_logic RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local") @@ -70,7 +71,9 @@ def generate_predictions( test_data.drop(columns=target) if target in test_data.columns else test_data ) - predictions = model.predict(data=prediction_data) + predictions = model.predict( + data=prediction_data, post_prediction_logic=post_prediction_logic + ) logger.info("--------------------------") 
logger.info("--- Saving predictions ---") diff --git a/modules/ml-pipeline/src/pipeline/prediction_analysis.py b/modules/ml-pipeline/src/pipeline/prediction_analysis.py index 7f9f9a1..8947916 100644 --- a/modules/ml-pipeline/src/pipeline/prediction_analysis.py +++ b/modules/ml-pipeline/src/pipeline/prediction_analysis.py @@ -14,6 +14,7 @@ shap.initjs() import yaml +from typing import List from pathlib import Path from core.interface.InterfaceModels import MLModel from core.interface.InterfaceDataClient import DataClient @@ -36,6 +37,11 @@ feature_process_params = yaml.safe_load(open(feature_process_path)) build_model_path = Path(__file__).parent / "configs" / "build_model.yaml" build_model_params = yaml.safe_load(open(build_model_path)) +generate_predictions_path = ( + Path(__file__).parent / "configs" / "generate_predictions.yaml" +) +generate_predictions_params = yaml.safe_load(open(generate_predictions_path)) + prediction_analysis_path = ( Path(__file__).parent / "configs" / "prediction_analysis.yaml" ) @@ -50,29 +56,40 @@ dataclient = dataclient_factory( dataclient_config=client_params[dataclient_type], ) +target = feature_process_params["feature_processor_config"]["target"] +predictions_column_name = generate_predictions_params["predictions_column_name"] + output_test_filepath = prepare_data_params["output_test_filepath"] +predictions_output_filepath = generate_predictions_params["predictions_output_filepath"] + +nshap_samples = prediction_analysis_params["nshap_samples"] +row_index = prediction_analysis_params["row_index"] + def prediction_analysis( - model: MLModel, dataclient: DataClient, output_test_filepath: str + model: MLModel, + dataclient: DataClient, + target: str, + predictions_column_name: str, + output_test_filepath: str, + predictions_output_filepath: str, + nshap_samples: int, + row_index: List[int], ): test_df = dataclient.load_data(output_test_filepath) - predictions = dataclient.load_data("./data/predictions/predictions.parquet") + 
predictions = dataclient.load_data(predictions_output_filepath) - mix_df = test_df.copy() - mix_df["predictions"] = predictions - mix_df["residual"] = abs(mix_df["predictions"] - mix_df["SAP_ENDING"]) + mix_df = pd.concat([test_df.copy(), predictions], axis=1) + mix_df["residual"] = abs(mix_df[predictions_column_name] - mix_df[target]) mix_df = mix_df.sort_values("residual", ascending=False) - target = "SAP_ENDING" test_df_without_target = test_df.drop(columns=[target]) - # test_df_summary = shap.kmeans(test_df, 10) - # print("Baseline feature-values: \n", test_df_summary) - class AutogluonWrapper: - def __init__(self, predictor, feature_names): - self.ag_model = predictor + class ModelWrapper: + def __init__(self, model, feature_names): + self.model = model self.feature_names = feature_names def predict(self, X): @@ -80,33 +97,39 @@ def prediction_analysis( X = X.values.reshape(1, -1) if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X, columns=self.feature_names) - return self.ag_model.predict(X) + return self.model.predict(X) + + model_wrapper = ModelWrapper(model, feature_names=test_df_without_target.columns) - ag_wrapper = AutogluonWrapper( - model.model, feature_names=test_df_without_target.columns - ) explainer = shap.KernelExplainer( - ag_wrapper.predict, test_df_without_target.head(100) + model_wrapper.predict, test_df_without_target.head(100) ) - NSHAP_SAMPLES = 100 # how many samples to use to approximate each Shapely value, larger values will be slower - N_VAL = 30 # how many datapoints from validation data should we interpret predictions for, larger values will be slower + shap_predictions_df = pd.DataFrame(index=test_df_without_target.columns) + for index in row_index: + single_datapoint = test_df_without_target.iloc[[index]] + # single_prediction = model_wrapper.predict(single_datapoint) + shap_values_single = explainer.shap_values( + single_datapoint, nsamples=nshap_samples + ) + shap.force_plot( + explainer.expected_value, + shap_values_single, 
+ test_df_without_target.iloc[index, :], + ) + shap_single_prediction_df = pd.DataFrame( + shap_values_single, columns=test_df_without_target.columns + ).T + shap_single_prediction_df.columns = [index] + shap_single_prediction_df = shap_single_prediction_df.sort_values(index) + shap_predictions_df = pd.merge( + left=shap_predictions_df, + right=shap_single_prediction_df, + left_index=True, + right_index=True, + ) - ROW_INDEX = 8541 # 23690 #21059 # index of an example datapoint - single_datapoint = test_df_without_target.iloc[[ROW_INDEX]] - single_prediction = ag_wrapper.predict(single_datapoint) - - shap_values_single = explainer.shap_values(single_datapoint, nsamples=NSHAP_SAMPLES) - shap.force_plot( - explainer.expected_value, - shap_values_single, - test_df_without_target.iloc[ROW_INDEX, :], - ) - shap_single_prediciton_df = pd.DataFrame( - shap_values_single, columns=test_df_without_target.columns - ).T - shap_single_prediciton_df.columns = ["contribution"] - shap_single_prediciton_df = shap_single_prediciton_df.sort_values("contribution") + return shap_predictions_df if __name__ == "__main__": @@ -116,7 +139,14 @@ if __name__ == "__main__": logger.info("----------------------------") prediction_analysis( - model=model, dataclient=dataclient, output_test_filepath=output_test_filepath + model=model, + dataclient=dataclient, + target=target, + predictions_column_name=predictions_column_name, + output_test_filepath=output_test_filepath, + predictions_output_filepath=predictions_output_filepath, + nshap_samples=nshap_samples, + row_index=row_index, ) logger.info("-------------------------------")