diff --git a/modules/ml-pipeline/src/pipeline/build_model.py b/modules/ml-pipeline/src/pipeline/build_model.py
index 9f88dbd..519acd5 100644
--- a/modules/ml-pipeline/src/pipeline/build_model.py
+++ b/modules/ml-pipeline/src/pipeline/build_model.py
@@ -15,6 +15,7 @@ from core.interface.InterfaceDataClient import DataClient
 from core.DataClient import dataclient_factory
 from core.MLModels import model_factory
 from core.MLMetrics import metrics_factory
+from configs.post_prediction_logic import post_prediction_logic
 
 
 RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@@ -74,7 +75,9 @@ def build_model(
 
     prediction_data = train_data.drop(columns=target)
 
-    fit_predictions = model.predict(data=prediction_data)
+    fit_predictions = model.predict(
+        data=prediction_data, post_prediction_logic=post_prediction_logic
+    )
 
     logger.info("------------------------------")
     logger.info("--- Generating fit metrics ---")
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 5e870b8..e0cc3d3 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -11,6 +11,6 @@ AutogluonAutoML:
   output_filepath: ./data/model/autogluonmodel/
   problem_type: regression
   eval_metric: mean_absolute_error
-  time_limit: 60
+  time_limit: 600
   presets: medium_quality
   excluded_model_types: ['KNN']
diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml b/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml
index ac75080..74d1823 100644
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml
@@ -3,7 +3,7 @@ feature_processor_config:
   subsample_amount: null
   subsample_seed: 0
   target: SAP_ENDING
-  drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE"]
+  drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
   # retain_features: ["TOTAL_FLOOR_AREA_STARTING", "SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "FIXED_LIGHTING_OUTLETS_COUNT", "PHOTO_SUPPLY_STARTING", "MULTI_GLAZE_PROPORTION_STARTING", "LOW_ENERGY_LIGHTING_STARTING", "NUMBER_OPEN_FIREPLACES_STARTING", "EXTENSION_COUNT_STARTING", "FLOOR_HEIGHT_STARTING", "PHOTO_SUPPLY_ENDING", "MULTI_GLAZE_PROPORTION_ENDING", "LOW_ENERGY_LIGHTING_ENDING", "NUMBER_OPEN_FIREPLACES_ENDING", "EXTENSION_COUNT_ENDING", "TOTAL_FLOOR_AREA_ENDING", "FLOOR_HEIGHT_ENDING", "DAYS_TO_STARTING", "DAYS_TO_ENDING"]
   # retain_features: null
 #   retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
@@ -32,29 +32,30 @@ feature_processor_config:
 #  'DAYS_TO_STARTING',
 # 'WALLS_DESCRIPTION_STARTING',
 # 'FLOOR_DESCRIPTION_STARTING']
-  retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
- 'NUMBER_HEATED_ROOMS',
- 'FIXED_LIGHTING_OUTLETS_COUNT',
- 'CONSTRUCTION_AGE_BAND',
- 'TRANSACTION_TYPE_ENDING',
- 'LIGHTING_DESCRIPTION_ENDING',
- 'MAINHEAT_DESCRIPTION_ENDING',
- 'HOTWATER_DESCRIPTION_ENDING',
- 'MAIN_FUEL_ENDING',
- 'MECHANICAL_VENTILATION_ENDING',
- 'SECONDHEAT_DESCRIPTION_ENDING',
- 'ENERGY_TARIFF_ENDING',
- 'SOLAR_WATER_HEATING_FLAG_ENDING',
- 'PHOTO_SUPPLY_ENDING',
- 'WINDOWS_DESCRIPTION_ENDING',
- 'GLAZED_TYPE_ENDING',
- 'MULTI_GLAZE_PROPORTION_ENDING',
- 'LOW_ENERGY_LIGHTING_ENDING',
- 'NUMBER_OPEN_FIREPLACES_ENDING',
- 'MAINHEATCONT_DESCRIPTION_ENDING',
- 'EXTENSION_COUNT_ENDING',
- 'TOTAL_FLOOR_AREA_ENDING',
- 'FLOOR_HEIGHT_ENDING',
- 'DAYS_TO_ENDING',
-'WALLS_DESCRIPTION_ENDING',
-'FLOOR_DESCRIPTION_ENDING']
+#   retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
+#  'NUMBER_HEATED_ROOMS',
+#  'FIXED_LIGHTING_OUTLETS_COUNT',
+#  'CONSTRUCTION_AGE_BAND',
+#  'TRANSACTION_TYPE_ENDING',
+#  'LIGHTING_DESCRIPTION_ENDING',
+#  'MAINHEAT_DESCRIPTION_ENDING',
+#  'HOTWATER_DESCRIPTION_ENDING',
+#  'MAIN_FUEL_ENDING',
+#  'MECHANICAL_VENTILATION_ENDING',
+#  'SECONDHEAT_DESCRIPTION_ENDING',
+#  'ENERGY_TARIFF_ENDING',
+#  'SOLAR_WATER_HEATING_FLAG_ENDING',
+#  'PHOTO_SUPPLY_ENDING',
+#  'WINDOWS_DESCRIPTION_ENDING',
+#  'GLAZED_TYPE_ENDING',
+#  'MULTI_GLAZE_PROPORTION_ENDING',
+#  'LOW_ENERGY_LIGHTING_ENDING',
+#  'NUMBER_OPEN_FIREPLACES_ENDING',
+#  'MAINHEATCONT_DESCRIPTION_ENDING',
+#  'EXTENSION_COUNT_ENDING',
+#  'TOTAL_FLOOR_AREA_ENDING',
+#  'FLOOR_HEIGHT_ENDING',
+#  'DAYS_TO_ENDING',
+# 'WALLS_DESCRIPTION_ENDING',
+# 'FLOOR_DESCRIPTION_ENDING']
+  retain_features: null
diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
index 180d3a9..1f97005 100644
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
@@ -5,16 +5,40 @@ During the feature processor step, we can apply additional business logic and fe
 """
 Business Logic dict + functions
 """
-business_logic = {}
+
+
+def remove_starting_columns(df):
+    """Drop every ``*_STARTING`` column except ``SAP_STARTING`` (re-appended last)."""
+    retained = [
+        column
+        for column in df.columns
+        if not column.endswith("_STARTING")
+    ]
+    retained.append("SAP_STARTING")
+    return df[retained]
+
+
+# def keep_ending_columns(df):
+#     ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
+#     keep_columns = df.columns[ending_column_index].to_list()
+#     keep_columns.append("SAP_STARTING")
+#     print(keep_columns)
+#     df = df[keep_columns]
+#     return df
+
+business_logic = {
+    "remove_starting_columns": remove_starting_columns
+    # "keep_ENDING_COLUMNS": keep_ending_columns
+}
 
 """
 New features dict + function
 """
 
 
-def SAP_ENDING(df):
-    return df["SAP_STARTING"] + df["RDSAP_CHANGE"]
+# def SAP_ENDING(df):
+#     return df["SAP_STARTING"] + df["RDSAP_CHANGE"]
 
 
-new_feature_funcs = {"SAP_ENDING": SAP_ENDING}
-# new_feature_funcs = {}
+# new_feature_funcs = {"SAP_ENDING": SAP_ENDING}
+new_feature_funcs = {}
diff --git a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
new file mode 100644
index 0000000..95cb293
--- /dev/null
+++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
@@ -0,0 +1,46 @@
+"""
+After predictions, we may want to apply some post processing to the predictions
+"""
+import pandas as pd
+
+
+def clip_predictions_to_minimum_value(
+    data: pd.DataFrame, predictions: pd.Series, minimum_value: int = 1
+) -> pd.Series:
+    """
+    Raise predictions that fall below the starting SAP score.
+
+    Wherever ``data["SAP_STARTING"]`` is strictly greater than the prediction,
+    the prediction is replaced by ``SAP_STARTING + minimum_value``; all other
+    predictions are returned unchanged. Rows are matched on index labels.
+
+    Args:
+        data: Feature frame; must contain a ``SAP_STARTING`` column.
+        predictions: Raw model predictions. Not modified.
+        minimum_value: Amount added to ``SAP_STARTING`` for replaced rows.
+
+    Returns:
+        The adjusted predictions, carrying the same name as ``predictions``.
+    """
+    series_name = predictions.name
+    # rename() returns a new Series, so the caller's object keeps its name.
+    predictions_df = pd.concat([data, predictions.rename("predictions")], axis=1)
+    replace_index = predictions_df["SAP_STARTING"] > predictions_df["predictions"]
+    predictions_df.loc[replace_index, "predictions"] = (
+        predictions_df.loc[replace_index, "SAP_STARTING"] + minimum_value
+    )
+
+    predictions_new = predictions_df["predictions"]
+    predictions_new.name = series_name
+    return predictions_new
+
+
+# def round_predictions(data: pd.DataFrame, predictions: pd.Series) -> pd.Series:
+
+#     return predictions.round()
+
+
+post_prediction_logic = {
+    "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
+    # "round_predictions": round_predictions
+}
diff --git a/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml b/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml
index bed35d9..52fd5fc 100644
--- a/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml
@@ -1 +1,4 @@
 dataclient_type: local
+nshap_samples: 100 # how many samples to use to approximate each Shapley value, larger values will be slower
+n_val: 30  # how many datapoints from validation data should we interpret predictions for, larger values will be slower
+row_index: [0, 10, 20] # positional indices of the example datapoints to explain
diff --git a/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml
index b7a5670..bbcf3f8 100644
--- a/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml
@@ -1,6 +1,7 @@
 input_dataclient_type: aws-s3
 output_dataclient_type: local
-data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
+# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
+data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
 train_proportion: 0.9
 output_train_filepath: ./data/prepared_data/train.parquet
 output_test_filepath: ./data/prepared_data/test.parquet
diff --git a/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py b/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py
index c8c9a4e..dd020a4 100644
--- a/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py
+++ b/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py
@@ -109,7 +109,9 @@ class DataFrameFeatureProcessor:
 
         # TODO: to test
         for key, value in new_feature_funcs.items():
-            df[key] = value(df)
+            key_column = value(df)
+            key_column.name = key
+            df = pd.concat([df, key_column], axis=1)
 
         return df
 
diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py
index 2c237ba..4d9a9e9 100644
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@@ -75,7 +75,9 @@ class SKLearnLinearRegression:
         y_train = data[target]
         self.model.fit(x_train, y_train)
 
-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
         """
         Method to predict
         """
@@ -128,7 +130,9 @@ class SKLearnSVMRegression:
         y_train = data[target]
         self.model.fit(x_train, y_train)
 
-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
         """
         Method to predict
         """
@@ -197,15 +201,39 @@ class AutogluonAutoML:
             excluded_model_types=model_hyperparameters["excluded_model_types"],
         )
 
-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
         """
         Method to predict
         """
 
+        if post_prediction_logic is None:
+            post_prediction_logic = {}
+
         if self.model is None:
             print("No model loaded/ trained")
             exit(1)
 
         predictions = pd.Series(self.model.predict(data))
 
+        if len(post_prediction_logic) != 0:
+            predictions = self._apply_post_prediction_logic(
+                data=data,
+                predictions=predictions,
+                post_prediction_logic=post_prediction_logic,
+            )
+
+        return predictions
+
+    def _apply_post_prediction_logic(
+        self, data: pd.DataFrame, predictions: pd.Series, post_prediction_logic: dict
+    ) -> pd.Series:
+        """
+        Run each post-processing callable, in dict order, as f(data, predictions).
+        """
+
+        for post_process in post_prediction_logic.values():
+            predictions = post_process(data, predictions)
+
         return predictions
diff --git a/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py b/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py
index aae3689..ecbc24a 100644
--- a/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py
@@ -32,7 +32,9 @@ class MLModel(Protocol):
         """
         ...
 
-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
         """
         Method to predict
         """
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 5f143c3..6197fe7 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -15,20 +15,20 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
       nfiles: 2
   build_model:
     cmd: python build_model.py
     deps:
     - path: build_model.py
       hash: md5
-      md5: f9fa2a66d908b42ae196ce6f0f782258
-      size: 5134
+      md5: 84b86e829cb164fb2a202033f39e66e8
+      size: 5243
     - path: data/prepared_data
       hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -36,7 +36,7 @@ stages:
           output_filepath: ./data/model/autogluonmodel/
           problem_type: regression
           eval_metric: mean_absolute_error
-          time_limit: 60
+          time_limit: 600
           presets: medium_quality
           excluded_model_types:
           - KNN
@@ -49,30 +49,30 @@ stages:
     outs:
     - path: data/model/
       hash: md5
-      md5: cb03448b572cb167bf281ee8d43dccd9.dir
-      size: 99423757
-      nfiles: 14
+      md5: d9b051bb9cc626b4fc4b77873838f029.dir
+      size: 242877007
+      nfiles: 18
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 48d9cc86c22c1ac0da8903a32a7d10c3
-      size: 183
+      md5: bbf8a1bb90cd8d9fea447ca97fe8eea3
+      size: 180
   generate_predictions:
     cmd: python generate_predictions.py
     deps:
     - path: data/model
       hash: md5
-      md5: cb03448b572cb167bf281ee8d43dccd9.dir
-      size: 99423757
-      nfiles: 14
+      md5: d9b051bb9cc626b4fc4b77873838f029.dir
+      size: 242877007
+      nfiles: 18
     - path: data/prepared_data
       hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
       nfiles: 2
     - path: generate_predictions.py
       hash: md5
-      md5: a25c4611ff467cdc1c921918112a30fe
-      size: 4311
+      md5: 20c4657f5872cb8b60b69344600251b8
+      size: 4420
     params:
       configs/generate_predictions.yaml:
         input_dataclient_type: local
@@ -83,21 +83,21 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir
-      size: 383878
+      md5: 81f707df70bc0d9f7b305427e0034ed1.dir
+      size: 383598
       nfiles: 1
   generate_metrics:
     cmd: python generate_metrics.py
     deps:
     - path: data/predictions
       hash: md5
-      md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir
-      size: 383878
+      md5: 81f707df70bc0d9f7b305427e0034ed1.dir
+      size: 383598
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
       nfiles: 2
     - path: generate_metrics.py
       hash: md5
@@ -111,8 +111,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 08a81d2e5cecf360043498526bc98314
-      size: 183
+      md5: 75baa77d94386c9a567afdac48384435
+      size: 185
   startup_cleanup:
     cmd: python startup_cleanup.py
     deps:
diff --git a/modules/ml-pipeline/src/pipeline/generate_predictions.py b/modules/ml-pipeline/src/pipeline/generate_predictions.py
index 490d7e9..0e0bd9b 100644
--- a/modules/ml-pipeline/src/pipeline/generate_predictions.py
+++ b/modules/ml-pipeline/src/pipeline/generate_predictions.py
@@ -12,6 +12,7 @@ from core.interface.InterfaceDataClient import DataClient
 from core.DataClient import dataclient_factory
 from core.MLModels import model_factory
 from core.Logger import logger
+from configs.post_prediction_logic import post_prediction_logic
 
 
 RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@@ -70,7 +71,9 @@ def generate_predictions(
         test_data.drop(columns=target) if target in test_data.columns else test_data
     )
 
-    predictions = model.predict(data=prediction_data)
+    predictions = model.predict(
+        data=prediction_data, post_prediction_logic=post_prediction_logic
+    )
 
     logger.info("--------------------------")
     logger.info("--- Saving predictions ---")
diff --git a/modules/ml-pipeline/src/pipeline/prediction_analysis.py b/modules/ml-pipeline/src/pipeline/prediction_analysis.py
index 7f9f9a1..8947916 100644
--- a/modules/ml-pipeline/src/pipeline/prediction_analysis.py
+++ b/modules/ml-pipeline/src/pipeline/prediction_analysis.py
@@ -14,6 +14,7 @@ shap.initjs()
 
 
 import yaml
+from typing import List
 from pathlib import Path
 from core.interface.InterfaceModels import MLModel
 from core.interface.InterfaceDataClient import DataClient
@@ -36,6 +37,11 @@ feature_process_params = yaml.safe_load(open(feature_process_path))
 build_model_path = Path(__file__).parent / "configs" / "build_model.yaml"
 build_model_params = yaml.safe_load(open(build_model_path))
 
+generate_predictions_path = (
+    Path(__file__).parent / "configs" / "generate_predictions.yaml"
+)
+generate_predictions_params = yaml.safe_load(generate_predictions_path.read_text())
+
 prediction_analysis_path = (
     Path(__file__).parent / "configs" / "prediction_analysis.yaml"
 )
@@ -50,29 +56,40 @@ dataclient = dataclient_factory(
     dataclient_config=client_params[dataclient_type],
 )
 
+target = feature_process_params["feature_processor_config"]["target"]
+predictions_column_name = generate_predictions_params["predictions_column_name"]
+
 output_test_filepath = prepare_data_params["output_test_filepath"]
 
+predictions_output_filepath = generate_predictions_params["predictions_output_filepath"]
+
+nshap_samples = prediction_analysis_params["nshap_samples"]
+row_index = prediction_analysis_params["row_index"]
+
 
 def prediction_analysis(
-    model: MLModel, dataclient: DataClient, output_test_filepath: str
+    model: MLModel,
+    dataclient: DataClient,
+    target: str,
+    predictions_column_name: str,
+    output_test_filepath: str,
+    predictions_output_filepath: str,
+    nshap_samples: int,
+    row_index: List[int],
 ):
 
     test_df = dataclient.load_data(output_test_filepath)
-    predictions = dataclient.load_data("./data/predictions/predictions.parquet")
+    predictions = dataclient.load_data(predictions_output_filepath)
 
-    mix_df = test_df.copy()
-    mix_df["predictions"] = predictions
-    mix_df["residual"] = abs(mix_df["predictions"] - mix_df["SAP_ENDING"])
+    mix_df = pd.concat([test_df.copy(), predictions], axis=1)
+    mix_df["residual"] = abs(mix_df[predictions_column_name] - mix_df[target])
     mix_df = mix_df.sort_values("residual", ascending=False)
 
-    target = "SAP_ENDING"
     test_df_without_target = test_df.drop(columns=[target])
 
-    # test_df_summary = shap.kmeans(test_df, 10)
-    # print("Baseline feature-values: \n", test_df_summary)
-    class AutogluonWrapper:
-        def __init__(self, predictor, feature_names):
-            self.ag_model = predictor
+    class ModelWrapper:
+        def __init__(self, model, feature_names):
+            self.model = model
             self.feature_names = feature_names
 
         def predict(self, X):
@@ -80,33 +97,39 @@ def prediction_analysis(
                 X = X.values.reshape(1, -1)
             if not isinstance(X, pd.DataFrame):
                 X = pd.DataFrame(X, columns=self.feature_names)
-            return self.ag_model.predict(X)
+            return self.model.predict(X)
+
+    model_wrapper = ModelWrapper(model, feature_names=test_df_without_target.columns)
 
-    ag_wrapper = AutogluonWrapper(
-        model.model, feature_names=test_df_without_target.columns
-    )
     explainer = shap.KernelExplainer(
-        ag_wrapper.predict, test_df_without_target.head(100)
+        model_wrapper.predict, test_df_without_target.head(100)
     )
 
-    NSHAP_SAMPLES = 100  # how many samples to use to approximate each Shapely value, larger values will be slower
-    N_VAL = 30  # how many datapoints from validation data should we interpret predictions for, larger values will be slower
+    shap_predictions_df = pd.DataFrame(index=test_df_without_target.columns)
+    for index in row_index:
+        single_datapoint = test_df_without_target.iloc[[index]]
+        # single_prediction = model_wrapper.predict(single_datapoint)
+        shap_values_single = explainer.shap_values(
+            single_datapoint, nsamples=nshap_samples
+        )
+        shap.force_plot(
+            explainer.expected_value,
+            shap_values_single,
+            test_df_without_target.iloc[index, :],
+        )
+        shap_single_prediction_df = pd.DataFrame(
+            shap_values_single, columns=test_df_without_target.columns
+        ).T
+        shap_single_prediction_df.columns = [index]
+        shap_single_prediction_df = shap_single_prediction_df.sort_values(index)
+        shap_predictions_df = pd.merge(
+            left=shap_predictions_df,
+            right=shap_single_prediction_df,
+            left_index=True,
+            right_index=True,
+        )
 
-    ROW_INDEX = 8541  # 23690 #21059  # index of an example datapoint
-    single_datapoint = test_df_without_target.iloc[[ROW_INDEX]]
-    single_prediction = ag_wrapper.predict(single_datapoint)
-
-    shap_values_single = explainer.shap_values(single_datapoint, nsamples=NSHAP_SAMPLES)
-    shap.force_plot(
-        explainer.expected_value,
-        shap_values_single,
-        test_df_without_target.iloc[ROW_INDEX, :],
-    )
-    shap_single_prediciton_df = pd.DataFrame(
-        shap_values_single, columns=test_df_without_target.columns
-    ).T
-    shap_single_prediciton_df.columns = ["contribution"]
-    shap_single_prediciton_df = shap_single_prediciton_df.sort_values("contribution")
+    return shap_predictions_df
 
 
 if __name__ == "__main__":
@@ -116,7 +139,14 @@ if __name__ == "__main__":
     logger.info("----------------------------")
 
     prediction_analysis(
-        model=model, dataclient=dataclient, output_test_filepath=output_test_filepath
+        model=model,
+        dataclient=dataclient,
+        target=target,
+        predictions_column_name=predictions_column_name,
+        output_test_filepath=output_test_filepath,
+        predictions_output_filepath=predictions_output_filepath,
+        nshap_samples=nshap_samples,
+        row_index=row_index,
     )
 
     logger.info("-------------------------------")