Cleaned up prediction analysis code and added clipping to model predictions

2026-07-27 22:45:04 +00:00 · 2023-09-28 18:09:48 +00:00 · 2023-09-28 18:09:48 +00:00 · 84d3dee7d7
commit 84d3dee7d7
parent 56cf9c33d4
13 changed files with 230 additions and 102 deletions
--- a/modules/ml-pipeline/src/pipeline/build_model.py
+++ b/modules/ml-pipeline/src/pipeline/build_model.py
@ -15,6 +15,7 @@ from core.interface.InterfaceDataClient import DataClient
 from core.DataClient import dataclient_factory
 from core.MLModels import model_factory
 from core.MLMetrics import metrics_factory
+from configs.post_prediction_logic import post_prediction_logic


 RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -74,7 +75,9 @@ def build_model(

    prediction_data = train_data.drop(columns=target)

-    fit_predictions = model.predict(data=prediction_data)
+    fit_predictions = model.predict(
+        data=prediction_data, post_prediction_logic=post_prediction_logic
+    )

    logger.info("------------------------------")
    logger.info("--- Generating fit metrics ---")
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@ -11,6 +11,6 @@ AutogluonAutoML:
  output_filepath: ./data/model/autogluonmodel/
  problem_type: regression
  eval_metric: mean_absolute_error
-  time_limit: 60
+  time_limit: 600
  presets: medium_quality
  excluded_model_types: ['KNN']
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor.yaml
@ -3,7 +3,7 @@ feature_processor_config:
  subsample_amount: null
  subsample_seed: 0
  target: SAP_ENDING
-  drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE"]
+  drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
  # retain_features: ["TOTAL_FLOOR_AREA_STARTING", "SAP_STARTING", "HEAT_DEMAND_STARTING", "CARBON_STARTING", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "FIXED_LIGHTING_OUTLETS_COUNT", "PHOTO_SUPPLY_STARTING", "MULTI_GLAZE_PROPORTION_STARTING", "LOW_ENERGY_LIGHTING_STARTING", "NUMBER_OPEN_FIREPLACES_STARTING", "EXTENSION_COUNT_STARTING", "FLOOR_HEIGHT_STARTING", "PHOTO_SUPPLY_ENDING", "MULTI_GLAZE_PROPORTION_ENDING", "LOW_ENERGY_LIGHTING_ENDING", "NUMBER_OPEN_FIREPLACES_ENDING", "EXTENSION_COUNT_ENDING", "TOTAL_FLOOR_AREA_ENDING", "FLOOR_HEIGHT_ENDING", "DAYS_TO_STARTING", "DAYS_TO_ENDING"]
  # retain_features: null
 #   retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
@ -32,29 +32,30 @@ feature_processor_config:
 #  'DAYS_TO_STARTING',
 # 'WALLS_DESCRIPTION_STARTING',
 # 'FLOOR_DESCRIPTION_STARTING']
-  retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
- 'NUMBER_HEATED_ROOMS',
- 'FIXED_LIGHTING_OUTLETS_COUNT',
- 'CONSTRUCTION_AGE_BAND',
- 'TRANSACTION_TYPE_ENDING',
- 'LIGHTING_DESCRIPTION_ENDING',
- 'MAINHEAT_DESCRIPTION_ENDING',
- 'HOTWATER_DESCRIPTION_ENDING',
- 'MAIN_FUEL_ENDING',
- 'MECHANICAL_VENTILATION_ENDING',
- 'SECONDHEAT_DESCRIPTION_ENDING',
- 'ENERGY_TARIFF_ENDING',
- 'SOLAR_WATER_HEATING_FLAG_ENDING',
- 'PHOTO_SUPPLY_ENDING',
- 'WINDOWS_DESCRIPTION_ENDING',
- 'GLAZED_TYPE_ENDING',
- 'MULTI_GLAZE_PROPORTION_ENDING',
- 'LOW_ENERGY_LIGHTING_ENDING',
- 'NUMBER_OPEN_FIREPLACES_ENDING',
- 'MAINHEATCONT_DESCRIPTION_ENDING',
- 'EXTENSION_COUNT_ENDING',
- 'TOTAL_FLOOR_AREA_ENDING',
- 'FLOOR_HEIGHT_ENDING',
- 'DAYS_TO_ENDING',
-'WALLS_DESCRIPTION_ENDING',
-'FLOOR_DESCRIPTION_ENDING']
+#   retain_features: ["SAP_STARTING", 'PROPERTY_TYPE', 'BUILT_FORM', 'CONSTITUENCY', 'NUMBER_HABITABLE_ROOMS',
+#  'NUMBER_HEATED_ROOMS',
+#  'FIXED_LIGHTING_OUTLETS_COUNT',
+#  'CONSTRUCTION_AGE_BAND',
+#  'TRANSACTION_TYPE_ENDING',
+#  'LIGHTING_DESCRIPTION_ENDING',
+#  'MAINHEAT_DESCRIPTION_ENDING',
+#  'HOTWATER_DESCRIPTION_ENDING',
+#  'MAIN_FUEL_ENDING',
+#  'MECHANICAL_VENTILATION_ENDING',
+#  'SECONDHEAT_DESCRIPTION_ENDING',
+#  'ENERGY_TARIFF_ENDING',
+#  'SOLAR_WATER_HEATING_FLAG_ENDING',
+#  'PHOTO_SUPPLY_ENDING',
+#  'WINDOWS_DESCRIPTION_ENDING',
+#  'GLAZED_TYPE_ENDING',
+#  'MULTI_GLAZE_PROPORTION_ENDING',
+#  'LOW_ENERGY_LIGHTING_ENDING',
+#  'NUMBER_OPEN_FIREPLACES_ENDING',
+#  'MAINHEATCONT_DESCRIPTION_ENDING',
+#  'EXTENSION_COUNT_ENDING',
+#  'TOTAL_FLOOR_AREA_ENDING',
+#  'FLOOR_HEIGHT_ENDING',
+#  'DAYS_TO_ENDING',
+# 'WALLS_DESCRIPTION_ENDING',
+# 'FLOOR_DESCRIPTION_ENDING']
+  retain_features: null
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
@ -5,16 +5,40 @@ During the feature processor step, we can apply additional business logic and fe
 """
 Business Logic dict + functions
 """
-business_logic = {}
+
+
+def remove_starting_columns(df):
+    keep_column_index = [
+        False if col_name.endswith("_STARTING") else True
+        for col_name in list(df.columns)
+    ]
+    keep_columns = df.columns[keep_column_index].to_list()
+    keep_columns.append("SAP_STARTING")
+    df = df[keep_columns]
+    return df
+
+
+# def keep_ending_columns(df):
+#     ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
+#     keep_columns = df.columns[ending_column_index].to_list()
+#     keep_columns.append("SAP_STARTING")
+#     print(keep_columns)
+#     df = df[keep_columns]
+#     return df
+
+business_logic = {
+    "remove_starting_columns": remove_starting_columns
+    # "keep_ENDING_COLUMNS": keep_ending_columns
+}

 """
 New features dict + function
 """


-def SAP_ENDING(df):
-    return df["SAP_STARTING"] + df["RDSAP_CHANGE"]
+# def SAP_ENDING(df):
+#     return df["SAP_STARTING"] + df["RDSAP_CHANGE"]


-new_feature_funcs = {"SAP_ENDING": SAP_ENDING}
-# new_feature_funcs = {}
+# new_feature_funcs = {"SAP_ENDING": SAP_ENDING}
+new_feature_funcs = {}
--- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
@ -0,0 +1,32 @@
+"""
+After generating predictions, we may want to apply some post-processing to them
+"""
+import pandas as pd
+
+
+def clip_predictions_to_minimum_value(
+    data: pd.DataFrame, predictions: pd.Series, minimum_value: int = 1
+) -> pd.Series:
+
+    series_name = predictions.name
+    predictions.name = "predictions"
+    predictions_df = pd.concat([data, predictions], axis=1)
+    replace_index = predictions_df["SAP_STARTING"] > predictions_df["predictions"]
+    predictions_df.loc[replace_index, "predictions"] = (
+        predictions_df.loc[replace_index, "SAP_STARTING"] + minimum_value
+    )
+
+    predictions_new = predictions_df["predictions"]
+    predictions_new.name = series_name
+    return predictions_new
+
+
+# def round_predictions(data: pd.DataFrame, predictions: pd.Series) -> pd.Series:
+
+#     return predictions.round()
+
+
+post_prediction_logic = {
+    "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
+    # "round_predictions": round_predictions
+}
--- a/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/prediction_analysis.yaml
@ -1 +1,4 @@
 dataclient_type: local
+nshap_samples: 100 # how many samples to use to approximate each Shapley value; larger values will be slower
+n_val: 30  # how many datapoints from the validation data to interpret predictions for; larger values will be slower
+row_index: [0, 10, 20] # positional row indices of the example datapoints to explain
--- a/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/prepare_data.yaml
@ -1,6 +1,7 @@
 input_dataclient_type: aws-s3
 output_dataclient_type: local
-data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
+# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
+data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
 train_proportion: 0.9
 output_train_filepath: ./data/prepared_data/train.parquet
 output_test_filepath: ./data/prepared_data/test.parquet
--- a/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py
+++ b/modules/ml-pipeline/src/pipeline/core/FeatureProcessor.py
@ -109,7 +109,9 @@ class DataFrameFeatureProcessor:

        # TODO: to test
        for key, value in new_feature_funcs.items():
-            df[key] = value(df)
+            key_column = value(df)
+            key_column.name = key
+            df = pd.concat([df, key_column], axis=1)

        return df

--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@ -75,7 +75,9 @@ class SKLearnLinearRegression:
        y_train = data[target]
        self.model.fit(x_train, y_train)

-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
        """
        Method to predict
        """
@ -128,7 +130,9 @@ class SKLearnSVMRegression:
        y_train = data[target]
        self.model.fit(x_train, y_train)

-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
        """
        Method to predict
        """
@ -197,15 +201,39 @@ class AutogluonAutoML:
            excluded_model_types=model_hyperparameters["excluded_model_types"],
        )

-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None = None
+    ) -> pd.Series:
        """
        Method to predict
        """

+        if post_prediction_logic is None:
+            post_prediction_logic = {}
+
        if self.model is None:
            print("No model loaded/ trained")
            exit(1)

        predictions = pd.Series(self.model.predict(data))

+        if len(post_prediction_logic) != 0:
+            predictions = self._apply_post_prediction_logic(
+                data=data,
+                predictions=predictions,
+                post_prediction_logic=post_prediction_logic,
+            )
+
+        return predictions
+
+    def _apply_post_prediction_logic(
+        self, data: pd.DataFrame, predictions: pd.Series, post_prediction_logic: dict
+    ):
+        """
+        Apply each post-processing function in turn to clean up the predictions
+        """
+
+        for _, value in post_prediction_logic.items():
+            predictions = value(data, predictions)
+
        return predictions
--- a/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/interface/InterfaceModels.py
@ -32,7 +32,9 @@ class MLModel(Protocol):
        """
        ...

-    def predict(self, data: pd.DataFrame) -> pd.Series:
+    def predict(
+        self, data: pd.DataFrame, post_prediction_logic: dict | None
+    ) -> pd.Series:
        """
        Method to predict
        """
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@ -15,20 +15,20 @@ stages:
    outs:
    - path: data/prepared_data/
      hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
      nfiles: 2
  build_model:
    cmd: python build_model.py
    deps:
    - path: build_model.py
      hash: md5
-      md5: f9fa2a66d908b42ae196ce6f0f782258
-      size: 5134
+      md5: 84b86e829cb164fb2a202033f39e66e8
+      size: 5243
    - path: data/prepared_data
      hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
      nfiles: 2
    params:
      configs/build_model.yaml:
@ -36,7 +36,7 @@ stages:
          output_filepath: ./data/model/autogluonmodel/
          problem_type: regression
          eval_metric: mean_absolute_error
-          time_limit: 60
+          time_limit: 600
          presets: medium_quality
          excluded_model_types:
          - KNN
@ -49,30 +49,30 @@ stages:
    outs:
    - path: data/model/
      hash: md5
-      md5: cb03448b572cb167bf281ee8d43dccd9.dir
-      size: 99423757
-      nfiles: 14
+      md5: d9b051bb9cc626b4fc4b77873838f029.dir
+      size: 242877007
+      nfiles: 18
    - path: metrics/fit_metrics.json
      hash: md5
-      md5: 48d9cc86c22c1ac0da8903a32a7d10c3
-      size: 183
+      md5: bbf8a1bb90cd8d9fea447ca97fe8eea3
+      size: 180
  generate_predictions:
    cmd: python generate_predictions.py
    deps:
    - path: data/model
      hash: md5
-      md5: cb03448b572cb167bf281ee8d43dccd9.dir
-      size: 99423757
-      nfiles: 14
+      md5: d9b051bb9cc626b4fc4b77873838f029.dir
+      size: 242877007
+      nfiles: 18
    - path: data/prepared_data
      hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
      nfiles: 2
    - path: generate_predictions.py
      hash: md5
-      md5: a25c4611ff467cdc1c921918112a30fe
-      size: 4311
+      md5: 20c4657f5872cb8b60b69344600251b8
+      size: 4420
    params:
      configs/generate_predictions.yaml:
        input_dataclient_type: local
@ -83,21 +83,21 @@ stages:
    outs:
    - path: data/predictions/
      hash: md5
-      md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir
-      size: 383878
+      md5: 81f707df70bc0d9f7b305427e0034ed1.dir
+      size: 383598
      nfiles: 1
  generate_metrics:
    cmd: python generate_metrics.py
    deps:
    - path: data/predictions
      hash: md5
-      md5: 3d5002f0eecd2374a0ef2fd6f711503e.dir
-      size: 383878
+      md5: 81f707df70bc0d9f7b305427e0034ed1.dir
+      size: 383598
      nfiles: 1
    - path: data/prepared_data
      hash: md5
-      md5: c183712d22ab739e0be016724f44ee1c.dir
-      size: 12203729
+      md5: 2f00c92bf2fff7ed8006f4036f8f7d06.dir
+      size: 21102167
      nfiles: 2
    - path: generate_metrics.py
      hash: md5
@ -111,8 +111,8 @@ stages:
    outs:
    - path: metrics/metrics.json
      hash: md5
-      md5: 08a81d2e5cecf360043498526bc98314
-      size: 183
+      md5: 75baa77d94386c9a567afdac48384435
+      size: 185
  startup_cleanup:
    cmd: python startup_cleanup.py
    deps:
--- a/modules/ml-pipeline/src/pipeline/generate_predictions.py
+++ b/modules/ml-pipeline/src/pipeline/generate_predictions.py
@ -12,6 +12,7 @@ from core.interface.InterfaceDataClient import DataClient
 from core.DataClient import dataclient_factory
 from core.MLModels import model_factory
 from core.Logger import logger
+from configs.post_prediction_logic import post_prediction_logic


 RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
@ -70,7 +71,9 @@ def generate_predictions(
        test_data.drop(columns=target) if target in test_data.columns else test_data
    )

-    predictions = model.predict(data=prediction_data)
+    predictions = model.predict(
+        data=prediction_data, post_prediction_logic=post_prediction_logic
+    )

    logger.info("--------------------------")
    logger.info("--- Saving predictions ---")
--- a/modules/ml-pipeline/src/pipeline/prediction_analysis.py
+++ b/modules/ml-pipeline/src/pipeline/prediction_analysis.py
@ -14,6 +14,7 @@ shap.initjs()


 import yaml
+from typing import List
 from pathlib import Path
 from core.interface.InterfaceModels import MLModel
 from core.interface.InterfaceDataClient import DataClient
@ -36,6 +37,11 @@ feature_process_params = yaml.safe_load(open(feature_process_path))
 build_model_path = Path(__file__).parent / "configs" / "build_model.yaml"
 build_model_params = yaml.safe_load(open(build_model_path))

+generate_predictions_path = (
+    Path(__file__).parent / "configs" / "generate_predictions.yaml"
+)
+generate_predictions_params = yaml.safe_load(open(generate_predictions_path))
+
 prediction_analysis_path = (
    Path(__file__).parent / "configs" / "prediction_analysis.yaml"
 )
@ -50,29 +56,40 @@ dataclient = dataclient_factory(
    dataclient_config=client_params[dataclient_type],
 )

+target = feature_process_params["feature_processor_config"]["target"]
+predictions_column_name = generate_predictions_params["predictions_column_name"]
+
 output_test_filepath = prepare_data_params["output_test_filepath"]

+predictions_output_filepath = generate_predictions_params["predictions_output_filepath"]
+
+nshap_samples = prediction_analysis_params["nshap_samples"]
+row_index = prediction_analysis_params["row_index"]
+

 def prediction_analysis(
-    model: MLModel, dataclient: DataClient, output_test_filepath: str
+    model: MLModel,
+    dataclient: DataClient,
+    target: str,
+    predictions_column_name: str,
+    output_test_filepath: str,
+    predictions_output_filepath: str,
+    nshap_samples: int,
+    row_index: List[int],
 ):

    test_df = dataclient.load_data(output_test_filepath)
-    predictions = dataclient.load_data("./data/predictions/predictions.parquet")
+    predictions = dataclient.load_data(predictions_output_filepath)

-    mix_df = test_df.copy()
-    mix_df["predictions"] = predictions
-    mix_df["residual"] = abs(mix_df["predictions"] - mix_df["SAP_ENDING"])
+    mix_df = pd.concat([test_df.copy(), predictions], axis=1)
+    mix_df["residual"] = abs(mix_df[predictions_column_name] - mix_df[target])
    mix_df = mix_df.sort_values("residual", ascending=False)

-    target = "SAP_ENDING"
    test_df_without_target = test_df.drop(columns=[target])

-    # test_df_summary = shap.kmeans(test_df, 10)
-    # print("Baseline feature-values: \n", test_df_summary)
-    class AutogluonWrapper:
-        def __init__(self, predictor, feature_names):
-            self.ag_model = predictor
+    class ModelWrapper:
+        def __init__(self, model, feature_names):
+            self.model = model
            self.feature_names = feature_names

        def predict(self, X):
@ -80,33 +97,39 @@ def prediction_analysis(
                X = X.values.reshape(1, -1)
            if not isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X, columns=self.feature_names)
-            return self.ag_model.predict(X)
+            return self.model.predict(X)
+
+    model_wrapper = ModelWrapper(model, feature_names=test_df_without_target.columns)

-    ag_wrapper = AutogluonWrapper(
-        model.model, feature_names=test_df_without_target.columns
-    )
    explainer = shap.KernelExplainer(
-        ag_wrapper.predict, test_df_without_target.head(100)
+        model_wrapper.predict, test_df_without_target.head(100)
    )

-    NSHAP_SAMPLES = 100  # how many samples to use to approximate each Shapely value, larger values will be slower
-    N_VAL = 30  # how many datapoints from validation data should we interpret predictions for, larger values will be slower
+    shap_predictions_df = pd.DataFrame(index=test_df_without_target.columns)
+    for index in row_index:
+        single_datapoint = test_df_without_target.iloc[[index]]
+        # single_prediction = model_wrapper.predict(single_datapoint)
+        shap_values_single = explainer.shap_values(
+            single_datapoint, nsamples=nshap_samples
+        )
+        shap.force_plot(
+            explainer.expected_value,
+            shap_values_single,
+            test_df_without_target.iloc[index, :],
+        )
+        shap_single_prediction_df = pd.DataFrame(
+            shap_values_single, columns=test_df_without_target.columns
+        ).T
+        shap_single_prediction_df.columns = [index]
+        shap_single_prediction_df = shap_single_prediction_df.sort_values(index)
+        shap_predictions_df = pd.merge(
+            left=shap_predictions_df,
+            right=shap_single_prediction_df,
+            left_index=True,
+            right_index=True,
+        )

-    ROW_INDEX = 8541  # 23690 #21059  # index of an example datapoint
-    single_datapoint = test_df_without_target.iloc[[ROW_INDEX]]
-    single_prediction = ag_wrapper.predict(single_datapoint)
-
-    shap_values_single = explainer.shap_values(single_datapoint, nsamples=NSHAP_SAMPLES)
-    shap.force_plot(
-        explainer.expected_value,
-        shap_values_single,
-        test_df_without_target.iloc[ROW_INDEX, :],
-    )
-    shap_single_prediciton_df = pd.DataFrame(
-        shap_values_single, columns=test_df_without_target.columns
-    ).T
-    shap_single_prediciton_df.columns = ["contribution"]
-    shap_single_prediciton_df = shap_single_prediciton_df.sort_values("contribution")
+    return shap_predictions_df


 if __name__ == "__main__":
@ -116,7 +139,13 @@ if __name__ == "__main__":
    logger.info("----------------------------")

    prediction_analysis(
-        model=model, dataclient=dataclient, output_test_filepath=output_test_filepath
+        model=model,
+        dataclient=dataclient,
+        target=target,
+        predictions_column_name=predictions_column_name,
+        output_test_filepath=output_test_filepath,
+        nshap_samples=nshap_samples,
+        row_index=row_index,
    )

    logger.info("-------------------------------")