From 3c47d59fb9e4e733811fbf18fdf6eb2e2a2c44f8 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Fri, 4 Oct 2024 23:17:15 +0100
Subject: [PATCH 01/59] add model with new data

---
 .../src/pipeline/configs/scenarios.yaml       |  2 +-
 .../src/pipeline/configs/settings.yaml        |  8 +--
 modules/ml-pipeline/src/pipeline/dvc.lock     | 61 ++++++++++---------
 3 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
index 0d4ee07..dd146eb 100644
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@@ -8,6 +8,6 @@ default:
       # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
-      - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
+      # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
     comparison_output_filepath: ./metrics/scenario_table.md
     metrics_output_filepath: ./metrics/scenario_metrics.md
diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index 838e9a9..edaecba 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -18,10 +18,8 @@ default:
   prepare_data:
     input_dataclient_type: aws-s3
     output_dataclient_type: local
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet
-    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
+    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
     train_proportion: 0.9
     output_train_filepath: ./data/prepared_data/train.parquet
     output_test_filepath: ./data/prepared_data/test.parquet
@@ -37,7 +35,7 @@ default:
       drop_columns: [
         "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
         'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
-        'number_habitable_rooms', 'number_heated_rooms']
+        'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',]
       retain_features: null
       # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
       #  'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 31315db..1d19e60 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -34,13 +34,19 @@ stages:
         - number_heated_rooms_ending
         - number_habitable_rooms
         - number_heated_rooms
+        - lighting_cost_starting
+        - lighting_cost_ending
+        - heating_cost_starting
+        - heating_cost_ending
+        - hot_water_cost_starting
+        - hot_water_cost_ending
         default.feature_processor.feature_processor_config.retain_features:
         default.feature_processor.feature_processor_config.subsample_amount:
         default.feature_processor.feature_processor_config.subsample_seed: 0
         default.feature_processor.feature_processor_config.target: sap_ending
         default.feature_processor.feature_processor_type: dataframe
         default.prepare_data.data_filepath:
-          s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
+          s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
         default.prepare_data.input_dataclient_type: aws-s3
         default.prepare_data.output_dataclient_type: local
         default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@@ -49,8 +55,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 45056059
+      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
+      size: 49665833
       nfiles: 2
   build_model:
     cmd: python 2_build_model.py
@@ -61,8 +67,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 45056059
+      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
+      size: 49665833
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -94,18 +100,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: d9c9afc05e8780db47c0548b19bf7d19.dir
-      size: 3349989
+      md5: 9dafd875d76676a06890af9be8778567.dir
+      size: 3617506
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 13c3100e1486c27a83a8a47491077842.dir
-      size: 773523079
+      md5: 36fb8323508e968a297d650954ccbc58.dir
+      size: 800833479
       nfiles: 36
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a
-      size: 224
+      md5: e2bc34ae26afbb854f0b021d12b1b569
+      size: 223
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -115,13 +121,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 13c3100e1486c27a83a8a47491077842.dir
-      size: 773523079
+      md5: 36fb8323508e968a297d650954ccbc58.dir
+      size: 800833479
       nfiles: 36
     - path: data/prepared_data
       hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 45056059
+      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
+      size: 49665833
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -133,8 +139,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
-      size: 463197
+      md5: 8f4de36de171006cb6d9eacff7718ba3.dir
+      size: 508400
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -145,13 +151,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
-      size: 463197
+      md5: 8f4de36de171006cb6d9eacff7718ba3.dir
+      size: 508400
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 45056059
+      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
+      size: 49665833
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -161,8 +167,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 3e08df02fd5c5d094bcf936e1338d596
-      size: 223
+      md5: 58e25eb5e1699dbb33e78af9ba4c2964
+      size: 222
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -176,15 +182,14 @@ stages:
           input_dataclient_type: aws-s3
           output_dataclient_type: local
           scenario_data_filepaths:
-          - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
           comparison_output_filepath: ./metrics/scenario_table.md
           metrics_output_filepath: ./metrics/scenario_metrics.md
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: fa4d6d7bbd7818613800da5f8f37ea96
-      size: 363
+      md5: d41d8cd98f00b204e9800998ecf8427e
+      size: 0
     - path: metrics/scenario_table.md
       hash: md5
-      md5: d6baf100a1623cc2467c2f8221d314c9
-      size: 2133
+      md5: d41d8cd98f00b204e9800998ecf8427e
+      size: 0

From 8858052b1f1a3035b77504f9ac83c9dc681f8f5d Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 6 Oct 2024 11:16:40 +0100
Subject: [PATCH 02/59] add fix for pip issues

---
 deployment/Dockerfile.prediction.lambda       | 6 +++++-
 modules/ml-pipeline/src/Prediction.Dockerfile | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
index f8000bf..9ee4306 100644
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@@ -13,7 +13,11 @@ RUN yum install -y gcc python3-devel gcc-c++
 
 # Install python packages
 COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt
-RUN pip install --no-cache-dir -r ./requirements.txt
+
+RUN pip install uv
+
+RUN uv pip install -r requirements.txt --system
+# RUN pip install --no-cache-dir -r ./requirements.txt
 
 # Copy the project code
 COPY modules/ml-pipeline/src/pipeline ./pipeline
diff --git a/modules/ml-pipeline/src/Prediction.Dockerfile b/modules/ml-pipeline/src/Prediction.Dockerfile
index e0a292c..deb8c45 100644
--- a/modules/ml-pipeline/src/Prediction.Dockerfile
+++ b/modules/ml-pipeline/src/Prediction.Dockerfile
@@ -6,7 +6,12 @@ RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
 COPY pipeline/requirements/predictions/requirements.txt requirements.txt
 
 RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
+
+RUN pip install uv
+
+RUN uv pip install -r requirements.txt --system
+
+# RUN pip install -r requirements.txt
 
 # Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
 COPY pipeline/ /home/pipeline/

From 9d0ae55f15fb530ec0046ef4bb25e8b6d90f3e4c Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 7 Oct 2024 16:36:47 +0100
Subject: [PATCH 03/59] add scenario data

---
 modules/ml-pipeline/src/pipeline/configs/scenarios.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
index dd146eb..845238d 100644
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@@ -9,5 +9,6 @@ default:
       # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
+      - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
     comparison_output_filepath: ./metrics/scenario_table.md
     metrics_output_filepath: ./metrics/scenario_metrics.md

From 9ed90fec162decc7798d90f1757e59442ef2546b Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 7 Oct 2024 16:40:31 +0100
Subject: [PATCH 04/59] add scenario data

---
 modules/ml-pipeline/src/pipeline/dvc.lock | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 1d19e60..678fd3f 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -182,14 +182,15 @@ stages:
           input_dataclient_type: aws-s3
           output_dataclient_type: local
           scenario_data_filepaths:
+          - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
           comparison_output_filepath: ./metrics/scenario_table.md
           metrics_output_filepath: ./metrics/scenario_metrics.md
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: d41d8cd98f00b204e9800998ecf8427e
-      size: 0
+      md5: 75863b14bb0db59fd709aacbc278d080
+      size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: d41d8cd98f00b204e9800998ecf8427e
-      size: 0
+      md5: 02158bbcefbd98d12940fc4998e2ae5d
+      size: 969

From c19eece16c3e0ec164783990b145a39969ee8e6b Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 8 Oct 2024 15:10:04 +0100
Subject: [PATCH 05/59] add new scenario data to run

---
 modules/ml-pipeline/src/pipeline/configs/scenarios.yaml | 8 ++------
 modules/ml-pipeline/src/pipeline/dvc.lock               | 8 ++++----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
index 845238d..98b00fd 100644
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@@ -3,12 +3,8 @@ default:
     input_dataclient_type: aws-s3
     output_dataclient_type: local
     scenario_data_filepaths:
-      # - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet
-      # - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet
-      # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
-      # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
-      # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
-      - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
+      # - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
+      - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
     comparison_output_filepath: ./metrics/scenario_table.md
     metrics_output_filepath: ./metrics/scenario_metrics.md
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 678fd3f..bd91a75 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -182,15 +182,15 @@ stages:
           input_dataclient_type: aws-s3
           output_dataclient_type: local
           scenario_data_filepaths:
-          - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
+          - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
           comparison_output_filepath: ./metrics/scenario_table.md
           metrics_output_filepath: ./metrics/scenario_metrics.md
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 75863b14bb0db59fd709aacbc278d080
-      size: 356
+      md5: 746ffa7d10fcdceaf2330d07c7ee623b
+      size: 363
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 02158bbcefbd98d12940fc4998e2ae5d
+      md5: 3b7dfc36054bce065f849423abf31a5d
       size: 969

From f9625e4575602c63d600a888764be304112c77a4 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 8 Oct 2024 14:31:27 +0000
Subject: [PATCH 06/59] Update Registry

---
 MODEL_REGISTRY.md | 50 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 2fea343..f0b8dc9 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.14.0",
+        "version": "v0.15.0",
         "stage": {
             "dev": "v0.14.0"
         },
@@ -16,17 +16,57 @@
         "active": true
     },
     "heat": {
-        "version": "v0.5.0",
+        "version": "v0.6.0",
         "stage": {
-            "dev": "v0.5.0"
+            "dev": "v0.6.0"
         },
         "registered": true,
         "active": true
     },
     "carbon": {
-        "version": "v0.5.0",
+        "version": "v0.6.0",
         "stage": {
-            "dev": "v0.5.0"
+            "dev": "v0.6.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "hotwater": {
+        "version": "v1.0.0",
+        "stage": {
+            "dev": "v1.0.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "heating": {
+        "version": "v1.0.0",
+        "stage": {
+            "dev": "v1.0.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "lighting": {
+        "version": "v1.0.0",
+        "stage": {
+            "dev": "v1.0.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "hotwaterkwh": {
+        "version": "v1.3.0",
+        "stage": {
+            "dev": "v1.3.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "heatingkwh": {
+        "version": "v1.5.0",
+        "stage": {
+            "dev": "v1.5.0"
         },
         "registered": true,
         "active": true

From db5b795ce7425a5c357a87d22dab02e271d0130f Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 8 Oct 2024 14:32:04 +0000
Subject: [PATCH 07/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index f0b8dc9..eeb25e9 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.15.0",
         "stage": {
-            "dev": "v0.14.0"
+            "dev": "v0.15.0"
         },
         "registered": true,
         "active": true

From 037c1c9e5f38801c5c30ed4668824c033e54d958 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 9 Oct 2024 11:47:28 +0100
Subject: [PATCH 08/59] sap model with changed u values data

---
 .../src/pipeline/configs/scenarios.yaml       |  3 +-
 .../src/pipeline/configs/settings.yaml        |  3 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 60 +++++++++----------
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
index 98b00fd..19a8764 100644
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@@ -5,6 +5,7 @@ default:
     scenario_data_filepaths:
       # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
-      - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
+      # - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
+      - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
     comparison_output_filepath: ./metrics/scenario_table.md
     metrics_output_filepath: ./metrics/scenario_metrics.md
diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index edaecba..4022a1b 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -19,7 +19,8 @@ default:
     input_dataclient_type: aws-s3
     output_dataclient_type: local
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
-    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
+    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
     train_proportion: 0.9
     output_train_filepath: ./data/prepared_data/train.parquet
     output_test_filepath: ./data/prepared_data/test.parquet
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index bd91a75..910ab1c 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -46,7 +46,7 @@ stages:
         default.feature_processor.feature_processor_config.target: sap_ending
         default.feature_processor.feature_processor_type: dataframe
         default.prepare_data.data_filepath:
-          s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
+          s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
         default.prepare_data.input_dataclient_type: aws-s3
         default.prepare_data.output_dataclient_type: local
         default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@@ -55,8 +55,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
-      size: 49665833
+      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
+      size: 49655735
       nfiles: 2
   build_model:
     cmd: python 2_build_model.py
@@ -67,8 +67,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
-      size: 49665833
+      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
+      size: 49655735
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -100,18 +100,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 9dafd875d76676a06890af9be8778567.dir
-      size: 3617506
+      md5: a1c97fa17adae60ee97696c95cfebe7e.dir
+      size: 3618488
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 36fb8323508e968a297d650954ccbc58.dir
-      size: 800833479
-      nfiles: 36
+      md5: 7e6f38163a2cfe208ace26702ae28793.dir
+      size: 762167138
+      nfiles: 35
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: e2bc34ae26afbb854f0b021d12b1b569
-      size: 223
+      md5: 8a6db7d72f9b4b8a5e411beaa3a2a413
+      size: 224
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -121,13 +121,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 36fb8323508e968a297d650954ccbc58.dir
-      size: 800833479
-      nfiles: 36
+      md5: 7e6f38163a2cfe208ace26702ae28793.dir
+      size: 762167138
+      nfiles: 35
     - path: data/prepared_data
       hash: md5
-      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
-      size: 49665833
+      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
+      size: 49655735
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -139,8 +139,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 8f4de36de171006cb6d9eacff7718ba3.dir
-      size: 508400
+      md5: 14729e943275748a19e86cf47d6561ee.dir
+      size: 508468
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -151,13 +151,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 8f4de36de171006cb6d9eacff7718ba3.dir
-      size: 508400
+      md5: 14729e943275748a19e86cf47d6561ee.dir
+      size: 508468
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 8ce9c629a531d54fa9b0bfc305391400.dir
-      size: 49665833
+      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
+      size: 49655735
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -167,8 +167,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 58e25eb5e1699dbb33e78af9ba4c2964
-      size: 222
+      md5: e076cba5f5737fd6d95931931411fdd0
+      size: 225
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -182,15 +182,15 @@ stages:
           input_dataclient_type: aws-s3
           output_dataclient_type: local
           scenario_data_filepaths:
-          - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
+          - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
           comparison_output_filepath: ./metrics/scenario_table.md
           metrics_output_filepath: ./metrics/scenario_metrics.md
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 746ffa7d10fcdceaf2330d07c7ee623b
-      size: 363
+      md5: 3e1047665bc18e92bf0f4216b9cc0c88
+      size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 3b7dfc36054bce065f849423abf31a5d
-      size: 969
+      md5: 71d4052f1ab986253bce9268644cc421
+      size: 872

From 5166493eda6a4d9b1e1c6e8c8b7876f141521f2b Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 9 Oct 2024 16:25:04 +0100
Subject: [PATCH 09/59] model with no floor_thermal starting and ending

---
 .../src/pipeline/configs/settings.yaml        |  4 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 50 ++++++++++---------
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index 4022a1b..483fdd5 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -36,7 +36,9 @@ default:
       drop_columns: [
         "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
         'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
-        'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',]
+        'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting',
+        'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',
+        'floor_thermal_transmittance', 'floor_thermal_transmittance_ending']
       retain_features: null
       # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
       #  'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 910ab1c..5e87957 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -40,6 +40,8 @@ stages:
         - heating_cost_ending
         - hot_water_cost_starting
         - hot_water_cost_ending
+        - floor_thermal_transmittance
+        - floor_thermal_transmittance_ending
         default.feature_processor.feature_processor_config.retain_features:
         default.feature_processor.feature_processor_config.subsample_amount:
         default.feature_processor.feature_processor_config.subsample_seed: 0
@@ -55,8 +57,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
-      size: 49655735
+      md5: 9159a400187e6d65687b5e411a4cb0de.dir
+      size: 48034631
       nfiles: 2
   build_model:
     cmd: python 2_build_model.py
@@ -67,8 +69,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
-      size: 49655735
+      md5: 9159a400187e6d65687b5e411a4cb0de.dir
+      size: 48034631
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -100,18 +102,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: a1c97fa17adae60ee97696c95cfebe7e.dir
-      size: 3618488
+      md5: 6ac50c46e6fd740ccf76da4c2bf6735d.dir
+      size: 3615441
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 7e6f38163a2cfe208ace26702ae28793.dir
-      size: 762167138
+      md5: 2212643103819177f58da1d3063c8c94.dir
+      size: 761489901
       nfiles: 35
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 8a6db7d72f9b4b8a5e411beaa3a2a413
-      size: 224
+      md5: d379cf95e07eb7c8797b4b766f8292cf
+      size: 225
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -121,13 +123,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 7e6f38163a2cfe208ace26702ae28793.dir
-      size: 762167138
+      md5: 2212643103819177f58da1d3063c8c94.dir
+      size: 761489901
       nfiles: 35
     - path: data/prepared_data
       hash: md5
-      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
-      size: 49655735
+      md5: 9159a400187e6d65687b5e411a4cb0de.dir
+      size: 48034631
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -139,8 +141,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 14729e943275748a19e86cf47d6561ee.dir
-      size: 508468
+      md5: e8bd8e8ba88a667ccea645890d348e62.dir
+      size: 507948
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -151,13 +153,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 14729e943275748a19e86cf47d6561ee.dir
-      size: 508468
+      md5: e8bd8e8ba88a667ccea645890d348e62.dir
+      size: 507948
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
-      size: 49655735
+      md5: 9159a400187e6d65687b5e411a4cb0de.dir
+      size: 48034631
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -167,8 +169,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: e076cba5f5737fd6d95931931411fdd0
-      size: 225
+      md5: a8cf405272776730f5818d50b20c6f43
+      size: 222
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -188,9 +190,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 3e1047665bc18e92bf0f4216b9cc0c88
+      md5: b110d6cb700b124220bf5cbc3c69f342
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 71d4052f1ab986253bce9268644cc421
+      md5: ab2b7b4b251c9c7eb72df21f4d0e02bc
       size: 872

From dbcb839be8510a169cc9e890b61c3f52face4ecb Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Wed, 9 Oct 2024 21:47:15 +0100
Subject: [PATCH 10/59] add new scenario data

---
 modules/ml-pipeline/src/pipeline/configs/scenarios.yaml | 3 ++-
 modules/ml-pipeline/src/pipeline/dvc.lock               | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
index 19a8764..6e60e8b 100644
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@@ -6,6 +6,7 @@ default:
       # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
       # - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
-      - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
+      # - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
+      - s3://retrofit-data-dev/scenario_data/09-10-2024-18-21-08/recommendations_scoring_data.parquet
     comparison_output_filepath: ./metrics/scenario_table.md
     metrics_output_filepath: ./metrics/scenario_metrics.md
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 5e87957..e37c23b 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -184,15 +184,15 @@ stages:
           input_dataclient_type: aws-s3
           output_dataclient_type: local
           scenario_data_filepaths:
-          - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
+          - s3://retrofit-data-dev/scenario_data/09-10-2024-18-21-08/recommendations_scoring_data.parquet
           comparison_output_filepath: ./metrics/scenario_table.md
           metrics_output_filepath: ./metrics/scenario_metrics.md
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: b110d6cb700b124220bf5cbc3c69f342
+      md5: a5d9c42d38ef50e4fdf99a3e6043af2a
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: ab2b7b4b251c9c7eb72df21f4d0e02bc
+      md5: 3e48c953451af8852572299b66988910
       size: 872

From 5a67ba1e151743b6f75fccebfc4a28b5ba49ae0b Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Wed, 16 Oct 2024 16:03:02 +0000
Subject: [PATCH 11/59] Update Registry

---
 MODEL_REGISTRY.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index eeb25e9..4c42409 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.15.0",
+        "version": "v0.16.0",
         "stage": {
             "dev": "v0.15.0"
         },
@@ -16,17 +16,17 @@
         "active": true
     },
     "heat": {
-        "version": "v0.6.0",
+        "version": "v0.7.0",
         "stage": {
-            "dev": "v0.6.0"
+            "dev": "v0.7.0"
         },
         "registered": true,
         "active": true
     },
     "carbon": {
-        "version": "v0.6.0",
+        "version": "v0.7.0",
         "stage": {
-            "dev": "v0.6.0"
+            "dev": "v0.7.0"
         },
         "registered": true,
         "active": true

From ce1ebb6174b39ec90afdf1516e04387c1ddd48fb Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Wed, 16 Oct 2024 16:03:39 +0000
Subject: [PATCH 12/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 4c42409..1925c6e 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.16.0",
         "stage": {
-            "dev": "v0.15.0"
+            "dev": "v0.16.0"
         },
         "registered": true,
         "active": true

From 88c5b6c93a7039298f8bf4da771b29301c0e42c6 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 15 Sep 2025 14:56:53 +0100
Subject: [PATCH 13/59] add new model, with latest data

---
 .../src/pipeline/configs/settings.yaml        |  5 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 56 ++++++++++---------
 2 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index 483fdd5..34e03fb 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -20,7 +20,8 @@ default:
     output_dataclient_type: local
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
-    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
+    data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
     train_proportion: 0.9
     output_train_filepath: ./data/prepared_data/train.parquet
     output_test_filepath: ./data/prepared_data/test.parquet
@@ -38,7 +39,7 @@ default:
         'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
         'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting',
         'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',
-        'floor_thermal_transmittance', 'floor_thermal_transmittance_ending']
+        'floor_thermal_transmittance', 'floor_thermal_transmittance_ending', 'lodgement_date_starting', 'lodgement_date_ending',]
       retain_features: null
       # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
       #  'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index e37c23b..16b7b07 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -42,13 +42,15 @@ stages:
         - hot_water_cost_ending
         - floor_thermal_transmittance
         - floor_thermal_transmittance_ending
+        - lodgement_date_starting
+        - lodgement_date_ending
         default.feature_processor.feature_processor_config.retain_features:
         default.feature_processor.feature_processor_config.subsample_amount:
         default.feature_processor.feature_processor_config.subsample_seed: 0
         default.feature_processor.feature_processor_config.target: sap_ending
         default.feature_processor.feature_processor_type: dataframe
-        default.prepare_data.data_filepath:
-          s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
+        default.prepare_data.data_filepath: 
+          s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
         default.prepare_data.input_dataclient_type: aws-s3
         default.prepare_data.output_dataclient_type: local
         default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@@ -57,8 +59,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 9159a400187e6d65687b5e411a4cb0de.dir
-      size: 48034631
+      md5: 7cc090d55cb296ce5d360d655c06e861.dir
+      size: 46183314
       nfiles: 2
   build_model:
     cmd: python 2_build_model.py
@@ -69,8 +71,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 9159a400187e6d65687b5e411a4cb0de.dir
-      size: 48034631
+      md5: 7cc090d55cb296ce5d360d655c06e861.dir
+      size: 46183314
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -102,17 +104,17 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 6ac50c46e6fd740ccf76da4c2bf6735d.dir
-      size: 3615441
+      md5: a6196bf08607c43ba6bc637611bb32b0.dir
+      size: 3491001
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 2212643103819177f58da1d3063c8c94.dir
-      size: 761489901
-      nfiles: 35
+      md5: b225d7b01356cecefb3794a9a3cd19b5.dir
+      size: 790430916
+      nfiles: 36
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: d379cf95e07eb7c8797b4b766f8292cf
+      md5: 33421d5e3a2d569dbe6d4486c568a2b7
       size: 225
   generate_predictions:
     cmd: python 3_generate_predictions.py
@@ -123,13 +125,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 2212643103819177f58da1d3063c8c94.dir
-      size: 761489901
-      nfiles: 35
+      md5: b225d7b01356cecefb3794a9a3cd19b5.dir
+      size: 790430916
+      nfiles: 36
     - path: data/prepared_data
       hash: md5
-      md5: 9159a400187e6d65687b5e411a4cb0de.dir
-      size: 48034631
+      md5: 7cc090d55cb296ce5d360d655c06e861.dir
+      size: 46183314
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -141,8 +143,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: e8bd8e8ba88a667ccea645890d348e62.dir
-      size: 507948
+      md5: bd6821db9abc95af8c74aa20effd7f37.dir
+      size: 487194
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -153,13 +155,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: e8bd8e8ba88a667ccea645890d348e62.dir
-      size: 507948
+      md5: bd6821db9abc95af8c74aa20effd7f37.dir
+      size: 487194
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 9159a400187e6d65687b5e411a4cb0de.dir
-      size: 48034631
+      md5: 7cc090d55cb296ce5d360d655c06e861.dir
+      size: 46183314
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -169,8 +171,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: a8cf405272776730f5818d50b20c6f43
-      size: 222
+      md5: 9c2a7802554f5c2f750b2242c6003026
+      size: 223
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -190,9 +192,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: a5d9c42d38ef50e4fdf99a3e6043af2a
+      md5: c01524a0cc2e61151c106d7049af3bf9
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 3e48c953451af8852572299b66988910
+      md5: a995c8ef7ffbe2ca254441150817ae21
       size: 872

From 7b001f3abf8ff3a6ebac60552ac2ec853fcb0464 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 11:37:08 +0000
Subject: [PATCH 14/59] update to python 3.12 and autogluon 1.3

---
 modules/ml-pipeline/Makefile                  | 17 +++--
 .../src/pipeline/5_generate_scenarios.py      |  6 ++
 .../src/pipeline/configs/build_model.yaml     |  3 +-
 .../src/pipeline/configs/settings.yaml        |  3 +-
 .../ml-pipeline/src/pipeline/core/MLModels.py |  4 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 73 ++++++++++---------
 .../src/pipeline/metrics/.gitignore           |  2 -
 .../src/pipeline/metrics/fit_metrics.json     |  1 +
 .../src/pipeline/metrics/metrics.json         |  1 +
 .../predictions/requirements-dev.txt          | 14 ++--
 .../requirements/predictions/requirements.txt | 14 ++--
 .../training/requirements-dev.txt             | 20 ++---
 .../requirements/training/requirements.txt    |  8 +-
 13 files changed, 92 insertions(+), 74 deletions(-)
 create mode 100644 modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
 create mode 100644 modules/ml-pipeline/src/pipeline/metrics/metrics.json

diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile
index 0bef7d6..ff83c27 100644
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@@ -1,7 +1,8 @@
 export PYENV_ROOT=$(HOME)/.pyenv
 export PATH := $(PYENV_ROOT)/bin:$(PATH)
-PYTHON_VERSION ?= 3.10.12
-CONDA_ENV=dev_env_pipeline
+PYTHON_VERSION ?= 3.12.12
+CONDA_ENV=dev_env_pipeline_1
+CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
 
 .PHONY: init
 init: dev-conda
@@ -12,11 +13,13 @@ dev-conda:
 	# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
 	conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
 	conda init bash
-	conda run -v -n ${CONDA_ENV} pip install --upgrade pip
-	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt
-	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
-	conda run -v -n ${CONDA_ENV} pre-commit install
-	conda run -v -n ${CONDA_ENV} pip install ipykernel
+	${CONDA_ACTIVATE} ${CONDA_ENV} && \
+		which pip && \
+		pip install --upgrade pip && \
+		pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
+		pip install -r src/pipeline/requirements/version_control/requirements.txt && \
+		pre-commit install && \
+		pip install ipykernel
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
 	echo "conda activate ${CONDA_ENV}"
 
diff --git a/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py b/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
index 6debe32..faab4a9 100644
--- a/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
+++ b/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
@@ -99,6 +99,12 @@ def generate_scenario_predictions(
             ]
         )
 
+    # TEMPORARY FIX: ADD is_post_sap10_starting and is_post_sap10_ending if not present
+    if "is_post_sap10_starting" not in scenario_data.columns:
+        scenario_data["is_post_sap10_starting"] = False
+    if "is_post_sap10_ending" not in scenario_data.columns:
+        scenario_data["is_post_sap10_ending"] = False
+
     logger.info("--- Loading Model ---")
 
     model.load_model(model_filepath)
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index a36bfbc..38c0910 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -17,6 +17,7 @@ default:
       time_limit: 1800
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
-      infer_limit: 0.05
+      infer_limit: 0.0005
       infer_limit_batch_size: 10000
+      "fit_strategy": "parallel"
       ag_args_ensemble: {'num_folds_parallel': 2}
diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index 34e03fb..28d5cd9 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -21,7 +21,8 @@ default:
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
     # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
-    data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
+    data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
     train_proportion: 0.9
     output_train_filepath: ./data/prepared_data/train.parquet
     output_test_filepath: ./data/prepared_data/test.parquet
diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py
index 257261d..437c69f 100644
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@@ -1,4 +1,4 @@
-""""
+""" "
 Implementations of MLModels, all of which will have four methods to:
 - Load model
 - Save Model
@@ -152,6 +152,7 @@ class AutogluonAutoML:
         "infer_limit",
         "infer_limit_batch_size",
         "ag_args_ensemble",
+        "fit_strategy",
     ]
 
     def load_model(self, path: Union[Path, str]) -> None:
@@ -209,6 +210,7 @@ class AutogluonAutoML:
             infer_limit=model_hyperparameters["infer_limit"],
             infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
             ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
+            fit_strategy=model_hyperparameters["fit_strategy"],
         )
 
     def predict(
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 16b7b07..5502f03 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -49,18 +49,20 @@ stages:
         default.feature_processor.feature_processor_config.subsample_seed: 0
         default.feature_processor.feature_processor_config.target: sap_ending
         default.feature_processor.feature_processor_type: dataframe
-        default.prepare_data.data_filepath: 
-          s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
+        default.prepare_data.data_filepath:
+          s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
         default.prepare_data.input_dataclient_type: aws-s3
         default.prepare_data.output_dataclient_type: local
-        default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
-        default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
+        default.prepare_data.output_test_filepath:
+          ./data/prepared_data/test.parquet
+        default.prepare_data.output_train_filepath:
+          ./data/prepared_data/train.parquet
         default.prepare_data.train_proportion: 0.9
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 7cc090d55cb296ce5d360d655c06e861.dir
-      size: 46183314
+      md5: 5c56787d9e6450e26a78c15700e104c7.dir
+      size: 45746089
       nfiles: 2
   build_model:
     cmd: python 2_build_model.py
@@ -71,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 7cc090d55cb296ce5d360d655c06e861.dir
-      size: 46183314
+      md5: 5c56787d9e6450e26a78c15700e104c7.dir
+      size: 45746089
       nfiles: 2
     params:
       configs/build_model.yaml:
@@ -97,25 +99,26 @@ stages:
               - NN_TORCH
               - KNN
               - XT
-              infer_limit: 0.05
+              infer_limit: 0.0005
               infer_limit_batch_size: 10000
+              fit_strategy: parallel
               ag_args_ensemble:
                 num_folds_parallel: 2
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: a6196bf08607c43ba6bc637611bb32b0.dir
-      size: 3491001
+      md5: 4fa77e3f129d2e6f9ef7222c44978c26.dir
+      size: 3474669
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: b225d7b01356cecefb3794a9a3cd19b5.dir
-      size: 790430916
-      nfiles: 36
+      md5: e27b9216bc7455f8245d5b49f27b2707.dir
+      size: 753575768
+      nfiles: 30
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 33421d5e3a2d569dbe6d4486c568a2b7
-      size: 225
+      md5: 426a162284ca9e29c043eb1d72e547e6
+      size: 224
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -125,26 +128,28 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: b225d7b01356cecefb3794a9a3cd19b5.dir
-      size: 790430916
-      nfiles: 36
+      md5: e27b9216bc7455f8245d5b49f27b2707.dir
+      size: 753575768
+      nfiles: 30
     - path: data/prepared_data
       hash: md5
-      md5: 7cc090d55cb296ce5d360d655c06e861.dir
-      size: 46183314
+      md5: 5c56787d9e6450e26a78c15700e104c7.dir
+      size: 45746089
       nfiles: 2
     params:
       configs/settings.yaml:
         default.generate_predictions.input_dataclient_type: local
         default.generate_predictions.output_dataclient_type: local
         default.generate_predictions.predictions_column_name: predictions
-        default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
-        default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
+        default.generate_predictions.predictions_output_filepath:
+          ./data/predictions/predictions.parquet
+        default.generate_predictions.test_data_filepath:
+          ./data/prepared_data/test.parquet
     outs:
     - path: data/predictions/
       hash: md5
-      md5: bd6821db9abc95af8c74aa20effd7f37.dir
-      size: 487194
+      md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
+      size: 484524
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -155,13 +160,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: bd6821db9abc95af8c74aa20effd7f37.dir
-      size: 487194
+      md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
+      size: 484524
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 7cc090d55cb296ce5d360d655c06e861.dir
-      size: 46183314
+      md5: 5c56787d9e6450e26a78c15700e104c7.dir
+      size: 45746089
       nfiles: 2
     params:
       configs/settings.yaml:
@@ -171,15 +176,15 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 9c2a7802554f5c2f750b2242c6003026
+      md5: b9ae6d24424f2d5389697577e9076b91
       size: 223
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
     - path: 5_generate_scenarios.py
       hash: md5
-      md5: 40506749fefd926d47c60ff5b16db307
-      size: 5337
+      md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
+      size: 5658
     params:
       configs/scenarios.yaml:
         default.scenarios:
@@ -192,9 +197,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: c01524a0cc2e61151c106d7049af3bf9
+      md5: 32d78c20d91fedf2f5dbb4162f323e25
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: a995c8ef7ffbe2ca254441150817ae21
+      md5: 52cbd19566151b0c300f9673252704d2
       size: 872
diff --git a/modules/ml-pipeline/src/pipeline/metrics/.gitignore b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
index 6427764..eaea051 100644
--- a/modules/ml-pipeline/src/pipeline/metrics/.gitignore
+++ b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
@@ -1,4 +1,2 @@
-/fit_metrics.json
-/metrics.json
 /scenario_table.md
 /scenario_metrics.md
diff --git a/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json b/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
new file mode 100644
index 0000000..d0311f6
--- /dev/null
+++ b/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
@@ -0,0 +1 @@
+{"mean_absolute_error": 1.2158480882644653, "median_absolute_error": 0.8539352416992188, "mean_squared_error": 3.116994857788086, "mean_absolute_percentage_error": 0.01968802697956562, "symmetric_mape": 0.019615056540152054}
diff --git a/modules/ml-pipeline/src/pipeline/metrics/metrics.json b/modules/ml-pipeline/src/pipeline/metrics/metrics.json
new file mode 100644
index 0000000..b824a27
--- /dev/null
+++ b/modules/ml-pipeline/src/pipeline/metrics/metrics.json
@@ -0,0 +1 @@
+{"mean_absolute_error": 2.121211290359497, "median_absolute_error": 1.3063621520996094, "mean_squared_error": 11.15064525604248, "mean_absolute_percentage_error": 0.03622421622276306, "symmetric_mape": 0.035541225671999285}
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
index 4dc4c36..a70ecf8 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@@ -1,7 +1,7 @@
-joblib==1.3.2
-boto3==1.28.17
-pandas==2.1.4
-autogluon.tabular[all]==1.0.0
-dynaconf==3.2.1
-pyarrow==13.0.0
-pre-commit==3.3.3
+joblib==1.5.2
+boto3==1.40.61
+pandas==2.2.3
+autogluon.tabular[all]==1.3
+dynaconf==3.2.12
+pyarrow==22.0.0
+pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
index 35bdb05..b9aa74c 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@@ -1,7 +1,7 @@
-joblib==1.3.2
-boto3==1.28.17
-pandas==2.1.4
-autogluon.tabular[all]==1.0.0
-dynaconf==3.2.1
-pyarrow==13.0.0
-PyYAML==6.0.1
+joblib==1.5.2
+boto3==1.40.61
+pandas==2.2.3
+autogluon.tabular[all]==1.3
+dynaconf==3.2.12
+pyarrow==22.0.0
+PyYAML==6.0.3
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
index 93a042e..1e59b59 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@@ -1,10 +1,10 @@
-joblib==1.3.2
-boto3==1.28.17
-pandas==2.1.4
-autogluon.tabular[all]==1.0.0
-ray==2.6.3
-dynaconf==3.2.1
-alibi==0.9.5
-shap==0.42.1
-pyarrow==13.0.0
-pre-commit==3.3.3
+joblib==1.5.2
+boto3==1.40.61
+pandas==2.2.3
+autogluon.tabular[all]==1.3
+ray==2.44.1
+dynaconf==3.2.12
+alibi==0.5.5
+shap==0.49.1
+pyarrow==22.0.0
+pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
index edeb764..84455e8 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@@ -1,4 +1,4 @@
-boto3==1.28.41
-pandas==2.1.4
-autogluon.tabular[all]==1.0.0
-dynaconf==3.2.1
+boto3==1.40.61
+pandas==2.2.3
+autogluon.tabular[all]==1.3
+dynaconf==3.2.12

From fcdf5228d65acee167d09bf673d36ec1a7bb1144 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 11:42:29 +0000
Subject: [PATCH 15/59] fix gitignore

---
 modules/ml-pipeline/src/pipeline/metrics/.gitignore       | 2 ++
 modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json | 1 -
 modules/ml-pipeline/src/pipeline/metrics/metrics.json     | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
 delete mode 100644 modules/ml-pipeline/src/pipeline/metrics/metrics.json

diff --git a/modules/ml-pipeline/src/pipeline/metrics/.gitignore b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
index eaea051..a19c4d2 100644
--- a/modules/ml-pipeline/src/pipeline/metrics/.gitignore
+++ b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
@@ -1,2 +1,4 @@
 /scenario_table.md
 /scenario_metrics.md
+/metrics.json
+/fit_metrics.json
diff --git a/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json b/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
deleted file mode 100644
index d0311f6..0000000
--- a/modules/ml-pipeline/src/pipeline/metrics/fit_metrics.json
+++ /dev/null
@@ -1 +0,0 @@
-{"mean_absolute_error": 1.2158480882644653, "median_absolute_error": 0.8539352416992188, "mean_squared_error": 3.116994857788086, "mean_absolute_percentage_error": 0.01968802697956562, "symmetric_mape": 0.019615056540152054}
diff --git a/modules/ml-pipeline/src/pipeline/metrics/metrics.json b/modules/ml-pipeline/src/pipeline/metrics/metrics.json
deleted file mode 100644
index b824a27..0000000
--- a/modules/ml-pipeline/src/pipeline/metrics/metrics.json
+++ /dev/null
@@ -1 +0,0 @@
-{"mean_absolute_error": 2.121211290359497, "median_absolute_error": 1.3063621520996094, "mean_squared_error": 11.15064525604248, "mean_absolute_percentage_error": 0.03622421622276306, "symmetric_mape": 0.035541225671999285}

From a083934ffbadd787d3e1d151bfc53ff5cf606f01 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 11:53:44 +0000
Subject: [PATCH 16/59] change docker image python version and add github
 action

---
 .github/workflows/MLPipelinePullRequest.yml   | 31 +++++++++++++++++++
 deployment/Dockerfile.prediction.lambda       |  2 +-
 modules/ml-pipeline/src/Prediction.Dockerfile |  2 +-
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 451b0a8..8cbab20 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -32,6 +32,37 @@ jobs:
   #       echo "Please choose one of these tags: 'major', 'major', 'patch'"
   #       exit(1)
 
+  Verify-Lambda:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install packages to retrieve artifacts
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+      run: |
+        pip install --upgrade pip
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+
+    - name: Retrieve artifacts (dvc.lock)
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+      run: |
+        cd modules/ml-pipeline/src/pipeline
+        dvc pull -r experiments
+
+    - name: Build Lambda docker Image
+      run: |
+        cd deployment/
+        docker build . --file Dockerfile.prediction.lambda --tag lambda_test
+
+    - name: Run lambda docker container
+      run: |
+        docker run lambda_test
+
   Verify-Model:
 
     runs-on: ubuntu-latest
diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
index 9ee4306..22acf57 100644
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@@ -1,4 +1,4 @@
-FROM public.ecr.aws/lambda/python:3.10
+FROM public.ecr.aws/lambda/python:3.12
 
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
diff --git a/modules/ml-pipeline/src/Prediction.Dockerfile b/modules/ml-pipeline/src/Prediction.Dockerfile
index deb8c45..c026ac2 100644
--- a/modules/ml-pipeline/src/Prediction.Dockerfile
+++ b/modules/ml-pipeline/src/Prediction.Dockerfile
@@ -1,5 +1,5 @@
 # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
-FROM python:3.10.12-slim
+FROM python:3.12.12-slim
 
 RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
 

From d4b70ecc7c3f0e75d5847f72de020235c6a49485 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 12:00:55 +0000
Subject: [PATCH 17/59] adjust build location

---
 .github/workflows/MLPipelinePullRequest.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 8cbab20..94dc17e 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -56,8 +56,7 @@ jobs:
 
     - name: Build Lambda docker Image
       run: |
-        cd deployment/
-        docker build . --file Dockerfile.prediction.lambda --tag lambda_test
+        docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
 
     - name: Run lambda docker container
       run: |

From ab3b2bb1d0949e686122428ce334d4d5ed0d8e9b Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 12:03:24 +0000
Subject: [PATCH 18/59] change yum to dnf

---
 deployment/Dockerfile.prediction.lambda | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
index 22acf57..ff4d5d0 100644
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@@ -2,14 +2,14 @@ FROM public.ecr.aws/lambda/python:3.12
 
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
-ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
+ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
 
 # Environment variables
 ARG RUNTIME_ENVIRONMENT
 ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
 
 # Install necessary build tools - required to test locally
-RUN yum install -y gcc python3-devel gcc-c++
+RUN dnf install -y gcc python3-devel gcc-c++
 
 # Install python packages
 COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt

From e04f6125e0db47ec8d37761e1d281f8f6bef2a8c Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 13:11:33 +0000
Subject: [PATCH 19/59] add single row dataset for testing

---
 .github/workflows/MLPipelinePullRequest.yml   | 31 ++++++++-
 .../src/pipeline/1_prepare_data.py            |  5 ++
 .../src/pipeline/configs/build_model.yaml     |  2 +-
 .../src/pipeline/configs/settings.yaml        |  1 +
 modules/ml-pipeline/src/pipeline/dvc.lock     | 68 +++++++++----------
 .../src/pipeline/metrics/.gitignore           |  4 +-
 6 files changed, 72 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 94dc17e..30641cf 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -54,13 +54,40 @@ jobs:
         cd modules/ml-pipeline/src/pipeline
         dvc pull -r experiments
 
+    - name: Set timestamp
+      id: set_timestamp
+      run: |
+        echo "timestamp=$(date +%Y%m%d)" >> $GITHUB_ENV
+        echo "Generated timestamp: $timestamp"
+
+    - name: Upload sample row dataset to S3
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+      run: |
+        cd modules/ml-pipeline/src/pipeline/data/prepared_data/
+        aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
+
     - name: Build Lambda docker Image
       run: |
         docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
 
-    - name: Run lambda docker container
+    - name: Remove uploaded sample row dataset from S3
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
-        docker run lambda_test
+        aws s3 rm s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
+    # - name: Run lambda docker container
+    #   env:
+    #     AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+    #     AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+    #   run: |
+    #     docker run -p 9000:8080 \
+    #       -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
+    #       -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
+    #       -e RUNTIME_ENVIRONMENT=dev \
+    #       -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
 
   Verify-Model:
 
diff --git a/modules/ml-pipeline/src/pipeline/1_prepare_data.py b/modules/ml-pipeline/src/pipeline/1_prepare_data.py
index 75d784f..6b4ab84 100644
--- a/modules/ml-pipeline/src/pipeline/1_prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/1_prepare_data.py
@@ -29,6 +29,7 @@ data_filepath = prepare_data_params["data_filepath"]
 train_proportion = prepare_data_params["train_proportion"]
 output_train_filepath = prepare_data_params["output_train_filepath"]
 output_test_filepath = prepare_data_params["output_test_filepath"]
+sample_test_filepath = prepare_data_params["sample_test_filepath"]
 feature_processor_config = feature_process_params["feature_processor_config"]
 
 logger.info(f"--- Initiate DataClient ---")
@@ -99,6 +100,10 @@ def prepare_data(
 
     logger.info("--- Outputting data ---")
 
+    output_dataclient.save_data(
+        obj=data.sample(1), location=sample_test_filepath, save_config=None
+    )
+
     output_dataclient.save_data(
         obj=train, location=output_train_filepath, save_config=None
     )
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 38c0910..69349ba 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -14,7 +14,7 @@ default:
       output_filepath: ./data/model/allmodels/
       problem_type: regression
       eval_metric: mean_squared_error #mean_absolute_error
-      time_limit: 1800
+      time_limit: 180
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
       infer_limit: 0.0005
diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
index 28d5cd9..a6b493e 100644
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -26,6 +26,7 @@ default:
     train_proportion: 0.9
     output_train_filepath: ./data/prepared_data/train.parquet
     output_test_filepath: ./data/prepared_data/test.parquet
+    sample_test_filepath: ./data/prepared_data/sample_test.parquet
 
   feature_processor:
     feature_processor_type: dataframe
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 5502f03..1a3143a 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -16,8 +16,8 @@ stages:
     deps:
     - path: 1_prepare_data.py
       hash: md5
-      md5: 11a3b8bfdfe199ab7ecc39ccc5652649
-      size: 4298
+      md5: a5ce162e1c402c0f811a80ef78cf4dd5
+      size: 4481
     params:
       configs/settings.yaml:
         default.feature_processor.feature_processor_config.drop_columns:
@@ -61,9 +61,9 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 5c56787d9e6450e26a78c15700e104c7.dir
-      size: 45746089
-      nfiles: 2
+      md5: 02b2c25e488f75c4a676540c127b8930.dir
+      size: 45890160
+      nfiles: 3
   build_model:
     cmd: python 2_build_model.py
     deps:
@@ -73,9 +73,9 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 5c56787d9e6450e26a78c15700e104c7.dir
-      size: 45746089
-      nfiles: 2
+      md5: 02b2c25e488f75c4a676540c127b8930.dir
+      size: 45890160
+      nfiles: 3
     params:
       configs/build_model.yaml:
         default:
@@ -91,7 +91,7 @@ stages:
               output_filepath: ./data/model/allmodels/
               problem_type: regression
               eval_metric: mean_squared_error
-              time_limit: 1800
+              time_limit: 180
               presets: medium_quality
               excluded_model_types:
               - RF
@@ -107,18 +107,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 4fa77e3f129d2e6f9ef7222c44978c26.dir
-      size: 3474669
+      md5: 7f9a534daf824434262bee89e2ee2cfd.dir
+      size: 3475064
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: e27b9216bc7455f8245d5b49f27b2707.dir
-      size: 753575768
-      nfiles: 30
+      md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
+      size: 414148418
+      nfiles: 24
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 426a162284ca9e29c043eb1d72e547e6
-      size: 224
+      md5: 7763f689b46c38ec8f0cc605deac4c2a
+      size: 221
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -128,14 +128,14 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: e27b9216bc7455f8245d5b49f27b2707.dir
-      size: 753575768
-      nfiles: 30
+      md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
+      size: 414148418
+      nfiles: 24
     - path: data/prepared_data
       hash: md5
-      md5: 5c56787d9e6450e26a78c15700e104c7.dir
-      size: 45746089
-      nfiles: 2
+      md5: 02b2c25e488f75c4a676540c127b8930.dir
+      size: 45890160
+      nfiles: 3
     params:
       configs/settings.yaml:
         default.generate_predictions.input_dataclient_type: local
@@ -148,8 +148,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
-      size: 484524
+      md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
+      size: 483856
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -160,14 +160,14 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
-      size: 484524
+      md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
+      size: 483856
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 5c56787d9e6450e26a78c15700e104c7.dir
-      size: 45746089
-      nfiles: 2
+      md5: 02b2c25e488f75c4a676540c127b8930.dir
+      size: 45890160
+      nfiles: 3
     params:
       configs/settings.yaml:
         default.generate_metrics.dataclient_type: local
@@ -176,8 +176,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: b9ae6d24424f2d5389697577e9076b91
-      size: 223
+      md5: 8a52e3a0047c68b9de5c371a1d406f73
+      size: 224
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -197,9 +197,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 32d78c20d91fedf2f5dbb4162f323e25
-      size: 356
+      md5: 666f73f6fdb49484737f1a7edd798727
+      size: 363
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 52cbd19566151b0c300f9673252704d2
+      md5: 71c9fcb9ec304353aba0d7f5c58ca8b2
       size: 872
diff --git a/modules/ml-pipeline/src/pipeline/metrics/.gitignore b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
index a19c4d2..6427764 100644
--- a/modules/ml-pipeline/src/pipeline/metrics/.gitignore
+++ b/modules/ml-pipeline/src/pipeline/metrics/.gitignore
@@ -1,4 +1,4 @@
+/fit_metrics.json
+/metrics.json
 /scenario_table.md
 /scenario_metrics.md
-/metrics.json
-/fit_metrics.json

From 96bfeb92f921116cde7d5dceb523aa2cdcb92bf9 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 13:24:54 +0000
Subject: [PATCH 20/59] test curling

---
 .github/workflows/MLPipelinePullRequest.yml | 41 ++++++++++++++-------
 deployment/handlers/prediction_app.py       |  9 ++++-
 2 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 30641cf..c0576c3 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -58,7 +58,7 @@ jobs:
       id: set_timestamp
       run: |
         echo "timestamp=$(date +%Y%m%d)" >> $GITHUB_ENV
-        echo "Generated timestamp: $timestamp"
+        echo "Generated timestamp: ${timestamp}"
 
     - name: Upload sample row dataset to S3
       env:
@@ -66,28 +66,43 @@ jobs:
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
         cd modules/ml-pipeline/src/pipeline/data/prepared_data/
-        aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
+        aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet
 
     - name: Build Lambda docker Image
       run: |
         docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
 
+    - name: Run lambda docker container
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+      run: |
+        docker run -d -p 9000:8080 \
+          -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
+          -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
+          -e RUNTIME_ENVIRONMENT=dev \
+          -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
+
+    - name: Test Lambda endpoint
+      run: |
+        sleep 5
+        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"cicd://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}')
+        echo "Lambda response: $RESPONSE"
+        if [[ $RESPONSE != *"predictions"* ]]; then
+          echo "Lambda invocation failed"
+          exit 1
+        fi
+
     - name: Remove uploaded sample row dataset from S3
       env:
         AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
-        aws s3 rm s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
-    # - name: Run lambda docker container
-    #   env:
-    #     AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-    #     AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-    #   run: |
-    #     docker run -p 9000:8080 \
-    #       -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
-    #       -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
-    #       -e RUNTIME_ENVIRONMENT=dev \
-    #       -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
+        aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
+
+    - name: Stop Lambda container
+      run: |
+        docker stop lambda_test || echo "Container already stopped"
 
   Verify-Model:
 
diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index ac397b9..a0507bf 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -67,7 +67,14 @@ def handler(event, context):
 
         # TODO: Implement the loading of the model and prediction
 
-        storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
+        if body["file_location"].startswith("cicd://"):
+            storage_filepath = (
+                body["file_location"]
+                .replace("cicd://", "s3://")
+                .replace(".parquet", "_output.parquet")
+            )
+        else:
+            storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
 
         logger.info(f"--- Initiate MLModel ---")
 

From c3a7866df43dfb2441cd6685a17e0ae3786efb07 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 13:34:34 +0000
Subject: [PATCH 21/59] make changes to request body

---
 .github/workflows/MLPipelinePullRequest.yml | 3 ++-
 deployment/handlers/prediction_app.py       | 8 +++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index c0576c3..15aeefc 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -86,7 +86,7 @@ jobs:
     - name: Test Lambda endpoint
       run: |
         sleep 5
-        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"cicd://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}')
+        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
         echo "Lambda response: $RESPONSE"
         if [[ $RESPONSE != *"predictions"* ]]; then
           echo "Lambda invocation failed"
@@ -94,6 +94,7 @@ jobs:
         fi
 
     - name: Remove uploaded sample row dataset from S3
+      if: always()
       env:
         AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index a0507bf..25fa120 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -67,11 +67,9 @@ def handler(event, context):
 
         # TODO: Implement the loading of the model and prediction
 
-        if body["file_location"].startswith("cicd://"):
-            storage_filepath = (
-                body["file_location"]
-                .replace("cicd://", "s3://")
-                .replace(".parquet", "_output.parquet")
+        if "testing" in body:
+            storage_filepath = body["file_location"].replace(
+                ".parquet", "_output.parquet"
             )
         else:
             storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"

From 51f2c07b744f773a0204b9fdc96c9a7c3eb9f49d Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 13:46:57 +0000
Subject: [PATCH 22/59] add delay before deleting s3 file

---
 .github/workflows/MLPipelinePullRequest.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 15aeefc..e0a1d81 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -93,17 +93,19 @@ jobs:
           exit 1
         fi
 
+    - name: Stop Lambda container
+      run: |
+        docker stop lambda_test || echo "Container already stopped"
+
     - name: Remove uploaded sample row dataset from S3
       if: always()
       env:
         AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
+        sleep 5
         aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
 
-    - name: Stop Lambda container
-      run: |
-        docker stop lambda_test || echo "Container already stopped"
 
   Verify-Model:
 

From ad26148bbc6430c57551fa59c004c43974bdf08f Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 13:54:16 +0000
Subject: [PATCH 23/59] test static folder

---
 .github/workflows/MLPipelinePullRequest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index e0a1d81..d08adfa 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -86,7 +86,7 @@ jobs:
     - name: Test Lambda endpoint
       run: |
         sleep 5
-        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
+        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/20251102/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
         echo "Lambda response: $RESPONSE"
         if [[ $RESPONSE != *"predictions"* ]]; then
           echo "Lambda invocation failed"

From 56eace2b7f855101c7f35ddab64c34d9c719485d Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 14:03:35 +0000
Subject: [PATCH 24/59] correct the quotations

---
 .github/workflows/MLPipelinePullRequest.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index d08adfa..d426736 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -86,12 +86,12 @@ jobs:
     - name: Test Lambda endpoint
       run: |
         sleep 5
-        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/20251102/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
+        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
         echo "Lambda response: $RESPONSE"
-        if [[ $RESPONSE != *"predictions"* ]]; then
-          echo "Lambda invocation failed"
-          exit 1
-        fi
+
+    - name: Get Lambda logs
+      run: |
+        docker logs $(docker ps -al -q)
 
     - name: Stop Lambda container
       run: |
@@ -103,7 +103,6 @@ jobs:
         AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
-        sleep 5
         aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
 
 

From 2c735737a8f2937df1bbfb3eb540aa6ee00cdd37 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 14:24:58 +0000
Subject: [PATCH 25/59] use correct escaping

---
 .github/workflows/MLPipelinePullRequest.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index d426736..f91fd28 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -86,8 +86,9 @@ jobs:
     - name: Test Lambda endpoint
       run: |
         sleep 5
-        RESPONSE=$(curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": true}"}')
-        echo "Lambda response: $RESPONSE"
+        curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
+          -H "Content-Type: application/json" \
+          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
 
     - name: Get Lambda logs
       run: |

From 43aacd80bed83266551a0a13b0c5efe69811a3b5 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 17:26:40 +0000
Subject: [PATCH 26/59] update to autogluon 1.4

---
 deployment/Dockerfile.prediction.lambda       |  1 +
 modules/ml-pipeline/Makefile                  |  9 +--
 .../src/pipeline/configs/build_model.yaml     |  2 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 56 +++++++++----------
 .../predictions/requirements-dev.txt          |  2 +-
 .../requirements/predictions/requirements.txt |  2 +-
 .../training/requirements-dev.txt             |  4 +-
 .../requirements/training/requirements.txt    |  2 +-
 8 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
index ff4d5d0..ca21a8e 100644
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@@ -3,6 +3,7 @@ FROM public.ecr.aws/lambda/python:3.12
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
 ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
+ENV MPLCONFIGDIR="${LAMBDA_TASK_ROOT}/tmp/matplotlib"
 
 # Environment variables
 ARG RUNTIME_ENVIRONMENT
diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile
index ff83c27..09f9f15 100644
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@@ -1,7 +1,7 @@
 export PYENV_ROOT=$(HOME)/.pyenv
 export PATH := $(PYENV_ROOT)/bin:$(PATH)
 PYTHON_VERSION ?= 3.12.12
-CONDA_ENV=dev_env_pipeline_1
+CONDA_ENV=dev_env_pipeline
 CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
 
 .PHONY: init
@@ -16,10 +16,11 @@ dev-conda:
 	${CONDA_ACTIVATE} ${CONDA_ENV} && \
 		which pip && \
 		pip install --upgrade pip && \
-		pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
-		pip install -r src/pipeline/requirements/version_control/requirements.txt && \
+		pip install uv && \
+		uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
+		uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
 		pre-commit install && \
-		pip install ipykernel
+		uv pip install ipykernel
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
 	echo "conda activate ${CONDA_ENV}"
 
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 69349ba..38c0910 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -14,7 +14,7 @@ default:
       output_filepath: ./data/model/allmodels/
       problem_type: regression
       eval_metric: mean_squared_error #mean_absolute_error
-      time_limit: 180
+      time_limit: 1800
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
       infer_limit: 0.0005
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 1a3143a..f05f185 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 02b2c25e488f75c4a676540c127b8930.dir
-      size: 45890160
+      md5: 2feba8772c240b507eb900934efcb8ca.dir
+      size: 46064555
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 02b2c25e488f75c4a676540c127b8930.dir
-      size: 45890160
+      md5: 2feba8772c240b507eb900934efcb8ca.dir
+      size: 46064555
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -91,7 +91,7 @@ stages:
               output_filepath: ./data/model/allmodels/
               problem_type: regression
               eval_metric: mean_squared_error
-              time_limit: 180
+              time_limit: 1800
               presets: medium_quality
               excluded_model_types:
               - RF
@@ -107,18 +107,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 7f9a534daf824434262bee89e2ee2cfd.dir
-      size: 3475064
+      md5: 29036f4f42b1fdcab7f9e40a87f38a8c.dir
+      size: 3474783
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
-      size: 414148418
-      nfiles: 24
+      md5: 77cab231e3d51bbebbae5a7af310c18a.dir
+      size: 791390619
+      nfiles: 34
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 7763f689b46c38ec8f0cc605deac4c2a
-      size: 221
+      md5: 4f39064fb6b31c7c879299621bcea28d
+      size: 224
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -128,13 +128,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
-      size: 414148418
-      nfiles: 24
+      md5: 77cab231e3d51bbebbae5a7af310c18a.dir
+      size: 791390619
+      nfiles: 34
     - path: data/prepared_data
       hash: md5
-      md5: 02b2c25e488f75c4a676540c127b8930.dir
-      size: 45890160
+      md5: 2feba8772c240b507eb900934efcb8ca.dir
+      size: 46064555
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -148,8 +148,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
-      size: 483856
+      md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir
+      size: 484314
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -160,13 +160,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
-      size: 483856
+      md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir
+      size: 484314
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 02b2c25e488f75c4a676540c127b8930.dir
-      size: 45890160
+      md5: 2feba8772c240b507eb900934efcb8ca.dir
+      size: 46064555
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -176,8 +176,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 8a52e3a0047c68b9de5c371a1d406f73
-      size: 224
+      md5: bf980dad2dc5b97651546b0b755419ae
+      size: 223
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -197,9 +197,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 666f73f6fdb49484737f1a7edd798727
-      size: 363
+      md5: 05e2cce8e61d5005398659e9f3465cd6
+      size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 71c9fcb9ec304353aba0d7f5c58ca8b2
+      md5: 92446d2f3836c6f790d06e3b268b05f3
       size: 872
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
index a70ecf8..30ec95f 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.3
+autogluon.tabular[all]==1.4
 dynaconf==3.2.12
 pyarrow==22.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
index b9aa74c..63580ab 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.3
+autogluon.tabular[all]==1.4
 dynaconf==3.2.12
 pyarrow==22.0.0
 PyYAML==6.0.3
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
index 1e59b59..a503ecf 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@@ -1,10 +1,10 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.3
+autogluon.tabular[all]==1.4
 ray==2.44.1
 dynaconf==3.2.12
 alibi==0.5.5
 shap==0.49.1
-pyarrow==22.0.0
+pyarrow
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
index 84455e8..6e85ded 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@@ -1,4 +1,4 @@
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.3
+autogluon.tabular[all]==1.4
 dynaconf==3.2.12

From 91d6455cdf8896cbe066bb29b1d836c0ebadc376 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 17:32:27 +0000
Subject: [PATCH 27/59] update pyarrow version

---
 .../src/pipeline/requirements/predictions/requirements-dev.txt  | 2 +-
 .../src/pipeline/requirements/predictions/requirements.txt      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
index 30ec95f..bf8b14c 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@@ -3,5 +3,5 @@ boto3==1.40.61
 pandas==2.2.3
 autogluon.tabular[all]==1.4
 dynaconf==3.2.12
-pyarrow==22.0.0
+pyarrow==20.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
index 63580ab..0df33db 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@@ -3,5 +3,5 @@ boto3==1.40.61
 pandas==2.2.3
 autogluon.tabular[all]==1.4
 dynaconf==3.2.12
-pyarrow==22.0.0
+pyarrow==20.0.0
 PyYAML==6.0.3

From bdc177baa929afd5e4475506be5080f3fd6d289d Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sun, 2 Nov 2025 22:49:36 +0000
Subject: [PATCH 28/59] roll back to autogluon 1.3.0 due to stability issue

---
 .../src/pipeline/configs/build_model.yaml     |  3 +-
 .../ml-pipeline/src/pipeline/core/MLModels.py |  4 ++
 modules/ml-pipeline/src/pipeline/dvc.lock     | 51 ++++++++++---------
 .../predictions/requirements-dev.txt          |  2 +-
 .../requirements/predictions/requirements.txt |  2 +-
 .../training/requirements-dev.txt             |  4 +-
 .../requirements/training/requirements.txt    |  2 +-
 7 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 38c0910..6e8845c 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -19,5 +19,6 @@ default:
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
       infer_limit: 0.0005
       infer_limit_batch_size: 10000
-      "fit_strategy": "parallel"
+      fit_strategy: "parallel"
       ag_args_ensemble: {'num_folds_parallel': 2}
+      num_gpus: auto
diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py
index 437c69f..35f79c4 100644
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@@ -153,6 +153,7 @@ class AutogluonAutoML:
         "infer_limit_batch_size",
         "ag_args_ensemble",
         "fit_strategy",
+        "num_gpus",
     ]
 
     def load_model(self, path: Union[Path, str]) -> None:
@@ -184,6 +185,8 @@ class AutogluonAutoML:
         """
         Method to train a model
         """
+        # Force Parallel Model fitting
+        os.environ["AG_FORCE_PARALLEL"] = "True"
 
         validate_dict_keys(
             keys_1=list(model_hyperparameters.keys()),
@@ -211,6 +214,7 @@ class AutogluonAutoML:
             infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
             ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
             fit_strategy=model_hyperparameters["fit_strategy"],
+            num_gpus=model_hyperparameters["num_gpus"],
         )
 
     def predict(
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index f05f185..7ed27d7 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 2feba8772c240b507eb900934efcb8ca.dir
-      size: 46064555
+      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
+      size: 46092230
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 2feba8772c240b507eb900934efcb8ca.dir
-      size: 46064555
+      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
+      size: 46092230
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -104,20 +104,21 @@ stages:
               fit_strategy: parallel
               ag_args_ensemble:
                 num_folds_parallel: 2
+              num_gpus: auto
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 29036f4f42b1fdcab7f9e40a87f38a8c.dir
-      size: 3474783
+      md5: 01328a1cc5a1ff35e701a3c44902afc6.dir
+      size: 3474659
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 77cab231e3d51bbebbae5a7af310c18a.dir
-      size: 791390619
-      nfiles: 34
+      md5: 70f076a248524dfce60412f83969ae63.dir
+      size: 760254863
+      nfiles: 33
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 4f39064fb6b31c7c879299621bcea28d
+      md5: 4726c52b2f27650ab1bbf97b5bf61e54
       size: 224
   generate_predictions:
     cmd: python 3_generate_predictions.py
@@ -128,13 +129,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 77cab231e3d51bbebbae5a7af310c18a.dir
-      size: 791390619
-      nfiles: 34
+      md5: 70f076a248524dfce60412f83969ae63.dir
+      size: 760254863
+      nfiles: 33
     - path: data/prepared_data
       hash: md5
-      md5: 2feba8772c240b507eb900934efcb8ca.dir
-      size: 46064555
+      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
+      size: 46092230
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -148,8 +149,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir
-      size: 484314
+      md5: 312f9106eb18d34df75124f0536f0603.dir
+      size: 484470
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -160,13 +161,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir
-      size: 484314
+      md5: 312f9106eb18d34df75124f0536f0603.dir
+      size: 484470
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 2feba8772c240b507eb900934efcb8ca.dir
-      size: 46064555
+      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
+      size: 46092230
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -176,8 +177,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: bf980dad2dc5b97651546b0b755419ae
-      size: 223
+      md5: 661388682aa1ca888b256e4667211379
+      size: 222
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -197,9 +198,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 05e2cce8e61d5005398659e9f3465cd6
+      md5: 88ebca8dccf907692675301ffe06b10d
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 92446d2f3836c6f790d06e3b268b05f3
+      md5: 3ec419e883b812b254b331f055999cc9
       size: 872
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
index bf8b14c..d4eb1fd 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.4
+autogluon.tabular[all]==1.3
 dynaconf==3.2.12
 pyarrow==20.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
index 0df33db..138a4ef 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.4
+autogluon.tabular[all]==1.3
 dynaconf==3.2.12
 pyarrow==20.0.0
 PyYAML==6.0.3
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
index a503ecf..6fe98f7 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@@ -1,10 +1,10 @@
 joblib==1.5.2
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.4
+autogluon.tabular[all]==1.3
 ray==2.44.1
 dynaconf==3.2.12
 alibi==0.5.5
 shap==0.49.1
-pyarrow
+pyarrow==20.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
index 6e85ded..84455e8 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@@ -1,4 +1,4 @@
 boto3==1.40.61
 pandas==2.2.3
-autogluon.tabular[all]==1.4
+autogluon.tabular[all]==1.3
 dynaconf==3.2.12

From 541f2b268938b35b394a059d5ef4cf7206daff91 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 14:03:19 +0000
Subject: [PATCH 29/59] change libomp to conda install instead of brew due to
 segmentation errors, update back to 1.4

---
 modules/ml-pipeline/Makefile                  |   3 +-
 .../src/pipeline/configs/build_model.yaml     |  16 ++-
 .../ml-pipeline/src/pipeline/core/MLModels.py |   2 +
 modules/ml-pipeline/src/pipeline/dvc.lock     | 123 ++++++++++++++----
 .../predictions/requirements-dev.txt          |   4 +-
 .../requirements/predictions/requirements.txt |   4 +-
 .../training/requirements-dev.txt             |   6 +-
 .../requirements/training/requirements.txt    |   4 +-
 8 files changed, 122 insertions(+), 40 deletions(-)

diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile
index 09f9f15..3e6be85 100644
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@@ -20,7 +20,8 @@ dev-conda:
 		uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
 		uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
 		pre-commit install && \
-		uv pip install ipykernel
+		uv pip install ipykernel && \
+		conda install llvm-openmp -y
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
 	echo "conda activate ${CONDA_ENV}"
 
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 6e8845c..50122ee 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -17,8 +17,20 @@ default:
       time_limit: 1800
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
-      infer_limit: 0.0005
+      infer_limit: 0.001
       infer_limit_batch_size: 10000
-      fit_strategy: "parallel"
+      fit_strategy: "sequential"
       ag_args_ensemble: {'num_folds_parallel': 2}
       num_gpus: auto
+      hyperparameters:
+        {
+        'NN_TORCH': [{}],
+        'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': 'auto'}}],
+        # 'GBM': [{}],
+        'CAT': [{}],
+        'XGB': [{}],
+        'FASTAI': [{}],
+        'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
+        'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
+        'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
+        }
diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py
index 35f79c4..dabe154 100644
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@@ -154,6 +154,7 @@ class AutogluonAutoML:
         "ag_args_ensemble",
         "fit_strategy",
         "num_gpus",
+        "hyperparameters",
     ]
 
     def load_model(self, path: Union[Path, str]) -> None:
@@ -215,6 +216,7 @@ class AutogluonAutoML:
             ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
             fit_strategy=model_hyperparameters["fit_strategy"],
             num_gpus=model_hyperparameters["num_gpus"],
+            hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
         )
 
     def predict(
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 7ed27d7..cffd1b3 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
-      size: 46092230
+      md5: ba409a8c79863ddc407786b7aa7a053a.dir
+      size: 46113237
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
-      size: 46092230
+      md5: ba409a8c79863ddc407786b7aa7a053a.dir
+      size: 46113237
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -99,27 +99,94 @@ stages:
               - NN_TORCH
               - KNN
               - XT
-              infer_limit: 0.0005
+              infer_limit: 0.001
               infer_limit_batch_size: 10000
-              fit_strategy: parallel
+              fit_strategy: sequential
               ag_args_ensemble:
                 num_folds_parallel: 2
               num_gpus: auto
+              hyperparameters:
+                NN_TORCH:
+                - {}
+                GBM:
+                - extra_trees: true
+                  ag_args:
+                    name_suffix: XT
+                - {}
+                - learning_rate: 0.03
+                  num_leaves: 128
+                  feature_fraction: 0.9
+                  min_data_in_leaf: 3
+                  ag_args:
+                    name_suffix: Large
+                    priority: 0
+                    hyperparameter_tune_kwargs: auto
+                CAT:
+                - {}
+                XGB:
+                - {}
+                FASTAI:
+                - {}
+                RF:
+                - criterion: gini
+                  ag_args:
+                    name_suffix: Gini
+                    problem_types:
+                    - binary
+                    - multiclass
+                - criterion: entropy
+                  ag_args:
+                    name_suffix: Entr
+                    problem_types:
+                    - binary
+                    - multiclass
+                - criterion: squared_error
+                  ag_args:
+                    name_suffix: MSE
+                    problem_types:
+                    - regression
+                    - quantile
+                XT:
+                - criterion: gini
+                  ag_args:
+                    name_suffix: Gini
+                    problem_types:
+                    - binary
+                    - multiclass
+                - criterion: entropy
+                  ag_args:
+                    name_suffix: Entr
+                    problem_types:
+                    - binary
+                    - multiclass
+                - criterion: squared_error
+                  ag_args:
+                    name_suffix: MSE
+                    problem_types:
+                    - regression
+                    - quantile
+                KNN:
+                - weights: uniform
+                  ag_args:
+                    name_suffix: Unif
+                - weights: distance
+                  ag_args:
+                    name_suffix: Dist
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: 01328a1cc5a1ff35e701a3c44902afc6.dir
-      size: 3474659
+      md5: a9361ab31ff8fc08c3e5e3b96cec06d4.dir
+      size: 3474690
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 70f076a248524dfce60412f83969ae63.dir
-      size: 760254863
-      nfiles: 33
+      md5: 19019e558886b1acd6d29442a47243d0.dir
+      size: 761937021
+      nfiles: 34
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 4726c52b2f27650ab1bbf97b5bf61e54
-      size: 224
+      md5: 3af168aedf1f81a22024bb8c815f5d12
+      size: 221
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -129,13 +196,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 70f076a248524dfce60412f83969ae63.dir
-      size: 760254863
-      nfiles: 33
+      md5: 19019e558886b1acd6d29442a47243d0.dir
+      size: 761937021
+      nfiles: 34
     - path: data/prepared_data
       hash: md5
-      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
-      size: 46092230
+      md5: ba409a8c79863ddc407786b7aa7a053a.dir
+      size: 46113237
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -149,8 +216,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 312f9106eb18d34df75124f0536f0603.dir
-      size: 484470
+      md5: a9f32d70a4817df8092e52c5513a445f.dir
+      size: 484694
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -161,13 +228,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 312f9106eb18d34df75124f0536f0603.dir
-      size: 484470
+      md5: a9f32d70a4817df8092e52c5513a445f.dir
+      size: 484694
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
-      size: 46092230
+      md5: ba409a8c79863ddc407786b7aa7a053a.dir
+      size: 46113237
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -177,8 +244,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 661388682aa1ca888b256e4667211379
-      size: 222
+      md5: 736ef69da7edb94577139ae9ede5ac0d
+      size: 224
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -198,9 +265,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 88ebca8dccf907692675301ffe06b10d
+      md5: adcc78833e7a0824ecb10ad78a646ea8
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 3ec419e883b812b254b331f055999cc9
+      md5: 35e704d0499e943c4110f66f1482d2ec
       size: 872
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
index d4eb1fd..ba7aebb 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
-pandas==2.2.3
-autogluon.tabular[all]==1.3
+pandas==2.3.3
+autogluon.tabular[all]==1.4.0
 dynaconf==3.2.12
 pyarrow==20.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
index 138a4ef..ccd84ab 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@@ -1,7 +1,7 @@
 joblib==1.5.2
 boto3==1.40.61
-pandas==2.2.3
-autogluon.tabular[all]==1.3
+pandas==2.3.3
+autogluon.tabular[all]==1.4.0
 dynaconf==3.2.12
 pyarrow==20.0.0
 PyYAML==6.0.3
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
index 6fe98f7..eaef2a7 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@@ -1,10 +1,10 @@
 joblib==1.5.2
 boto3==1.40.61
-pandas==2.2.3
-autogluon.tabular[all]==1.3
+pandas==2.3.3
+autogluon.tabular[all]==1.4.0
 ray==2.44.1
 dynaconf==3.2.12
-alibi==0.5.5
+# alibi
 shap==0.49.1
 pyarrow==20.0.0
 pre-commit==4.3.0
diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
index 84455e8..1d7704e 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@@ -1,4 +1,4 @@
 boto3==1.40.61
-pandas==2.2.3
-autogluon.tabular[all]==1.3
+pandas==2.3.3
+autogluon.tabular[all]==1.4.0
 dynaconf==3.2.12

From 6220cd17d3e8f8b961d694c1488a1f9aa9706262 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 14:43:26 +0000
Subject: [PATCH 30/59] faster inference

---
 .../src/pipeline/configs/build_model.yaml     |  8 +--
 modules/ml-pipeline/src/pipeline/dvc.lock     | 57 +++++++++----------
 2 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 50122ee..a03f430 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -17,15 +17,15 @@ default:
       time_limit: 1800
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
-      infer_limit: 0.001
+      infer_limit: 0.0005
       infer_limit_batch_size: 10000
-      fit_strategy: "sequential"
+      fit_strategy: "parallel"
       ag_args_ensemble: {'num_folds_parallel': 2}
-      num_gpus: auto
+      num_gpus: 0
       hyperparameters:
         {
         'NN_TORCH': [{}],
-        'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': 'auto'}}],
+        'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}],
         # 'GBM': [{}],
         'CAT': [{}],
         'XGB': [{}],
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index cffd1b3..673285c 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: ba409a8c79863ddc407786b7aa7a053a.dir
-      size: 46113237
+      md5: bdf32829fa7d4084293e54f73084a25c.dir
+      size: 46126494
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: ba409a8c79863ddc407786b7aa7a053a.dir
-      size: 46113237
+      md5: bdf32829fa7d4084293e54f73084a25c.dir
+      size: 46126494
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -99,12 +99,12 @@ stages:
               - NN_TORCH
               - KNN
               - XT
-              infer_limit: 0.001
+              infer_limit: 0.0005
               infer_limit_batch_size: 10000
-              fit_strategy: sequential
+              fit_strategy: parallel
               ag_args_ensemble:
                 num_folds_parallel: 2
-              num_gpus: auto
+              num_gpus: 0
               hyperparameters:
                 NN_TORCH:
                 - {}
@@ -120,7 +120,6 @@ stages:
                   ag_args:
                     name_suffix: Large
                     priority: 0
-                    hyperparameter_tune_kwargs: auto
                 CAT:
                 - {}
                 XGB:
@@ -175,18 +174,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: a9361ab31ff8fc08c3e5e3b96cec06d4.dir
-      size: 3474690
+      md5: bb3a39098652aee04227b8157cffe6dc.dir
+      size: 3475209
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 19019e558886b1acd6d29442a47243d0.dir
-      size: 761937021
-      nfiles: 34
+      md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir
+      size: 719633310
+      nfiles: 33
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 3af168aedf1f81a22024bb8c815f5d12
-      size: 221
+      md5: 5b63b6c51867279dea639df1c50b4416
+      size: 225
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -196,13 +195,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 19019e558886b1acd6d29442a47243d0.dir
-      size: 761937021
-      nfiles: 34
+      md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir
+      size: 719633310
+      nfiles: 33
     - path: data/prepared_data
       hash: md5
-      md5: ba409a8c79863ddc407786b7aa7a053a.dir
-      size: 46113237
+      md5: bdf32829fa7d4084293e54f73084a25c.dir
+      size: 46126494
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -216,8 +215,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: a9f32d70a4817df8092e52c5513a445f.dir
-      size: 484694
+      md5: 76dd44be410b11c62f701c645a9003a9.dir
+      size: 484698
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -228,13 +227,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: a9f32d70a4817df8092e52c5513a445f.dir
-      size: 484694
+      md5: 76dd44be410b11c62f701c645a9003a9.dir
+      size: 484698
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: ba409a8c79863ddc407786b7aa7a053a.dir
-      size: 46113237
+      md5: bdf32829fa7d4084293e54f73084a25c.dir
+      size: 46126494
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -244,7 +243,7 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 736ef69da7edb94577139ae9ede5ac0d
+      md5: de4098ee21d5efbf8b769471ae2fa20f
       size: 224
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
@@ -265,9 +264,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: adcc78833e7a0824ecb10ad78a646ea8
+      md5: d4f3c1eeee96720270e2467fc50b0b29
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 35e704d0499e943c4110f66f1482d2ec
+      md5: 56e39ca1f444b0a97ce894cf9ff90d32
       size: 872

From a0a3d222d8219b04873e55259966cb9d4af958e2 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 17:24:49 +0000
Subject: [PATCH 31/59] new model

---
 .github/workflows/MLPipelinePullRequest.yml   |  2 +-
 .../src/pipeline/configs/build_model.yaml     |  4 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 56 +++++++++----------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index f91fd28..bd0e857 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -85,7 +85,7 @@ jobs:
 
     - name: Test Lambda endpoint
       run: |
-        sleep 5
+        sleep 2
         curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
           -H "Content-Type: application/json" \
           -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index a03f430..9248902 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -14,10 +14,10 @@ default:
       output_filepath: ./data/model/allmodels/
       problem_type: regression
       eval_metric: mean_squared_error #mean_absolute_error
-      time_limit: 1800
+      time_limit: 3600
       presets: medium_quality
       excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
-      infer_limit: 0.0005
+      infer_limit: 1
       infer_limit_batch_size: 10000
       fit_strategy: "parallel"
       ag_args_ensemble: {'num_folds_parallel': 2}
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 673285c..8f177b9 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: bdf32829fa7d4084293e54f73084a25c.dir
-      size: 46126494
+      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
+      size: 46095230
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: bdf32829fa7d4084293e54f73084a25c.dir
-      size: 46126494
+      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
+      size: 46095230
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -91,7 +91,7 @@ stages:
               output_filepath: ./data/model/allmodels/
               problem_type: regression
               eval_metric: mean_squared_error
-              time_limit: 1800
+              time_limit: 3600
               presets: medium_quality
               excluded_model_types:
               - RF
@@ -99,7 +99,7 @@ stages:
               - NN_TORCH
               - KNN
               - XT
-              infer_limit: 0.0005
+              infer_limit: 1
               infer_limit_batch_size: 10000
               fit_strategy: parallel
               ag_args_ensemble:
@@ -174,18 +174,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: bb3a39098652aee04227b8157cffe6dc.dir
-      size: 3475209
+      md5: f29cfa6a2dadf4fbe81813b3d517fd10.dir
+      size: 3474971
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir
-      size: 719633310
-      nfiles: 33
+      md5: 1156f526fe9d11134e49f805c41c3781.dir
+      size: 763384978
+      nfiles: 35
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 5b63b6c51867279dea639df1c50b4416
-      size: 225
+      md5: 24b2f7c34e5e08b66f39289afac5d795
+      size: 226
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -195,13 +195,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir
-      size: 719633310
-      nfiles: 33
+      md5: 1156f526fe9d11134e49f805c41c3781.dir
+      size: 763384978
+      nfiles: 35
     - path: data/prepared_data
       hash: md5
-      md5: bdf32829fa7d4084293e54f73084a25c.dir
-      size: 46126494
+      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
+      size: 46095230
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -215,8 +215,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: 76dd44be410b11c62f701c645a9003a9.dir
-      size: 484698
+      md5: e9b1d9b94d1e44c999c17b7a2d096db9.dir
+      size: 484818
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -227,13 +227,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: 76dd44be410b11c62f701c645a9003a9.dir
-      size: 484698
+      md5: e9b1d9b94d1e44c999c17b7a2d096db9.dir
+      size: 484818
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: bdf32829fa7d4084293e54f73084a25c.dir
-      size: 46126494
+      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
+      size: 46095230
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -243,8 +243,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: de4098ee21d5efbf8b769471ae2fa20f
-      size: 224
+      md5: 88a4e49229cc3c329faf5bf0fcae3318
+      size: 226
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -264,9 +264,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: d4f3c1eeee96720270e2467fc50b0b29
+      md5: 3326cc2e59ac1671d99d3e1f27131f54
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 56e39ca1f444b0a97ce894cf9ff90d32
+      md5: 0a434e055463ec9ade5de2de9bde7154
       size: 872

From d331ee1649c1331e27e99c8e0e6480b330b44f2f Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 17:38:38 +0000
Subject: [PATCH 32/59] try multiple invocations

---
 .github/workflows/MLPipelinePullRequest.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index bd0e857..0d0f661 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -94,6 +94,17 @@ jobs:
       run: |
         docker logs $(docker ps -al -q)
 
+    - name: Test Lambda endpoint again
+      run: |
+        sleep 2
+        curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
+          -H "Content-Type: application/json" \
+          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
+
+    - name: Get Lambda logs
+      run: |
+        docker logs $(docker ps -al -q)
+
     - name: Stop Lambda container
       run: |
         docker stop lambda_test || echo "Container already stopped"

From e0fc65ec8a90fbf4a9578aa075e2119d456fd33c Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 17:47:20 +0000
Subject: [PATCH 33/59] add conditional imports

---
 modules/ml-pipeline/src/pipeline/core/MLModels.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py
index dabe154..2444314 100644
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@@ -11,9 +11,6 @@ import joblib
 import pandas as pd
 from pathlib import Path
 from typing import Union, List
-from sklearn import linear_model
-from sklearn.svm import SVR
-from autogluon.tabular import TabularDataset, TabularPredictor
 from core.interface.InterfaceModels import MLModel
 from core.Logger import logger
 
@@ -69,6 +66,8 @@ class SKLearnLinearRegression:
         """
         Method to train a model
         """
+        from sklearn import linear_model
+
         self.model = linear_model.LinearRegression()
 
         x_train = data.iloc[:, data.columns != target]
@@ -117,6 +116,7 @@ class SKLearnSVMRegression:
         """
         Method to train a model
         """
+        from sklearn.svm import SVR
 
         validate_dict_keys(
             list(model_hyperparameters.keys()),
@@ -161,6 +161,8 @@ class AutogluonAutoML:
         """
         Method to load a model
         """
+        from autogluon.tabular import TabularPredictor
+
         filepath = str(path)
         self.model = TabularPredictor.load(path=filepath)
 
@@ -186,6 +188,8 @@ class AutogluonAutoML:
         """
         Method to train a model
         """
+        from autogluon.tabular import TabularDataset, TabularPredictor
+
         # Force Parallel Model fitting
         os.environ["AG_FORCE_PARALLEL"] = "True"
 

From a20d0e67628cb890ba07c3a0b1eff2e72f85259a Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 18:05:33 +0000
Subject: [PATCH 34/59] add dummy invoke

---
 .github/workflows/MLPipelinePullRequest.yml |  2 +-
 deployment/handlers/prediction_app.py       | 31 +++++++++++++++------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index 0d0f661..cf6a231 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -88,7 +88,7 @@ jobs:
         sleep 2
         curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
           -H "Content-Type: application/json" \
-          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
+          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true, \\\"warm\\\": true}\"}"
 
     - name: Get Lambda logs
       run: |
diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index 25fa120..7deae3a 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -66,14 +66,6 @@ def handler(event, context):
         created_at = body["created_at"]
 
         # TODO: Implement the loading of the model and prediction
-
-        if "testing" in body:
-            storage_filepath = body["file_location"].replace(
-                ".parquet", "_output.parquet"
-            )
-        else:
-            storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
-
         logger.info(f"--- Initiate MLModel ---")
 
         build_model_params = settings.build_model
@@ -83,6 +75,27 @@ def handler(event, context):
 
         model = model_factory(build_model_params["model_type"])
 
+        model_filepath = build_model_params["model_save_filepath"]
+
+        if "testing" in body:
+            storage_filepath = body["file_location"].replace(
+                ".parquet", "_output.parquet"
+            )
+        elif "warm" in body:
+            logger.info("Warm up invocation - skipping prediction")
+
+            import pandas as pd
+
+            model.load_model(model_filepath)
+            return {
+                "statusCode": 200,
+                "body": json.dumps(
+                    {"message": f"{model.predict(data=pd.DataFrame({'a': [1]}))}"}
+                ),
+            }
+        else:
+            storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
+
         logger.info(f"--- Initiate Input DataClient ---")
         input_dataclient = dataclient_factory(
             dataclient_type="aws-s3",
@@ -100,7 +113,7 @@ def handler(event, context):
             output_dataclient=output_dataclient,
             model=model,
             target=feature_process_params["feature_processor_config"]["target"],
-            model_filepath=build_model_params["model_save_filepath"],
+            model_filepath=model_filepath,
             test_data_filepath=body["file_location"],
             predictions_output_filepath=storage_filepath,
             predictions_column_name=generate_predictions_params[

From 2c8f0142588a0f1158a5d283c436b7b4c095cf17 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 18:11:49 +0000
Subject: [PATCH 35/59] remove testing from first one

---
 .github/workflows/MLPipelinePullRequest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml
index cf6a231..69d1a07 100644
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@@ -88,7 +88,7 @@ jobs:
         sleep 2
         curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
           -H "Content-Type: application/json" \
-          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true, \\\"warm\\\": true}\"}"
+          -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"warm\\\": true}\"}"
 
     - name: Get Lambda logs
       run: |

From d7370248a220820cbcba154626f6e1842cd5e3a5 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 22:40:35 +0000
Subject: [PATCH 36/59] use model columns as data

---
 deployment/handlers/prediction_app.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index 7deae3a..7555934 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -90,7 +90,9 @@ def handler(event, context):
             return {
                 "statusCode": 200,
                 "body": json.dumps(
-                    {"message": f"{model.predict(data=pd.DataFrame({'a': [1]}))}"}
+                    {
+                        "message": f"{model.predict(data=pd.DataFrame(columns=model.model.original_features))}"
+                    }
                 ),
             }
         else:

From b333b80d5cf90dddaeed1f09901a646268d42d52 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 22:58:28 +0000
Subject: [PATCH 37/59] add a row of fake data

---
 deployment/handlers/prediction_app.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index 7555934..9d98882 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -87,11 +87,19 @@ def handler(event, context):
             import pandas as pd
 
             model.load_model(model_filepath)
+
+            warmup_df = pd.DataFrame(columns=model.model.original_features)
+            warmup_df = pd.concat(
+                [warmup_df.T, pd.DataFrame([0] * len(warmup_df.T))], axis=1
+            ).T
+            warmup_df.fillna(0, inplace=True)
+
+            model.predict(data=warmup_df)
             return {
                 "statusCode": 200,
                 "body": json.dumps(
                     {
-                        "message": f"{model.predict(data=pd.DataFrame(columns=model.model.original_features))}"
+                        "message": "Successfully processed warm up invocation",
                     }
                 ),
             }

From 87b3c2798629f07d622d9ffa707a6feac3eae369 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 23:11:26 +0000
Subject: [PATCH 38/59] clean up prediction app and add logging

---
 deployment/handlers/prediction_app.py | 46 +++++++++++++++++----------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index 9d98882..5b9a807 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -47,6 +47,23 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
         return False
 
 
+def warming_up_invocation(model_filepath: str):
+    """
+    Function to handle warm up invocations
+    """
+    import pandas as pd
+
+    model = model_factory(settings.build_model["model_type"])
+    model_filepath = settings.build_model["model_save_filepath"]
+    model.load_model(model_filepath)
+
+    warmup_df = pd.DataFrame(columns=model.model.original_features)
+    warmup_df = pd.concat([warmup_df.T, pd.DataFrame([0] * len(warmup_df.T))], axis=1).T
+    warmup_df.fillna(0, inplace=True)
+
+    model.predict(data=warmup_df)
+
+
 def handler(event, context):
     """
     Take in event and trigger the prediction pipeline
@@ -77,32 +94,27 @@ def handler(event, context):
 
         model_filepath = build_model_params["model_save_filepath"]
 
-        if "testing" in body:
-            storage_filepath = body["file_location"].replace(
-                ".parquet", "_output.parquet"
-            )
-        elif "warm" in body:
-            logger.info("Warm up invocation - skipping prediction")
+        if "warm" in body:
+            logger.info("Warm up invocation - synthetic prediction")
 
-            import pandas as pd
+            warming_up_invocation(model_filepath=model_filepath)
 
-            model.load_model(model_filepath)
-
-            warmup_df = pd.DataFrame(columns=model.model.original_features)
-            warmup_df = pd.concat(
-                [warmup_df.T, pd.DataFrame([0] * len(warmup_df.T))], axis=1
-            ).T
-            warmup_df.fillna(0, inplace=True)
-
-            model.predict(data=warmup_df)
             return {
                 "statusCode": 200,
                 "body": json.dumps(
                     {
-                        "message": "Successfully processed warm up invocation",
+                        "message": "Successfully warmed up invocation",
                     }
                 ),
             }
+
+        if "testing" in body:
+            logger.info(
+                "Testing invocation for CI/CD - save file to same location in S3"
+            )
+            storage_filepath = body["file_location"].replace(
+                ".parquet", "_output.parquet"
+            )
         else:
             storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
 

From ed360325190a060e0f11a259da0641cf7d39a65d Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Mon, 3 Nov 2025 23:18:29 +0000
Subject: [PATCH 39/59] add info to readme

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index 22a6002..078358a 100644
--- a/README.md
+++ b/README.md
@@ -83,3 +83,13 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d
 ```
 
 This will send a POST request to the running Lambda function and pass in the required data as JSON.
+
+To send a CI/CD testing invocation or a warm-up invocation to the Lambda, use one of the following:
+
+```bash
+curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": \"true\"}"}'
+```
+or
+```bash
+curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"warm\": \"true\"}"}'
+```

From 630c3586ffc62791d6ef3ad310f6cbe4b04b1f5e Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 13:16:17 +0000
Subject: [PATCH 40/59] update gto

---
 .../src/pipeline/requirements/version_control/requirements.txt  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
index 173550d..fa93d82 100644
--- a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
@@ -1,4 +1,4 @@
 dvc==3.51.0
 dvc-s3==3.2.0
-gto==1.7.1
+gto==1.9.0
 pyOpenSSL==23.3.0

From 9fa44d8aca23d7e14ded11587e8db42de6356d88 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 13:24:56 +0000
Subject: [PATCH 41/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 1925c6e..3de8173 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.16.0",
+        "version": "v0.17.0",
         "stage": {
             "dev": "v0.16.0"
         },

From da1d815919ad630e19c175963b5aae0f81e689ec Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 13:25:33 +0000
Subject: [PATCH 42/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 3de8173..220f09a 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.0",
         "stage": {
-            "dev": "v0.16.0"
+            "dev": "v0.17.0"
         },
         "registered": true,
         "active": true

From 454e86db9c164b341a47509d30e8f548d52d01f6 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 14:34:58 +0000
Subject: [PATCH 43/59] change the MPLCONFIGDIR to /tmp/matplotlib

---
 deployment/Dockerfile.prediction.lambda | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda
index ca21a8e..6d226dc 100644
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@@ -3,7 +3,7 @@ FROM public.ecr.aws/lambda/python:3.12
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
 ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
-ENV MPLCONFIGDIR="${LAMBDA_TASK_ROOT}/tmp/matplotlib"
+ENV MPLCONFIGDIR="/tmp/matplotlib"
 
 # Environment variables
 ARG RUNTIME_ENVIRONMENT

From 284cfca3a983d4d38b27f4c3b592138d8e817807 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 14:45:21 +0000
Subject: [PATCH 44/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 220f09a..16823ca 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.17.0",
+        "version": "v0.17.1",
         "stage": {
             "dev": "v0.17.0"
         },

From 3b9699038a137f133e7e813f3347459fc62ae869 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 14:45:59 +0000
Subject: [PATCH 45/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 16823ca..15f2019 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.1",
         "stage": {
-            "dev": "v0.17.0"
+            "dev": "v0.17.1"
         },
         "registered": true,
         "active": true

From 175bf87a959fdc04efdfdab446dbacee60795a62 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 16:16:30 +0000
Subject: [PATCH 46/59] trigger only one model to warm up, speed up dataframe
 generation

---
 deployment/handlers/prediction_app.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index 5b9a807..e8b02ac 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -47,21 +47,28 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
         return False
 
 
-def warming_up_invocation(model_filepath: str):
+def warming_up_invocation(
+    model,
+    model_filepath: str,
+):
     """
     Function to handle warm up invocations
     """
     import pandas as pd
+    import numpy as np
 
-    model = model_factory(settings.build_model["model_type"])
-    model_filepath = settings.build_model["model_save_filepath"]
     model.load_model(model_filepath)
 
-    warmup_df = pd.DataFrame(columns=model.model.original_features)
-    warmup_df = pd.concat([warmup_df.T, pd.DataFrame([0] * len(warmup_df.T))], axis=1).T
-    warmup_df.fillna(0, inplace=True)
+    warmup_df = pd.DataFrame(
+        np.zeros((1, len(model.model.original_features))),
+        columns=model.model.original_features,
+    )
 
-    model.predict(data=warmup_df)
+    model_names = model.model.model_names()
+    if "NeuralNetFastAI" in model_names:
+        model.model.predict(warmup_df, model="NeuralNetFastAI")
+    else:
+        model.predict(data=warmup_df)
 
 
 def handler(event, context):
@@ -97,7 +104,7 @@ def handler(event, context):
         if "warm" in body:
             logger.info("Warm up invocation - synthetic prediction")
 
-            warming_up_invocation(model_filepath=model_filepath)
+            warming_up_invocation(model=model, model_filepath=model_filepath)
 
             return {
                 "statusCode": 200,

From c1dda84206d9939ba863ccd14d7eb2cc80d0215e Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 16:24:08 +0000
Subject: [PATCH 47/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 15f2019..ae8508d 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.17.1",
+        "version": "v0.17.2",
         "stage": {
             "dev": "v0.17.1"
         },

From 3231c0724a8a0291169b275fc04b156387fb033a Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 16:24:47 +0000
Subject: [PATCH 48/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index ae8508d..b173c5c 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.2",
         "stage": {
-            "dev": "v0.17.1"
+            "dev": "v0.17.2"
         },
         "registered": true,
         "active": true

From 9cd9208e3ba925caab2a9fe22e642e9adfb15c24 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 16:50:23 +0000
Subject: [PATCH 49/59] try without fast ai

---
 .../src/pipeline/configs/build_model.yaml     |  2 +-
 modules/ml-pipeline/src/pipeline/dvc.lock     | 53 ++++++++++---------
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
index 9248902..f93fb3b 100644
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@@ -16,7 +16,7 @@ default:
       eval_metric: mean_squared_error #mean_absolute_error
       time_limit: 3600
       presets: medium_quality
-      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
+      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT', 'FASTAI']
       infer_limit: 1
       infer_limit_batch_size: 10000
       fit_strategy: "parallel"
diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock
index 8f177b9..eff64a2 100644
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@@ -61,8 +61,8 @@ stages:
     outs:
     - path: data/prepared_data/
       hash: md5
-      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
-      size: 46095230
+      md5: d798b73fafe6d59c96c0216baeaf085a.dir
+      size: 46090520
       nfiles: 3
   build_model:
     cmd: python 2_build_model.py
@@ -73,8 +73,8 @@ stages:
       size: 4820
     - path: data/prepared_data
       hash: md5
-      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
-      size: 46095230
+      md5: d798b73fafe6d59c96c0216baeaf085a.dir
+      size: 46090520
       nfiles: 3
     params:
       configs/build_model.yaml:
@@ -99,6 +99,7 @@ stages:
               - NN_TORCH
               - KNN
               - XT
+              - FASTAI
               infer_limit: 1
               infer_limit_batch_size: 10000
               fit_strategy: parallel
@@ -174,18 +175,18 @@ stages:
     outs:
     - path: data/fit_predictions/
       hash: md5
-      md5: f29cfa6a2dadf4fbe81813b3d517fd10.dir
-      size: 3474971
+      md5: 2d3627b9752e0eb6988d655cc76cb871.dir
+      size: 3474407
       nfiles: 1
     - path: data/model/
       hash: md5
-      md5: 1156f526fe9d11134e49f805c41c3781.dir
-      size: 763384978
-      nfiles: 35
+      md5: e4279fd1aff989b128e7477ad7e02d81.dir
+      size: 790249675
+      nfiles: 31
     - path: metrics/fit_metrics.json
       hash: md5
-      md5: 24b2f7c34e5e08b66f39289afac5d795
-      size: 226
+      md5: e45c166e089965e9c17d9b4a6656d6d6
+      size: 225
   generate_predictions:
     cmd: python 3_generate_predictions.py
     deps:
@@ -195,13 +196,13 @@ stages:
       size: 2464
     - path: data/model
       hash: md5
-      md5: 1156f526fe9d11134e49f805c41c3781.dir
-      size: 763384978
-      nfiles: 35
+      md5: e4279fd1aff989b128e7477ad7e02d81.dir
+      size: 790249675
+      nfiles: 31
     - path: data/prepared_data
       hash: md5
-      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
-      size: 46095230
+      md5: d798b73fafe6d59c96c0216baeaf085a.dir
+      size: 46090520
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -215,8 +216,8 @@ stages:
     outs:
     - path: data/predictions/
       hash: md5
-      md5: e9b1d9b94d1e44c999c17b7a2d096db9.dir
-      size: 484818
+      md5: fdebcc5ba775c2b416e33e8775dd450a.dir
+      size: 484710
       nfiles: 1
   generate_metrics:
     cmd: python 4_generate_metrics.py
@@ -227,13 +228,13 @@ stages:
       size: 3484
     - path: data/predictions
       hash: md5
-      md5: e9b1d9b94d1e44c999c17b7a2d096db9.dir
-      size: 484818
+      md5: fdebcc5ba775c2b416e33e8775dd450a.dir
+      size: 484710
       nfiles: 1
     - path: data/prepared_data
       hash: md5
-      md5: 54204b6a31ba369cfbd26b9b25bfa355.dir
-      size: 46095230
+      md5: d798b73fafe6d59c96c0216baeaf085a.dir
+      size: 46090520
       nfiles: 3
     params:
       configs/settings.yaml:
@@ -243,8 +244,8 @@ stages:
     outs:
     - path: metrics/metrics.json
       hash: md5
-      md5: 88a4e49229cc3c329faf5bf0fcae3318
-      size: 226
+      md5: a5f8e795d87356eaff446ae7006a47fe
+      size: 224
   generate_scenerio_metrics:
     cmd: python 5_generate_scenarios.py
     deps:
@@ -264,9 +265,9 @@ stages:
     outs:
     - path: metrics/scenario_metrics.md
       hash: md5
-      md5: 3326cc2e59ac1671d99d3e1f27131f54
+      md5: 86c9a8f2520cac8ed0796d62c03de278
       size: 356
     - path: metrics/scenario_table.md
       hash: md5
-      md5: 0a434e055463ec9ade5de2de9bde7154
+      md5: 686f3f5d966c82c0f68baaaa74617aa1
       size: 872

From 3e38b572ce65c5245e5bd3134cd619bd4867ae8f Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 16:52:24 +0000
Subject: [PATCH 50/59] remove the specific fast ai prediction

---
 deployment/handlers/prediction_app.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/deployment/handlers/prediction_app.py b/deployment/handlers/prediction_app.py
index e8b02ac..f1036d3 100644
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -64,11 +64,11 @@ def warming_up_invocation(
         columns=model.model.original_features,
     )
 
-    model_names = model.model.model_names()
-    if "NeuralNetFastAI" in model_names:
-        model.model.predict(warmup_df, model="NeuralNetFastAI")
-    else:
-        model.predict(data=warmup_df)
+    # model_names = model.model.model_names()
+    # if "NeuralNetFastAI" in model_names:
+    #     model.model.predict(warmup_df, model="NeuralNetFastAI")
+    # else:
+    model.predict(data=warmup_df)
 
 
 def handler(event, context):

From 62deab922b53e935e538d563bbea9c7ed51f41ca Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 17:34:51 +0000
Subject: [PATCH 51/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index b173c5c..118973d 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.17.2",
+        "version": "v0.17.3",
         "stage": {
             "dev": "v0.17.2"
         },

From 2df774c1b10f57dfb4c32e2500f86d951b07c013 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 17:35:26 +0000
Subject: [PATCH 52/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 118973d..7300ca5 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.3",
         "stage": {
-            "dev": "v0.17.2"
+            "dev": "v0.17.3"
         },
         "registered": true,
         "active": true

From 1e2968763cdfb763657c1f3d3983f5168f2708e9 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 18:09:40 +0000
Subject: [PATCH 53/59] update the serverless.yml file

---
 deployment/serverless.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deployment/serverless.yml b/deployment/serverless.yml
index b23158d..b9f4ac4 100644
--- a/deployment/serverless.yml
+++ b/deployment/serverless.yml
@@ -4,6 +4,7 @@ provider:
   name: aws
   region: eu-west-2
   architecture: x86_64
+  memorySize: 4096
   environment:
     RUNTIME_ENVIRONMENT: ${env:RUNTIME_ENVIRONMENT}
     PREDICTIONS_BUCKET: ${env:PREDICTIONS_BUCKET}

From e20c6d1baefb70f7015def5cbf35cbb10da17a40 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 18:13:40 +0000
Subject: [PATCH 54/59]  fix location

---
 deployment/serverless.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deployment/serverless.yml b/deployment/serverless.yml
index b9f4ac4..009f7db 100644
--- a/deployment/serverless.yml
+++ b/deployment/serverless.yml
@@ -4,7 +4,6 @@ provider:
   name: aws
   region: eu-west-2
   architecture: x86_64
-  memorySize: 4096
   environment:
     RUNTIME_ENVIRONMENT: ${env:RUNTIME_ENVIRONMENT}
     PREDICTIONS_BUCKET: ${env:PREDICTIONS_BUCKET}
@@ -52,3 +51,4 @@ functions:
           path: /predict
           method: POST
     timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
+    memorySize: 4096

From 13fbdebc8b5c4a97eaa2d40837357bab31ec5444 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 18:16:21 +0000
Subject: [PATCH 55/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 7300ca5..1d55771 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.17.3",
+        "version": "v0.17.4",
         "stage": {
             "dev": "v0.17.3"
         },

From ec29ef093a397097629d07d8a5a6af492135c0f5 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 18:16:55 +0000
Subject: [PATCH 56/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 1d55771..675a396 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.4",
         "stage": {
-            "dev": "v0.17.3"
+            "dev": "v0.17.4"
         },
         "registered": true,
         "active": true

From 1ca096bd4421d5c903f015adcc711c8f91b2ead6 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 4 Nov 2025 18:35:01 +0000
Subject: [PATCH 57/59] use serverless max memory of 3008

---
 deployment/serverless.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deployment/serverless.yml b/deployment/serverless.yml
index 009f7db..3596c2b 100644
--- a/deployment/serverless.yml
+++ b/deployment/serverless.yml
@@ -51,4 +51,4 @@ functions:
           path: /predict
           method: POST
     timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
-    memorySize: 4096
+    memorySize: 3008

From 2e1eec8f932687ae0a8ae7f259d0f5a9adae2111 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 18:37:12 +0000
Subject: [PATCH 58/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index 675a396..ee8d751 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,7 +8,7 @@
         "active": true
     },
     "sap": {
-        "version": "v0.17.4",
+        "version": "v0.17.5",
         "stage": {
             "dev": "v0.17.4"
         },

From 171089410c4a9e26c40232d4da0e76664cfeb455 Mon Sep 17 00:00:00 2001
From: Github-Bot <Github-Bot@no-reply.com>
Date: Tue, 4 Nov 2025 18:37:49 +0000
Subject: [PATCH 59/59] Update Registry

---
 MODEL_REGISTRY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md
index ee8d751..dc3aa61 100644
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -10,7 +10,7 @@
     "sap": {
         "version": "v0.17.5",
         "stage": {
-            "dev": "v0.17.4"
+            "dev": "v0.17.5"
         },
         "registered": true,
         "active": true