2026-06-08 11:17:25 +00:00
26 changed files with 379 additions and 992 deletions
--- a/.github/workflows/Deploy.yml
+++ b/.github/workflows/Deploy.yml
@ -2,17 +2,7 @@ name: Sap Change Model Deploy
 on:
  push:
-    branches:
+    branches: [ sap-dev, sap-prod, heat-dev, heat-prod, carbon-dev, carbon-prod]
      [
        sap-dev,
        sap-prod,
        heat-dev,
        heat-prod,
        carbon-dev,
        carbon-prod,
        heat_baseline-dev,
        heat_baseline-prod,
      ]
 jobs:
  deploy:
@ -41,8 +31,8 @@ jobs:
      - name: set secret prefix which is used across multiple steps
        id: secret_prefix
        run: |
-          # Convert branch name to uppercase and replace hyphens with underscores
+            # Convert branch name to uppercase and replace hyphens with underscores
-          echo "::set-output name=secret_prefix::$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')"
+            echo "::set-output name=secret_prefix::$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')"
      - name: Set domain name
        id: set_domain
@ -126,7 +116,7 @@ jobs:
        env:
          RUNTIME_ENVIRONMENT: ${{ steps.set_runtime_environment.outputs.runtime_environment }}
          PREDICTIONS_BUCKET: ${{ steps.set_s3_buckets.outputs.predictions_bucket }}
-          DATA_BUCKET: ${{ steps.set_s3_buckets.outputs.data_bucket }}
+          DATA_BUCKET:  ${{ steps.set_s3_buckets.outputs.data_bucket }}
          DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }}
          ECR_URI: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
          GITHUB_SHA: ${{ github.sha }}
--- a/.github/workflows/MLPipelinePostMerge.yml
+++ b/.github/workflows/MLPipelinePostMerge.yml
@ -13,7 +13,6 @@ on:
      - "sap-dev"
      - "heat-dev"
      - "carbon-dev"
      - "heat_baseline-dev"
 permissions: write-all
@ -22,171 +21,166 @@ jobs:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }}
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+    - uses: actions/checkout@v4
-        with:
+      with:
-          fetch-depth: 0
+        fetch-depth: 0
-      - name: Install packages to register model
+    - name: Install packages to register model
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Register Model
+    - name: Register Model
-        run: |
+      run: |
-          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-          git config user.name "Github-Bot"
+        git config user.name "Github-Bot"
-          git config user.email "Github-Bot@no-reply.com"
+        git config user.email "Github-Bot@no-reply.com"
-          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false
+        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false
-          if [ -z "${latest_version}" ]; then
+        if [ -z "${latest_version}" ]; then
-            increment_version="1.0.0"
+          increment_version="1.0.0"
-          else
+        else
-            increment_version=$(echo ${latest_version} | awk 'BEGIN {
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
-                FS="\\."   # Set the field separator to a period
+              FS="\\."   # Set the field separator to a period
-                OFS="."    # Set the output field separator to a period
+              OFS="."    # Set the output field separator to a period
-            }
+          }
-            {
+          {
-                major = $1 + 1   # Increment the major version
+              major = $1 + 1   # Increment the major version
-                print major, "0", "0"   # Print the new version
+              print major, "0", "0"   # Print the new version
-            }')
+          }')
-          fi
+        fi
-          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-          git tag -a ${new_tag} -m "Registering new Major Version"
+        git tag -a ${new_tag} -m "Registering new Major Version"
-          git push origin ${new_tag}
+        git push origin ${new_tag}
-          gto show --json > MODEL_REGISTRY.md
+        gto show --json > MODEL_REGISTRY.md
-          git add .
+        git add .
-          git commit -m "Update Registry"
+        git commit -m "Update Registry"
-          git push
+        git push
  Register-Minor-Model-Dev:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }}
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+    - uses: actions/checkout@v4
-        with:
+      with:
-          fetch-depth: 0
+        fetch-depth: 0
-      - name: Install packages to register model
+    - name: Install packages to register model
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Register Model
+    - name: Register Model
-        run: |
+      run: |
-          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-          git config user.name "Github-Bot"
+        git config user.name "Github-Bot"
-          git config user.email "Github-Bot@no-reply.com"
+        git config user.email "Github-Bot@no-reply.com"
-          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
+        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
-          if [ -z "${latest_version}" ]; then
+        if [ -z "${latest_version}" ]; then
-            increment_version="0.1.0"
+          increment_version="0.1.0"
-          else
+        else
-            increment_version=$(echo ${latest_version} | awk 'BEGIN {
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
-                FS="\\."   # Set the field separator to a period
+              FS="\\."   # Set the field separator to a period
-                OFS="."    # Set the output field separator to a period
+              OFS="."    # Set the output field separator to a period
-            }
+          }
-            {
+          {
-                minor = $2 + 1   # Increment the minor version
+              minor = $2 + 1   # Increment the minor version
-                print $1, minor, "0"   # Print the new version
+              print $1, minor, "0"   # Print the new version
-            }')
+          }')
-          fi
+        fi
-          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-          git tag -a ${new_tag} -m "Registering new Minor Version"
+        git tag -a ${new_tag} -m "Registering new Minor Version"
-          git push origin ${new_tag}
+        git push origin ${new_tag}
-          gto show --json > MODEL_REGISTRY.md
+        gto show --json > MODEL_REGISTRY.md
-          git add .
+        git add .
-          git commit -m "Update Registry"
+        git commit -m "Update Registry"
-          git push
+        git push
  Register-Patch-Model-Dev:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }}
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+    - uses: actions/checkout@v4
-        with:
+      with:
-          fetch-depth: 0
+        fetch-depth: 0
-      - name: Install packages to register model
+    - name: Install packages to register model
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Register Model
+    - name: Register Model
-        run: |
+      run: |
-          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-          git config user.name "Github-Bot"
+        git config user.name "Github-Bot"
-          git config user.email "Github-Bot@no-reply.com"
+        git config user.email "Github-Bot@no-reply.com"
-          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
+        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
-          if [ -z "${latest_version}" ]; then
+        if [ -z "${latest_version}" ]; then
-            increment_version="0.0.1"
+          increment_version="0.0.1"
-          else
+        else
-            increment_version=$(echo ${latest_version} | awk 'BEGIN {
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
-                FS="\\."   # Set the field separator to a period
+              FS="\\."   # Set the field separator to a period
-                OFS="."    # Set the output field separator to a period
+              OFS="."    # Set the output field separator to a period
-            }
+          }
-            {
+          {
-                patch = $3 + 1   # Increment the patch version
+              patch = $3 + 1   # Increment the patch version
-                print $1, $2, patch   # Print the new version
+              print $1, $2, patch   # Print the new version
-            }')
+          }')
-          fi
+        fi
-          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-          git tag -a ${new_tag} -m "Registering new Patch Version"
+        git tag -a ${new_tag} -m "Registering new Patch Version"
-          git push origin ${new_tag}
+        git push origin ${new_tag}
-          gto show --json > MODEL_REGISTRY.md
+        gto show --json > MODEL_REGISTRY.md
-          git add .
+        git add .
-          git commit -m "Update Registry"
+        git commit -m "Update Registry"
-          git push
+        git push
  Promote-Artefacts-To-Dev:
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+    - uses: actions/checkout@v3
-      - name: Install packages to retrieve artifacts
+    - name: Install packages to retrieve artifacts
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Retrieve artifacts (dvc.lock)
+    - name: Retrieve artifacts (dvc.lock)
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/pipeline
+        cd modules/ml-pipeline/src/pipeline
-          dvc pull -r experiments
+        dvc pull -r experiments
-      - name: Push artifacts to Dev
+    - name: Push artifacts to Dev
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/pipeline
+        cd modules/ml-pipeline/src/pipeline
-          dvc push -r dev
+        dvc push -r dev
  Register-New-Model-Dev:
-    needs:
+    needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev]
      [
        Register-Major-Model-Dev,
        Register-Minor-Model-Dev,
        Register-Patch-Model-Dev,
      ]
    if: |
      always() &&
      (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') &&
@ -195,50 +189,50 @@ jobs:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+    - uses: actions/checkout@v3
-        with:
+      with:
-          fetch-depth: 0
+        fetch-depth: 0
-      - name: Install packages to register model
+    - name: Install packages to register model
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Register Model
+    - name: Register Model
-        env:
+      env:
-          TARGET_BRANCH: ${{ github.base_ref }}
+        TARGET_BRANCH: ${{ github.base_ref }}
-        run: |
+      run: |
-          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-          git config user.name "Github-Bot"
+        git config user.name "Github-Bot"
-          git config user.email "Github-Bot@no-reply.com"
+        git config user.email "Github-Bot@no-reply.com"
-          latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
+        latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
-          if [ -z "${latest_dev_version}" ]; then
+        if [ -z "${latest_dev_version}" ]; then
-            increment_version="1"
+          increment_version="1"
-          else
+        else
-            increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}')
+          increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}')
-          fi
+        fi
-          new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
+        new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
-          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
+        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
-          echo ${new_tag}
+        echo ${new_tag}
-          commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}')
+        commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}')
-          git checkout ${commit_hash}
+        git checkout ${commit_hash}
-          # git pull #Get new model registry md file changes
+        # git pull #Get new model registry md file changes
-          git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}"
+        git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}"
-          git push origin ${new_tag}
+        git push origin ${new_tag}
-          git checkout ${TARGET_BRANCH}
+        git checkout ${TARGET_BRANCH}
-          git fetch --all
+        git fetch --all
-          git pull
+        git pull
-          gto show --json > MODEL_REGISTRY.md
+        gto show --json > MODEL_REGISTRY.md
-          git add .
+        git add .
-          git commit -m "Update Registry"
+        git commit -m "Update Registry"
-          git push origin ${TARGET_BRANCH}
+        git push origin ${TARGET_BRANCH}
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@ -5,21 +5,22 @@ on:
  #   branches:
  #     - "model-**"
  pull_request:
-    branches: ["sap-dev", "heat-dev", "carbon-dev", "heat_baseline-dev"]
+    branches: ["sap-dev", "heat-dev", "carbon-dev"]
  label:
    types: ["created", "edited"]
 permissions: write-all
 jobs:
  Check-Label:
    runs-on: ubuntu-latest
    steps:
-      - uses: yogevbd/enforce-label-action@2.1.0
+    - uses: yogevbd/enforce-label-action@2.1.0
-        with:
+      with:
-          REQUIRED_LABELS_ANY: "major,minor,patch"
+        REQUIRED_LABELS_ANY: "major,minor,patch"
-          REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']"
+        REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']"
-          BANNED_LABELS: "banned"
+        BANNED_LABELS: "banned"
  # No-Label:
  #   if: ${{ github.event.label.name != 'major' }} || ${{ github.event.label.name != 'minor' }}  || ${{ github.event.label.name != 'patch' }}
@ -31,168 +32,86 @@ jobs:
  #       echo "Please choose one of these tags: 'major', 'major', 'patch'"
  #       exit(1)
  # Builds the prediction Lambda image, runs it locally and invokes it twice
  # (once with "warm", once with "testing" in the request body) against a
  # sample dataset uploaded to S3, then removes the uploaded data again.
  Verify-Lambda:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Install packages to retrieve artifacts
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          pip install --upgrade pip
          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
      - name: Retrieve artifacts (dvc.lock)
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          cd modules/ml-pipeline/src/pipeline
          dvc pull -r experiments
      - name: Set timestamp
        id: set_timestamp
        run: |
          # Values appended to GITHUB_ENV only become visible in later steps,
          # so keep a local copy for the log line below.
          # The run id makes the S3 prefix unique per workflow run, so the
          # recursive cleanup at the end cannot delete another run's data.
          timestamp="$(date +%Y%m%d)-${GITHUB_RUN_ID}"
          echo "timestamp=${timestamp}" >> "$GITHUB_ENV"
          echo "Generated timestamp: ${timestamp}"
      - name: Upload sample row dataset to S3
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          cd modules/ml-pipeline/src/pipeline/data/prepared_data/
          aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet
      - name: Build Lambda docker Image
        run: |
          docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
      - name: Run lambda docker container
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          # Name the container explicitly so the later logs/stop steps can
          # address it; "lambda_test" on its own is only the image tag.
          docker run -d -p 9000:8080 \
            --name lambda_test \
            -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
            -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
            -e RUNTIME_ENVIRONMENT=dev \
            -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
      - name: Test Lambda endpoint
        run: |
          sleep 2
          curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
            -H "Content-Type: application/json" \
            -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"warm\\\": true}\"}"
      - name: Get Lambda logs
        run: |
          docker logs lambda_test
      - name: Test Lambda endpoint again
        run: |
          sleep 2
          curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
            -H "Content-Type: application/json" \
            -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
      - name: Get Lambda logs
        run: |
          docker logs lambda_test
      - name: Stop Lambda container
        run: |
          docker stop lambda_test || echo "Container already stopped"
      - name: Remove uploaded sample row dataset from S3
        # Runs even when an earlier step failed so the sample data is not
        # left behind in the bucket.
        if: always()
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
  Verify-Model:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+    - uses: actions/checkout@v3
-      - name: Install packages to retrieve artifacts
+    - name: Install packages to retrieve artifacts
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Retrieve artifacts (dvc.lock)
+    - name: Retrieve artifacts (dvc.lock)
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/pipeline
+        cd modules/ml-pipeline/src/pipeline
-          dvc pull -r experiments
+        dvc pull -r experiments
-      - name: Build Prediction docker Image
+    - name: Build Prediction docker Image
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/
+        cd modules/ml-pipeline/src/
-          docker build . --file Prediction.Dockerfile --tag prediction_test
+        docker build . --file Prediction.Dockerfile --tag prediction_test
-      - name: Run Prediction docker container
+    - name: Run Prediction docker container
-        run: |
+      run: |
-          docker run prediction_test
+        docker run prediction_test
  Trigger-CML:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+    - uses: actions/checkout@v3
-      - name: Install packages to retrieve artifacts
+    - name: Install packages to retrieve artifacts
-        run: |
+      run: |
-          pip install --upgrade pip
+        pip install --upgrade pip
-          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-      - name: Retrieve artifacts (dvc.lock)
+    - name: Retrieve artifacts (dvc.lock)
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/pipeline
+        cd modules/ml-pipeline/src/pipeline
-          dvc pull -r experiments
+        dvc pull -r experiments
-      - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v4
-      - uses: iterative/setup-cml@v1
+    - uses: iterative/setup-cml@v1
-      - name: Generate report
+    - name: Generate report
-        env:
+      env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          TARGET_BRANCH: ${{ github.base_ref }}
+        TARGET_BRANCH: ${{ github.base_ref }}
-        run: |
+      run: |
-          cd modules/ml-pipeline/src/pipeline
+        cd modules/ml-pipeline/src/pipeline
-          echo "## Model metrics" > report.md
+        echo "## Model metrics" > report.md
-          # Compare metrics to master
+        # Compare metrics to master
-          git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
+        git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
-          dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
+        dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
-          echo "## Scenario comparison" >> report.md
+        echo "## Scenario comparison" >> report.md
-          cat metrics/scenario_table.md >> report.md
+        cat metrics/scenario_table.md >> report.md
-          echo "" >> report.md
+        echo "" >> report.md
-          echo "## Scenario metrics" >> report.md
+        echo "## Scenario metrics" >> report.md
-          cat metrics/scenario_metrics.md >> report.md
+        cat metrics/scenario_metrics.md >> report.md
-          cml comment create report.md
+        cml comment create report.md
-          # echo "## Residuals plot from model" >> report.md
+        # echo "## Residuals plot from model" >> report.md
-          # metrics_location=$(find . -maxdepth 10 -name "residuals.png")
+        # metrics_location=$(find . -maxdepth 10 -name "residuals.png")
-          # echo $metrics_location
+        # echo $metrics_location
-          # cd $metric_location
+        # cd $metric_location
-          # echo "![](./residuals.png)" >> report.md
+        # echo "![](./residuals.png)" >> report.md
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@ -8,65 +8,25 @@
        "active": true
    },
    "sap": {
-        "version": "v0.17.5",
+        "version": "v0.14.0",
        "stage": {
-            "dev": "v0.17.5"
+            "dev": "v0.14.0"
        },
        "registered": true,
        "active": true
    },
    "heat": {
-        "version": "v0.8.0",
+        "version": "v0.5.0",
        "stage": {
-            "dev": "v0.8.0"
+            "dev": "v0.5.0"
        },
        "registered": true,
        "active": true
    },
    "carbon": {
-        "version": "v0.8.0",
+        "version": "v0.5.0",
        "stage": {
-            "dev": "v0.7.0"
+            "dev": "v0.5.0"
        },
        "registered": true,
        "active": true
    },
    "hotwater": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "heating": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "lighting": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "hotwaterkwh": {
        "version": "v1.3.0",
        "stage": {
            "dev": "v1.3.0"
        },
        "registered": true,
        "active": true
    },
    "heatingkwh": {
        "version": "v1.5.0",
        "stage": {
            "dev": "v1.5.0"
        },
        "registered": true,
        "active": true
--- a/README.md
+++ b/README.md
@ -83,13 +83,3 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d
 ```
 This will send a POST request to the running Lambda function and pass in the required data as JSON.
 To send a test invocation (`testing`) or a warm-up invocation (`warm`) to the Lambda, use:
 ```bash
 curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": \"true\"}"}'
 ```
 or
 ```bash
 curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"warm\": \"true\"}"}'
 ```
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@ -1,24 +1,19 @@
-FROM public.ecr.aws/lambda/python:3.12
+FROM public.ecr.aws/lambda/python:3.10
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
-ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
+ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
 ENV MPLCONFIGDIR="/tmp/matplotlib"
 # Environment variables
 ARG RUNTIME_ENVIRONMENT
 ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
 # Install necessary build tools - required to test locally
-RUN dnf install -y gcc python3-devel gcc-c++
+RUN yum install -y gcc python3-devel gcc-c++
 # Install python packages
 COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt
-
+RUN pip install --no-cache-dir -r ./requirements.txt
 RUN pip install uv
 RUN uv pip install -r requirements.txt --system
 # RUN pip install --no-cache-dir -r ./requirements.txt
 # Copy the project code
 COPY modules/ml-pipeline/src/pipeline ./pipeline
@ -27,4 +22,4 @@ COPY deployment/handlers/prediction_app.py ./pipeline/prediction_app.py
 WORKDIR ${LAMBDA_TASK_ROOT}/pipeline
-CMD [ "prediction_app.handler" ]
+CMD [ "prediction_app.handler" ]
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@ -47,30 +47,6 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
        return False
 def warming_up_invocation(model, model_filepath: str):
    """
    Serve a warm-up request by running one synthetic prediction.

    Loads the model from ``model_filepath`` and calls ``model.predict`` on a
    single all-zero row whose columns are ``model.model.original_features``.
    Nothing is returned; the prediction result is discarded.
    """
    # Imported at call time, inside the function, as in the original.
    import numpy as np
    import pandas as pd

    model.load_model(model_filepath)

    feature_names = model.model.original_features
    zero_row = np.zeros((1, len(feature_names)))
    warmup_df = pd.DataFrame(zero_row, columns=feature_names)

    # A previous variant (left disabled) predicted with the "NeuralNetFastAI"
    # sub-model when it was listed in model.model.model_names(); the default
    # predict path is used unconditionally here.
    model.predict(data=warmup_df)
 def handler(event, context):
    """
    Take in event and trigger the prediction pipeline
@ -90,6 +66,9 @@ def handler(event, context):
        created_at = body["created_at"]
        # TODO: Implement the loading of the model and prediction
        storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
        logger.info(f"--- Initiate MLModel ---")
        build_model_params = settings.build_model
@ -99,32 +78,6 @@ def handler(event, context):
        model = model_factory(build_model_params["model_type"])
        model_filepath = build_model_params["model_save_filepath"]
        if "warm" in body:
            logger.info("Warm up invocation - synthetic prediction")
            warming_up_invocation(model=model, model_filepath=model_filepath)
            return {
                "statusCode": 200,
                "body": json.dumps(
                    {
                        "message": "Successfully warmed up invocation",
                    }
                ),
            }
        if "testing" in body:
            logger.info(
                "Testing invocation for CI/CD - save file to same location in S3"
            )
            storage_filepath = body["file_location"].replace(
                ".parquet", "_output.parquet"
            )
        else:
            storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
        logger.info(f"--- Initiate Input DataClient ---")
        input_dataclient = dataclient_factory(
            dataclient_type="aws-s3",
@ -142,7 +95,7 @@ def handler(event, context):
            output_dataclient=output_dataclient,
            model=model,
            target=feature_process_params["feature_processor_config"]["target"],
-            model_filepath=model_filepath,
+            model_filepath=build_model_params["model_save_filepath"],
            test_data_filepath=body["file_location"],
            predictions_output_filepath=storage_filepath,
            predictions_column_name=generate_predictions_params[
--- a/deployment/serverless.yml
+++ b/deployment/serverless.yml
@ -51,4 +51,3 @@ functions:
          path: /predict
          method: POST
    timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
    memorySize: 3008
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@ -1,8 +1,7 @@
 export PYENV_ROOT=$(HOME)/.pyenv
 export PATH := $(PYENV_ROOT)/bin:$(PATH)
-PYTHON_VERSION ?= 3.12.12
+PYTHON_VERSION ?= 3.10.12
 CONDA_ENV=dev_env_pipeline
 CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
 .PHONY: init
 init: dev-conda
@ -13,15 +12,11 @@ dev-conda:
 	# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
 	conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
 	conda init bash
-	${CONDA_ACTIVATE} ${CONDA_ENV} && \
+	conda run -v -n ${CONDA_ENV} pip install --upgrade pip
-		which pip && \
+	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt
-		pip install --upgrade pip && \
+	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
-		pip install uv && \
+	conda run -v -n ${CONDA_ENV} pre-commit install
-		uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
+	conda run -v -n ${CONDA_ENV} pip install ipykernel
 		uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
 		pre-commit install && \
 		uv pip install ipykernel && \
 		conda install llvm-openmp -y
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
 	echo "conda activate ${CONDA_ENV}"
@ -38,4 +33,4 @@ dev-pyenv:
 .PHONY: dvc-init
 dvc-init:
-	. .dev_env_pipeline/bin/activate && dvc init --subdir
+	. .dev_env_pipeline/bin/activate && dvc init --subdir
--- a/modules/ml-pipeline/src/Prediction.Dockerfile
+++ b/modules/ml-pipeline/src/Prediction.Dockerfile
@ -1,21 +1,16 @@
 # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
-FROM python:3.12.12-slim
+FROM python:3.10.12-slim
 RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
 COPY pipeline/requirements/predictions/requirements.txt requirements.txt
 RUN pip install --upgrade pip
-
+RUN pip install -r requirements.txt
 RUN pip install uv
 RUN uv pip install -r requirements.txt --system
 # RUN pip install -r requirements.txt
 # Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
 COPY pipeline/ /home/pipeline/
 WORKDIR /home/pipeline/
-CMD [ "python", "3_generate_predictions.py"]
+CMD [ "python", "3_generate_predictions.py"]
--- a/modules/ml-pipeline/src/pipeline/1_prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/1_prepare_data.py
@ -29,7 +29,6 @@ data_filepath = prepare_data_params["data_filepath"]
 train_proportion = prepare_data_params["train_proportion"]
 output_train_filepath = prepare_data_params["output_train_filepath"]
 output_test_filepath = prepare_data_params["output_test_filepath"]
 sample_test_filepath = prepare_data_params["sample_test_filepath"]
 feature_processor_config = feature_process_params["feature_processor_config"]
 logger.info(f"--- Initiate DataClient ---")
@ -100,10 +99,6 @@ def prepare_data(
    logger.info("--- Outputting data ---")
    output_dataclient.save_data(
        obj=data.sample(1), location=sample_test_filepath, save_config=None
    )
    output_dataclient.save_data(
        obj=train, location=output_train_filepath, save_config=None
    )
--- a/modules/ml-pipeline/src/pipeline/4_generate_metrics.py
+++ b/modules/ml-pipeline/src/pipeline/4_generate_metrics.py
@ -4,7 +4,9 @@ After the model is built, we can evaluate its performance
 """
 import os
 import yaml
 import pandas as pd
 from pathlib import Path
 from core.interface.InterfaceModels import MLModel
 from core.interface.InterfaceMetrics import MLMetrics
 from core.interface.InterfaceDataClient import DataClient
--- a/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
+++ b/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
@ -99,12 +99,6 @@ def generate_scenario_predictions(
            ]
        )
    # TEMPORARY FIX: ADD is_post_sap10_starting and is_post_sap10_ending if not present
    if "is_post_sap10_starting" not in scenario_data.columns:
        scenario_data["is_post_sap10_starting"] = False
    if "is_post_sap10_ending" not in scenario_data.columns:
        scenario_data["is_post_sap10_ending"] = False
    logger.info("--- Loading Model ---")
    model.load_model(model_filepath)
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@ -14,23 +14,9 @@ default:
      output_filepath: ./data/model/allmodels/
      problem_type: regression
      eval_metric: mean_squared_error #mean_absolute_error
-      time_limit: 3600
+      time_limit: 1800
      presets: medium_quality
-      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT', 'FASTAI']
+      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
-      infer_limit: 1
+      infer_limit: 0.05
      infer_limit_batch_size: 10000
      fit_strategy: "parallel"
      ag_args_ensemble: {'num_folds_parallel': 2}
      num_gpus: 0
      hyperparameters:
        {
        'NN_TORCH': [{}],
        'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}],
        # 'GBM': [{}],
        'CAT': [{}],
        'XGB': [{}],
        'FASTAI': [{}],
        'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
        'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
        'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
        }
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
@ -18,60 +18,30 @@ def remove_starting_columns(df):
    return df
-def keep_negative_heat_change(df):
+def remove_floor_height_ending(df):
-    df = df[df["heat_demand_change"] < 0]
+    # df.describe(percentiles=[0.005,0.99])['FLOOR_HEIGHT_ENDING']
    # shows bottom 0.5 percentile is 1.665
    # So keep anything above this
    df = df[df["floor_height_ending"] > 1.665].reset_index(drop=True)
    print("we in here")
    return df
-def keep_negative_carbon_change(df):
+def remove_minimum_habitable_room_size(df):
-    df = df[df["carbon_change"] < 0]
+    # Need minimum of 6.5m per habitable room
    df = df[
        df["total_floor_area_ending"] / df["number_habitable_rooms"] > 6.5
    ].reset_index(drop=True)
    return df
-# TODO: Move to ETL pipeline
+def keep_flats(df):
-def remove_unreasonable_habitable_rooms(df):
+    df = df[df["property_type"] == "Flat"]
    """
    Assumption is that proportion of floor area to habitable rooms should be at least 6.5m2
    """
    minimum_room_size_index = (
        df["total_floor_area_ending"] / df["number_habitable_rooms"] >= 6.5
    )
    df = df[minimum_room_size_index]
    return df
-def remove_top_1_percent_heat_demand_starting(df):
+def keep_non_zero_rdsap(df):
-    # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%']
+    df = df[df["rdsap_change"] != 0]
    threshold_value = 860
    df = df[df["heat_demand_starting"] < threshold_value]
    return df
 def remove_negative_heat_demand_starting(df):
    """
    Keep only the rows whose ``heat_demand_starting`` is strictly greater
    than zero, so rows with a value of exactly zero are removed as well.
    ``df`` is indexed with a boolean mask (presumably a pandas DataFrame);
    the index of the surviving rows is not reset.
    """
    lower_bound_exclusive = 0
    is_positive = df["heat_demand_starting"] > lower_bound_exclusive
    return df[is_positive]
 # def remove_top_1_percent_heat_demand_ending(df):
 #     # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%']
 #     threshold_value = 593
 #     df = df[df["heat_demand_ending"] < threshold_value]
 #     return df
 def remove_negative_heat_demand_ending(df):
    """
    Keep only the rows whose ``heat_demand_ending`` is strictly greater
    than zero, so rows with a value of exactly zero are removed as well.
    ``df`` is indexed with a boolean mask (presumably a pandas DataFrame);
    the index of the surviving rows is not reset.
    """
    lower_bound_exclusive = 0
    is_positive = df["heat_demand_ending"] > lower_bound_exclusive
    return df[is_positive]
 def remove_top_1_percent_carbon(df):
    """
    Keep only the rows whose ``carbon_starting`` is strictly below a
    hard-coded cut-off of 18.
    The cut-off is a fixed constant, not recomputed from ``df``. Going by the
    function name and the note it replaces, it was presumably read off
    df.describe(percentiles=[0.99])['CARBON_STARTING']['99%'] - confirm
    before relying on it for a different dataset.
    The index of the surviving rows is not reset.
    """
    upper_bound_exclusive = 18
    is_below_cutoff = df["carbon_starting"] < upper_bound_exclusive
    return df[is_below_cutoff]
@ -84,14 +54,10 @@ def remove_top_1_percent_carbon(df):
 #     return df
 business_logic = {
-    "remove_unreasonable_habitable_rooms": remove_unreasonable_habitable_rooms,
+    # "keep_non_zero_rdsap": keep_non_zero_rdsap,
-    "keep_negative_heat_change": keep_negative_heat_change,
+    # "keep_flats": keep_flats,
-    "keep_negative_carbon_change": keep_negative_carbon_change,
+    # "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,
-    "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand_starting,
+    # "remove_floor_height_ending": remove_floor_height_ending
    "remove_negative_heat_demand_starting": remove_negative_heat_demand_starting,
    # "remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending,
    "remove_negative_heat_demand_ending": remove_negative_heat_demand_ending,
    "remove_top_1_percent_carbon": remove_top_1_percent_carbon,
    # "remove_starting_columns": remove_starting_columns
    # "keep_ENDING_COLUMNS": keep_ending_columns
 }
--- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
@ -1,7 +1,6 @@
 """
 After predictions, we may want to apply some post processing to the predictions
 """
 import pandas as pd
@ -14,11 +13,10 @@ def clip_predictions_to_minimum_value(
    predictions_df = pd.concat([data, predictions], axis=1)
    # We expect every prediction to be at least a one-point improvement
    replace_index = (
-        predictions_df["predictions"]
+        predictions_df["sap_starting"] + minimum_value > predictions_df["predictions"]
        > predictions_df["heat_demand_starting"] - minimum_value
    )
    predictions_df.loc[replace_index, "predictions"] = (
-        predictions_df.loc[replace_index, "heat_demand_starting"] - minimum_value
+        predictions_df.loc[replace_index, "sap_starting"] + minimum_value
    )
    predictions_new = predictions_df["predictions"]
@ -32,6 +30,6 @@ def clip_predictions_to_minimum_value(
 post_prediction_logic = {
-    # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
+    "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
    # "round_predictions": round_predictions
 }
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@ -8,6 +8,6 @@ default:
      # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
      # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
      # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
-      # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
+      - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
    comparison_output_filepath: ./metrics/scenario_table.md
    metrics_output_filepath: ./metrics/scenario_metrics.md
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@ -12,163 +12,32 @@ default:
      AWS_ACCESS_KEY_ID: minio
      AWS_SECRET_ACCESS_KEY: minio123
      ENDPOINT_URL: http://localhost:9000
-    local: null
+    local:
      null
  prepare_data:
    input_dataclient_type: aws-s3
    output_dataclient_type: local
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-06-09-10-36-53/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet
-    data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet
    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
    train_proportion: 0.9
    output_train_filepath: ./data/prepared_data/train.parquet
    output_test_filepath: ./data/prepared_data/test.parquet
    sample_test_filepath: ./data/prepared_data/sample_test.parquet
  feature_processor:
    feature_processor_type: dataframe
    feature_processor_config:
      subsample_amount: null
      subsample_seed: 0
-      target: heat_demand_starting
+      target: sap_ending
      identifier_columns: ["uprn"]
-      drop_columns:
+      # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
-        [
+      drop_columns: [
-          "heat_demand_ending",
+        "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
-          "potential_energy_efficiency",
+        'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
-          "environment_impact_potential",
+        'number_habitable_rooms', 'number_heated_rooms']
          "energy_consumption_potential",
          "co2_emissions_potential",
          "heat_demand_change",
          "carbon_change",
          "rdsap_change",
          "sap_starting",
          "sap_ending",
          "carbon_starting",
          "carbon_ending",
          "days_to_starting",
          "days_to_ending",
          "number_habitable_rooms_starting",
          "number_habitable_rooms_ending",
          "number_heated_rooms_starting",
          "number_heated_rooms_ending",
          "number_habitable_rooms",
          "number_heated_rooms",
          "lighting_cost_starting",
          "lighting_cost_ending",
          "heating_cost_starting",
          "heating_cost_ending",
          "hot_water_cost_starting",
          "hot_water_cost_ending",
          "floor_thermal_transmittance",
          "floor_thermal_transmittance_ending",
          "lodgement_date_starting",
          "lodgement_date_ending",
          "walls_thermal_transmittance_ending",
          "walls_thermal_transmittance_unit_ending",
          "is_filled_cavity_ending",
          "is_as_built_ending",
          "walls_is_assumed_ending",
          "is_park_home_ending",
          "walls_insulation_thickness_ending",
          "external_insulation_ending",
          "internal_insulation_ending",
          "floor_insulation_thickness_ending",
          "roof_thermal_transmittance_ending",
          "is_at_rafters_ending",
          "roof_insulation_thickness_ending",
          "heater_type_ending",
          "system_type_ending",
          "thermostat_characteristics_ending",
          "heating_scope_ending",
          "energy_recovery_ending",
          "hotwater_tariff_type_ending",
          "extra_features_ending",
          "chp_systems_ending",
          "distribution_system_ending",
          "no_system_present_ending",
          "appliance_ending",
          "has_radiators_ending",
          "has_fan_coil_units_ending",
          "has_pipes_in_screed_above_insulation_ending",
          "has_pipes_in_insulated_timber_floor_ending",
          "has_pipes_in_concrete_slab_ending",
          "has_boiler_ending",
          "has_air_source_heat_pump_ending",
          "has_room_heaters_ending",
          "has_electric_storage_heaters_ending",
          "has_warm_air_ending",
          "has_electric_underfloor_heating_ending",
          "has_electric_ceiling_heating_ending",
          "has_community_scheme_ending",
          "has_ground_source_heat_pump_ending",
          "has_no_system_present_ending",
          "has_portable_electric_heaters_ending",
          "has_water_source_heat_pump_ending",
          "has_electric_heat_pump_ending",
          "has_micro-cogeneration_ending",
          "has_solar_assisted_heat_pump_ending",
          "has_exhaust_source_heat_pump_ending",
          "has_community_heat_pump_ending",
          "has_hot-water-only_ending",
          "has_electric_ending",
          "has_mains_gas_ending",
          "has_wood_logs_ending",
          "has_coal_ending",
          "has_oil_ending",
          "has_wood_pellets_ending",
          "has_anthracite_ending",
          "has_dual_fuel_mineral_and_wood_ending",
          "has_smokeless_fuel_ending",
          "has_lpg_ending",
          "has_b30k_ending",
          "has_mineral_and_wood_ending",
          "has_dual_fuel_appliance_ending",
          "has_electricaire_ending",
          "has_assumed_for_most_rooms_ending",
          "has_underfloor_heating_ending",
          "thermostatic_control_ending",
          "charging_system_ending",
          "switch_system_ending",
          "no_control_ending",
          "dhw_control_ending",
          "community_heating_ending",
          "multiple_room_thermostats_ending",
          "auxiliary_systems_ending",
          "trvs_ending",
          "rate_control_ending",
          "glazing_type_ending",
          "fuel_type_ending",
          "main-fuel_tariff_type_ending",
          "is_community_ending",
          "no_individual_heating_or_community_network_ending",
          "complex_fuel_type_ending",
          "mechanical_ventilation_ending",
          "secondheat_description_ending",
          "glazed_type_ending",
          "multi_glaze_proportion_ending",
          "low_energy_lighting_ending",
          "number_open_fireplaces_ending",
          "solar_water_heating_flag_ending",
          "photo_supply_ending",
          "transaction_type_ending",
          "energy_tariff_ending",
          "extension_count_ending",
          "total_floor_area_ending",
          "floor_height_ending",
          "hot_water_energy_eff_ending",
          "floor_energy_eff_ending",
          "windows_energy_eff_ending",
          "walls_energy_eff_ending",
          "sheating_energy_eff_ending",
          "roof_energy_eff_ending",
          "mainheat_energy_eff_ending",
          "mainheatc_energy_eff_ending",
          "lighting_energy_eff_ending",
          "is_post_sap10_ending",
          "estimated_perimeter_ending",
        ]
      # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
      retain_features: null
      # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
      #  'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
@ -209,4 +78,4 @@ default:
 dev:
  generate_predictions:
-    input_dataclient_type: aws-s3
+      input_dataclient_type: aws-s3
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@ -1,4 +1,4 @@
-""" "
+""""
 Implementations of MLModels, all of which will have four methods to:
 - Load model
 - Save Model
@ -11,6 +11,9 @@ import joblib
 import pandas as pd
 from pathlib import Path
 from typing import Union, List
 from sklearn import linear_model
 from sklearn.svm import SVR
 from autogluon.tabular import TabularDataset, TabularPredictor
 from core.interface.InterfaceModels import MLModel
 from core.Logger import logger
@ -66,8 +69,6 @@ class SKLearnLinearRegression:
        """
        Method to train a model
        """
        from sklearn import linear_model
        self.model = linear_model.LinearRegression()
        x_train = data.iloc[:, data.columns != target]
@ -116,7 +117,6 @@ class SKLearnSVMRegression:
        """
        Method to train a model
        """
        from sklearn.svm import SVR
        validate_dict_keys(
            list(model_hyperparameters.keys()),
@ -152,17 +152,12 @@ class AutogluonAutoML:
        "infer_limit",
        "infer_limit_batch_size",
        "ag_args_ensemble",
        "fit_strategy",
        "num_gpus",
        "hyperparameters",
    ]
    def load_model(self, path: Union[Path, str]) -> None:
        """
        Method to load a model
        """
        from autogluon.tabular import TabularPredictor
        filepath = str(path)
        self.model = TabularPredictor.load(path=filepath)
@ -188,10 +183,6 @@ class AutogluonAutoML:
        """
        Method to train a model
        """
        from autogluon.tabular import TabularDataset, TabularPredictor
        # Force Parallel Model fitting
        os.environ["AG_FORCE_PARALLEL"] = "True"
        validate_dict_keys(
            keys_1=list(model_hyperparameters.keys()),
@ -218,9 +209,6 @@ class AutogluonAutoML:
            infer_limit=model_hyperparameters["infer_limit"],
            infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
            ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
            fit_strategy=model_hyperparameters["fit_strategy"],
            num_gpus=model_hyperparameters["num_gpus"],
            hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
        )
    def predict(
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@ -16,22 +16,15 @@ stages:
    deps:
    - path: 1_prepare_data.py
      hash: md5
-      md5: a5ce162e1c402c0f811a80ef78cf4dd5
+      md5: 11a3b8bfdfe199ab7ecc39ccc5652649
-      size: 4481
+      size: 4298
    params:
      configs/settings.yaml:
        default.feature_processor.feature_processor_config.drop_columns:
        - heat_demand_ending
        - potential_energy_efficiency
        - environment_impact_potential
        - energy_consumption_potential
        - co2_emissions_potential
        - heat_demand_change
        - carbon_change
        - rdsap_change
-        - sap_starting
+        - heat_demand_ending
        - sap_ending
        - carbon_starting
        - carbon_ending
        - days_to_starting
        - days_to_ending
@ -41,140 +34,24 @@ stages:
        - number_heated_rooms_ending
        - number_habitable_rooms
        - number_heated_rooms
        - lighting_cost_starting
        - lighting_cost_ending
        - heating_cost_starting
        - heating_cost_ending
        - hot_water_cost_starting
        - hot_water_cost_ending
        - floor_thermal_transmittance
        - floor_thermal_transmittance_ending
        - lodgement_date_starting
        - lodgement_date_ending
        - walls_thermal_transmittance_ending
        - walls_thermal_transmittance_unit_ending
        - is_filled_cavity_ending
        - is_as_built_ending
        - walls_is_assumed_ending
        - is_park_home_ending
        - walls_insulation_thickness_ending
        - external_insulation_ending
        - internal_insulation_ending
        - floor_insulation_thickness_ending
        - roof_thermal_transmittance_ending
        - is_at_rafters_ending
        - roof_insulation_thickness_ending
        - heater_type_ending
        - system_type_ending
        - thermostat_characteristics_ending
        - heating_scope_ending
        - energy_recovery_ending
        - hotwater_tariff_type_ending
        - extra_features_ending
        - chp_systems_ending
        - distribution_system_ending
        - no_system_present_ending
        - appliance_ending
        - has_radiators_ending
        - has_fan_coil_units_ending
        - has_pipes_in_screed_above_insulation_ending
        - has_pipes_in_insulated_timber_floor_ending
        - has_pipes_in_concrete_slab_ending
        - has_boiler_ending
        - has_air_source_heat_pump_ending
        - has_room_heaters_ending
        - has_electric_storage_heaters_ending
        - has_warm_air_ending
        - has_electric_underfloor_heating_ending
        - has_electric_ceiling_heating_ending
        - has_community_scheme_ending
        - has_ground_source_heat_pump_ending
        - has_no_system_present_ending
        - has_portable_electric_heaters_ending
        - has_water_source_heat_pump_ending
        - has_electric_heat_pump_ending
        - has_micro-cogeneration_ending
        - has_solar_assisted_heat_pump_ending
        - has_exhaust_source_heat_pump_ending
        - has_community_heat_pump_ending
        - has_hot-water-only_ending
        - has_electric_ending
        - has_mains_gas_ending
        - has_wood_logs_ending
        - has_coal_ending
        - has_oil_ending
        - has_wood_pellets_ending
        - has_anthracite_ending
        - has_dual_fuel_mineral_and_wood_ending
        - has_smokeless_fuel_ending
        - has_lpg_ending
        - has_b30k_ending
        - has_mineral_and_wood_ending
        - has_dual_fuel_appliance_ending
        - has_electricaire_ending
        - has_assumed_for_most_rooms_ending
        - has_underfloor_heating_ending
        - thermostatic_control_ending
        - charging_system_ending
        - switch_system_ending
        - no_control_ending
        - dhw_control_ending
        - community_heating_ending
        - multiple_room_thermostats_ending
        - auxiliary_systems_ending
        - trvs_ending
        - rate_control_ending
        - glazing_type_ending
        - fuel_type_ending
        - main-fuel_tariff_type_ending
        - is_community_ending
        - no_individual_heating_or_community_network_ending
        - complex_fuel_type_ending
        - mechanical_ventilation_ending
        - secondheat_description_ending
        - glazed_type_ending
        - multi_glaze_proportion_ending
        - low_energy_lighting_ending
        - number_open_fireplaces_ending
        - solar_water_heating_flag_ending
        - photo_supply_ending
        - transaction_type_ending
        - energy_tariff_ending
        - extension_count_ending
        - total_floor_area_ending
        - floor_height_ending
        - hot_water_energy_eff_ending
        - floor_energy_eff_ending
        - windows_energy_eff_ending
        - walls_energy_eff_ending
        - sheating_energy_eff_ending
        - roof_energy_eff_ending
        - mainheat_energy_eff_ending
        - mainheatc_energy_eff_ending
        - lighting_energy_eff_ending
        - is_post_sap10_ending
        - estimated_perimeter_ending
        default.feature_processor.feature_processor_config.retain_features:
        default.feature_processor.feature_processor_config.subsample_amount:
        default.feature_processor.feature_processor_config.subsample_seed: 0
-        default.feature_processor.feature_processor_config.target: 
+        default.feature_processor.feature_processor_config.target: sap_ending
          heat_demand_starting
        default.feature_processor.feature_processor_type: dataframe
-        default.prepare_data.data_filepath: 
+        default.prepare_data.data_filepath:
-          s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
+          s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
        default.prepare_data.input_dataclient_type: aws-s3
        default.prepare_data.output_dataclient_type: local
-        default.prepare_data.output_test_filepath: 
+        default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
-          ./data/prepared_data/test.parquet
+        default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
        default.prepare_data.output_train_filepath: 
          ./data/prepared_data/train.parquet
        default.prepare_data.train_proportion: 0.9
    outs:
    - path: data/prepared_data/
      hash: md5
-      md5: c293fbc1658af932f0d09cdce25acf67.dir
+      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 21779190
+      size: 45056059
-      nfiles: 3
+      nfiles: 2
  build_model:
    cmd: python 2_build_model.py
    deps:
@ -184,9 +61,9 @@ stages:
      size: 4820
    - path: data/prepared_data
      hash: md5
-      md5: c293fbc1658af932f0d09cdce25acf67.dir
+      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 21779190
+      size: 45056059
-      nfiles: 3
+      nfiles: 2
    params:
      configs/build_model.yaml:
        default:
@ -202,7 +79,7 @@ stages:
              output_filepath: ./data/model/allmodels/
              problem_type: regression
              eval_metric: mean_squared_error
-              time_limit: 3600
+              time_limit: 1800
              presets: medium_quality
              excluded_model_types:
              - RF
@ -210,94 +87,25 @@ stages:
              - NN_TORCH
              - KNN
              - XT
-              - FASTAI
+              infer_limit: 0.05
              infer_limit: 1
              infer_limit_batch_size: 10000
              fit_strategy: parallel
              ag_args_ensemble:
                num_folds_parallel: 2
              num_gpus: 0
              hyperparameters:
                NN_TORCH:
                - {}
                GBM:
                - extra_trees: true
                  ag_args:
                    name_suffix: XT
                - {}
                - learning_rate: 0.03
                  num_leaves: 128
                  feature_fraction: 0.9
                  min_data_in_leaf: 3
                  ag_args:
                    name_suffix: Large
                    priority: 0
                CAT:
                - {}
                XGB:
                - {}
                FASTAI:
                - {}
                RF:
                - criterion: gini
                  ag_args:
                    name_suffix: Gini
                    problem_types:
                    - binary
                    - multiclass
                - criterion: entropy
                  ag_args:
                    name_suffix: Entr
                    problem_types:
                    - binary
                    - multiclass
                - criterion: squared_error
                  ag_args:
                    name_suffix: MSE
                    problem_types:
                    - regression
                    - quantile
                XT:
                - criterion: gini
                  ag_args:
                    name_suffix: Gini
                    problem_types:
                    - binary
                    - multiclass
                - criterion: entropy
                  ag_args:
                    name_suffix: Entr
                    problem_types:
                    - binary
                    - multiclass
                - criterion: squared_error
                  ag_args:
                    name_suffix: MSE
                    problem_types:
                    - regression
                    - quantile
                KNN:
                - weights: uniform
                  ag_args:
                    name_suffix: Unif
                - weights: distance
                  ag_args:
                    name_suffix: Dist
    outs:
    - path: data/fit_predictions/
      hash: md5
-      md5: 6c4de55effeb468e37ee3db3838109db.dir
+      md5: d9c9afc05e8780db47c0548b19bf7d19.dir
-      size: 2976628
+      size: 3349989
      nfiles: 1
    - path: data/model/
      hash: md5
-      md5: 2ff63da0312853b1fd9338cac62ba0b0.dir
+      md5: 13c3100e1486c27a83a8a47491077842.dir
-      size: 592460869
+      size: 773523079
-      nfiles: 31
+      nfiles: 36
    - path: metrics/fit_metrics.json
      hash: md5
-      md5: c00465e99e9368afdb3302a52fca99b9
+      md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a
-      size: 223
+      size: 224
  generate_predictions:
    cmd: python 3_generate_predictions.py
    deps:
@ -307,46 +115,44 @@ stages:
      size: 2464
    - path: data/model
      hash: md5
-      md5: 2ff63da0312853b1fd9338cac62ba0b0.dir
+      md5: 13c3100e1486c27a83a8a47491077842.dir
-      size: 592460869
+      size: 773523079
-      nfiles: 31
+      nfiles: 36
    - path: data/prepared_data
      hash: md5
-      md5: c293fbc1658af932f0d09cdce25acf67.dir
+      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 21779190
+      size: 45056059
-      nfiles: 3
+      nfiles: 2
    params:
      configs/settings.yaml:
        default.generate_predictions.input_dataclient_type: local
        default.generate_predictions.output_dataclient_type: local
        default.generate_predictions.predictions_column_name: predictions
-        default.generate_predictions.predictions_output_filepath: 
+        default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
-          ./data/predictions/predictions.parquet
+        default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
        default.generate_predictions.test_data_filepath: 
          ./data/prepared_data/test.parquet
    outs:
    - path: data/predictions/
      hash: md5
-      md5: a960cadf88d5f38cc55942781a2db51e.dir
+      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
-      size: 392728
+      size: 463197
      nfiles: 1
  generate_metrics:
    cmd: python 4_generate_metrics.py
    deps:
    - path: 4_generate_metrics.py
      hash: md5
-      md5: d61bb524f706917f6a3eb72b1ab8bc61
+      md5: 4fedb86d89d528f0a6597934ba3890a0
-      size: 3447
+      size: 3484
    - path: data/predictions
      hash: md5
-      md5: a960cadf88d5f38cc55942781a2db51e.dir
+      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
-      size: 392728
+      size: 463197
      nfiles: 1
    - path: data/prepared_data
      hash: md5
-      md5: c293fbc1658af932f0d09cdce25acf67.dir
+      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
-      size: 21779190
+      size: 45056059
-      nfiles: 3
+      nfiles: 2
    params:
      configs/settings.yaml:
        default.generate_metrics.dataclient_type: local
@ -355,29 +161,30 @@ stages:
    outs:
    - path: metrics/metrics.json
      hash: md5
-      md5: c0241381a23b29831b18be3f063f75fd
+      md5: 3e08df02fd5c5d094bcf936e1338d596
-      size: 218
+      size: 223
  generate_scenerio_metrics:
    cmd: python 5_generate_scenarios.py
    deps:
    - path: 5_generate_scenarios.py
      hash: md5
-      md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
+      md5: 40506749fefd926d47c60ff5b16db307
-      size: 5658
+      size: 5337
    params:
      configs/scenarios.yaml:
        default.scenarios:
          input_dataclient_type: aws-s3
          output_dataclient_type: local
          scenario_data_filepaths:
          - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
          comparison_output_filepath: ./metrics/scenario_table.md
          metrics_output_filepath: ./metrics/scenario_metrics.md
    outs:
    - path: metrics/scenario_metrics.md
      hash: md5
-      md5: d41d8cd98f00b204e9800998ecf8427e
+      md5: fa4d6d7bbd7818613800da5f8f37ea96
-      size: 0
+      size: 363
    - path: metrics/scenario_table.md
      hash: md5
-      md5: d41d8cd98f00b204e9800998ecf8427e
+      md5: d6baf100a1623cc2467c2f8221d314c9
-      size: 0
+      size: 2133
--- a/modules/ml-pipeline/src/pipeline/eda.py
+++ b/modules/ml-pipeline/src/pipeline/eda.py
@ -1,7 +1,6 @@
 """
 Doing some eda on dataset
 """
 # Look at response variable
 from matplotlib import pyplot as plt
@ -39,6 +38,7 @@ train_df[[target, "SAP_STARTING"]].plot(y=target, x="SAP_STARTING", style="o")
 train_df[[target, "HEAT_DEMAND_STARTING"]].plot(
    x=target, y="HEAT_DEMAND_STARTING", style="o"
 )
 # Both make sense: i.e. the higher the sap, the lower we predict and the higher the heat demand, the higher we predict
 # Load the autogluon model and check feature importance
@ -176,8 +176,6 @@ plot_permutation_importance(exp, fig_kw={"figwidth": 7, "figheight": 6})
 #
 #
 from core.MLMetrics import metrics_factory
 from core.MLModels import model_factory
 from core.DataClient import dataclient_factory
 import pandas as pd
@ -218,12 +216,6 @@ mix_df["residual"] = abs(mix_df[predictions_column_name] - mix_df[target])
 mix_df = mix_df.sort_values("residual", ascending=False)
 cosine_similarity_df = mix_df[mix_df.columns.difference(["predictions", "residual"])]
 metrics = metrics_factory("Regression")
 metrics.generate_metrics(mix_df["predictions"], mix_df["HEAT_DEMAND_ENDING"])
 cosine_similarity_df = mix_df[
    mix_df.columns.difference(["predictions", "residual", "SAP_ENDING"])
 ]
 from sklearn.metrics.pairwise import cosine_similarity
 row_index = 0
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@ -1,7 +1,7 @@
-joblib==1.5.2
+joblib==1.3.2
-boto3==1.40.61
+boto3==1.28.17
-pandas==2.3.3
+pandas==2.1.4
-autogluon.tabular[all]==1.4.0
+autogluon.tabular[all]==1.0.0
-dynaconf==3.2.12
+dynaconf==3.2.1
-pyarrow==20.0.0
+pyarrow==13.0.0
-pre-commit==4.3.0
+pre-commit==3.3.3
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@ -1,7 +1,7 @@
-joblib==1.5.2
+joblib==1.3.2
-boto3==1.40.61
+boto3==1.28.17
-pandas==2.3.3
+pandas==2.1.4
-autogluon.tabular[all]==1.4.0
+autogluon.tabular[all]==1.0.0
-dynaconf==3.2.12
+dynaconf==3.2.1
-pyarrow==20.0.0
+pyarrow==13.0.0
-PyYAML==6.0.3
+PyYAML==6.0.1
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@ -1,10 +1,10 @@
-joblib==1.5.2
+joblib==1.3.2
-boto3==1.40.61
+boto3==1.28.17
-pandas==2.3.3
+pandas==2.1.4
-autogluon.tabular[all]==1.4.0
+autogluon.tabular[all]==1.0.0
-ray==2.44.1
+ray==2.6.3
-dynaconf==3.2.12
+dynaconf==3.2.1
-# alibi
+alibi==0.9.5
-shap==0.49.1
+shap==0.42.1
-pyarrow==20.0.0
+pyarrow==13.0.0
-pre-commit==4.3.0
+pre-commit==3.3.3
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@ -1,4 +1,4 @@
-boto3==1.40.61
+boto3==1.28.41
-pandas==2.3.3
+pandas==2.1.4
-autogluon.tabular[all]==1.4.0
+autogluon.tabular[all]==1.0.0
-dynaconf==3.2.12
+dynaconf==3.2.1
--- a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
@ -1,4 +1,4 @@
-dvc==3.66.0
+dvc==3.51.0
-dvc-s3==3.2.2
+dvc-s3==3.2.0
-gto==1.9.0
+gto==1.7.1
-pyOpenSSL==23.3.0
+pyOpenSSL==23.3.0