Merge pull request #168 from Hestia-Homes/heat_baseline-dev-model

Initial model for heat_baseline
amend the deployment files
2026-06-08 11:17:25 +00:00 · 2026-03-13 18:38:19 +00:00 · 2026-01-09 10:41:17 +00:00 · 2026-01-09 10:39:36 +00:00 · 2025-11-05 15:24:44 +00:00 · 2025-11-05 15:24:02 +00:00
26 changed files with 993 additions and 380 deletions
--- a/.github/workflows/Deploy.yml
+++ b/.github/workflows/Deploy.yml
@ -2,7 +2,17 @@ name: Sap Change Model Deploy
 on:
  push:
-    branches: [ sap-dev, sap-prod, heat-dev, heat-prod, carbon-dev, carbon-prod]
+    branches:
      [
        sap-dev,
        sap-prod,
        heat-dev,
        heat-prod,
        carbon-dev,
        carbon-prod,
        heat_baseline-dev,
        heat_baseline-prod,
      ]
 jobs:
  deploy:
@ -31,8 +41,8 @@ jobs:
      - name: set secret prefix which is used across multiple steps
        id: secret_prefix
        run: |
-            # Convert branch name to uppercase and replace hyphens with underscores
+          # Convert branch name to uppercase and replace hyphens with underscores
-            echo "::set-output name=secret_prefix::$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')"
+          # Publish the step output through the GITHUB_OUTPUT file; the "::set-output" workflow command is deprecated.
           echo "secret_prefix=$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')" >> "$GITHUB_OUTPUT"
      - name: Set domain name
        id: set_domain
@ -116,7 +126,7 @@ jobs:
        env:
          RUNTIME_ENVIRONMENT: ${{ steps.set_runtime_environment.outputs.runtime_environment }}
          PREDICTIONS_BUCKET: ${{ steps.set_s3_buckets.outputs.predictions_bucket }}
-          DATA_BUCKET:  ${{ steps.set_s3_buckets.outputs.data_bucket }}
+          DATA_BUCKET: ${{ steps.set_s3_buckets.outputs.data_bucket }}
          DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }}
          ECR_URI: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
          GITHUB_SHA: ${{ github.sha }}
--- a/.github/workflows/MLPipelinePostMerge.yml
+++ b/.github/workflows/MLPipelinePostMerge.yml
@ -13,6 +13,7 @@ on:
      - "sap-dev"
      - "heat-dev"
      - "carbon-dev"
      - "heat_baseline-dev"
 permissions: write-all
@ -21,166 +22,171 @@ jobs:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }}
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
-      with:
+        with:
-        fetch-depth: 0
+          fetch-depth: 0
-    - name: Install packages to register model
+      - name: Install packages to register model
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Register Model
+      - name: Register Model
-      run: |
+        run: |
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-        git config user.name "Github-Bot"
+          git config user.name "Github-Bot"
-        git config user.email "Github-Bot@no-reply.com"
+          git config user.email "Github-Bot@no-reply.com"
-        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false
+          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false
-        if [ -z "${latest_version}" ]; then
+          if [ -z "${latest_version}" ]; then
-          increment_version="1.0.0"
+            increment_version="1.0.0"
-        else
+          else
-          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+            increment_version=$(echo ${latest_version} | awk 'BEGIN {
-              FS="\\."   # Set the field separator to a period
+                FS="\\."   # Set the field separator to a period
-              OFS="."    # Set the output field separator to a period
+                OFS="."    # Set the output field separator to a period
-          }
+            }
-          {
+            {
-              major = $1 + 1   # Increment the major version
+                major = $1 + 1   # Increment the major version
-              print major, "0", "0"   # Print the new version
+                print major, "0", "0"   # Print the new version
-          }')
+            }')
-        fi
+          fi
-        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-        git tag -a ${new_tag} -m "Registering new Major Version"
+          git tag -a ${new_tag} -m "Registering new Major Version"
-        git push origin ${new_tag}
+          git push origin ${new_tag}
-        gto show --json > MODEL_REGISTRY.md
+          gto show --json > MODEL_REGISTRY.md
-        git add .
+          git add .
-        git commit -m "Update Registry"
+          git commit -m "Update Registry"
-        git push
+          git push
  Register-Minor-Model-Dev:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }}
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
-      with:
+        with:
-        fetch-depth: 0
+          fetch-depth: 0
-    - name: Install packages to register model
+      - name: Install packages to register model
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Register Model
+      - name: Register Model
-      run: |
+        run: |
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-        git config user.name "Github-Bot"
+          git config user.name "Github-Bot"
-        git config user.email "Github-Bot@no-reply.com"
+          git config user.email "Github-Bot@no-reply.com"
-        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
+          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
-        if [ -z "${latest_version}" ]; then
+          if [ -z "${latest_version}" ]; then
-          increment_version="0.1.0"
+            increment_version="0.1.0"
-        else
+          else
-          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+            increment_version=$(echo ${latest_version} | awk 'BEGIN {
-              FS="\\."   # Set the field separator to a period
+                FS="\\."   # Set the field separator to a period
-              OFS="."    # Set the output field separator to a period
+                OFS="."    # Set the output field separator to a period
-          }
+            }
-          {
+            {
-              minor = $2 + 1   # Increment the minor version
+                minor = $2 + 1   # Increment the minor version
-              print $1, minor, "0"   # Print the new version
+                print $1, minor, "0"   # Print the new version
-          }')
+            }')
-        fi
+          fi
-        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-        git tag -a ${new_tag} -m "Registering new Minor Version"
+          git tag -a ${new_tag} -m "Registering new Minor Version"
-        git push origin ${new_tag}
+          git push origin ${new_tag}
-        gto show --json > MODEL_REGISTRY.md
+          gto show --json > MODEL_REGISTRY.md
-        git add .
+          git add .
-        git commit -m "Update Registry"
+          git commit -m "Update Registry"
-        git push
+          git push
  Register-Patch-Model-Dev:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }}
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
-      with:
+        with:
-        fetch-depth: 0
+          fetch-depth: 0
-    - name: Install packages to register model
+      - name: Install packages to register model
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Register Model
+      - name: Register Model
-      run: |
+        run: |
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-        git config user.name "Github-Bot"
+          git config user.name "Github-Bot"
-        git config user.email "Github-Bot@no-reply.com"
+          git config user.email "Github-Bot@no-reply.com"
-        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
+          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
-        if [ -z "${latest_version}" ]; then
+          if [ -z "${latest_version}" ]; then
-          increment_version="0.0.1"
+            increment_version="0.0.1"
-        else
+          else
-          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+            increment_version=$(echo ${latest_version} | awk 'BEGIN {
-              FS="\\."   # Set the field separator to a period
+                FS="\\."   # Set the field separator to a period
-              OFS="."    # Set the output field separator to a period
+                OFS="."    # Set the output field separator to a period
-          }
+            }
-          {
+            {
-              patch = $3 + 1   # Increment the patch version
+                patch = $3 + 1   # Increment the patch version
-              print $1, $2, patch   # Print the new version
+                print $1, $2, patch   # Print the new version
-          }')
+            }')
-        fi
+          fi
-        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
+          new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
-        git tag -a ${new_tag} -m "Registering new Patch Version"
+          git tag -a ${new_tag} -m "Registering new Patch Version"
-        git push origin ${new_tag}
+          git push origin ${new_tag}
-        gto show --json > MODEL_REGISTRY.md
+          gto show --json > MODEL_REGISTRY.md
-        git add .
+          git add .
-        git commit -m "Update Registry"
+          git commit -m "Update Registry"
-        git push
+          git push
  Promote-Artefacts-To-Dev:
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
+      - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
+      - name: Install packages to retrieve artifacts
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Retrieve artifacts (dvc.lock)
+      - name: Retrieve artifacts (dvc.lock)
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
+        run: |
-        cd modules/ml-pipeline/src/pipeline
+          cd modules/ml-pipeline/src/pipeline
-        dvc pull -r experiments
+          dvc pull -r experiments
-    - name: Push artifacts to Dev
+      - name: Push artifacts to Dev
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
+        run: |
-        cd modules/ml-pipeline/src/pipeline
+          cd modules/ml-pipeline/src/pipeline
-        dvc push -r dev
+          dvc push -r dev
  Register-New-Model-Dev:
-    needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev]
+    needs:
      [
        Register-Major-Model-Dev,
        Register-Minor-Model-Dev,
        Register-Patch-Model-Dev,
      ]
    if: |
      always() &&
      (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') &&
@ -189,50 +195,50 @@ jobs:
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
+      - uses: actions/checkout@v3
-      with:
+        with:
-        fetch-depth: 0
+          fetch-depth: 0
-    - name: Install packages to register model
+      - name: Install packages to register model
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Register Model
+      - name: Register Model
-      env:
+        env:
-        TARGET_BRANCH: ${{ github.base_ref }}
+          TARGET_BRANCH: ${{ github.base_ref }}
-      run: |
+        run: |
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
+          REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+          # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
-        git config user.name "Github-Bot"
+          git config user.name "Github-Bot"
-        git config user.email "Github-Bot@no-reply.com"
+          git config user.email "Github-Bot@no-reply.com"
-        latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
+          latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
-        if [ -z "${latest_dev_version}" ]; then
+          if [ -z "${latest_dev_version}" ]; then
-          increment_version="1"
+            increment_version="1"
-        else
+          else
-          increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}')
+            increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}')
-        fi
+          fi
-        new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
+          new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
-        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
+          latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
-        echo ${new_tag}
+          echo ${new_tag}
-        commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}')
+          commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}')
-        git checkout ${commit_hash}
+          git checkout ${commit_hash}
-        # git pull #Get new model registry md file changes
+          # git pull #Get new model registry md file changes
-        git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}"
+          git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}"
-        git push origin ${new_tag}
+          git push origin ${new_tag}
-        git checkout ${TARGET_BRANCH}
+          git checkout ${TARGET_BRANCH}
-        git fetch --all
+          git fetch --all
-        git pull
+          git pull
-        gto show --json > MODEL_REGISTRY.md
+          gto show --json > MODEL_REGISTRY.md
-        git add .
+          git add .
-        git commit -m "Update Registry"
+          git commit -m "Update Registry"
-        git push origin ${TARGET_BRANCH}
+          git push origin ${TARGET_BRANCH}
--- a/.github/workflows/MLPipelinePullRequest.yml
+++ b/.github/workflows/MLPipelinePullRequest.yml
@ -5,22 +5,21 @@ on:
  #   branches:
  #     - "model-**"
  pull_request:
-    branches: ["sap-dev", "heat-dev", "carbon-dev"]
+    branches: ["sap-dev", "heat-dev", "carbon-dev", "heat_baseline-dev"]
  label:
    types: ["created", "edited"]
 permissions: write-all
 jobs:
  Check-Label:
    runs-on: ubuntu-latest
    steps:
-    - uses: yogevbd/enforce-label-action@2.1.0
+      - uses: yogevbd/enforce-label-action@2.1.0
-      with:
+        with:
-        REQUIRED_LABELS_ANY: "major,minor,patch"
+          REQUIRED_LABELS_ANY: "major,minor,patch"
-        REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']"
+          REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']"
-        BANNED_LABELS: "banned"
+          BANNED_LABELS: "banned"
  # No-Label:
  #   if: ${{ github.event.label.name != 'major' }} || ${{ github.event.label.name != 'minor' }}  || ${{ github.event.label.name != 'patch' }}
@ -32,86 +31,168 @@ jobs:
  #       echo "Please choose one of these tags: 'major', 'minor', 'patch'"
  #       exit(1)
-  Verify-Model:
+  Verify-Lambda:
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
+      - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
+      - name: Install packages to retrieve artifacts
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Retrieve artifacts (dvc.lock)
+      - name: Retrieve artifacts (dvc.lock)
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
+        run: |
-        cd modules/ml-pipeline/src/pipeline
+          cd modules/ml-pipeline/src/pipeline
-        dvc pull -r experiments
+          dvc pull -r experiments
-    - name: Build Prediction docker Image
+      - name: Set timestamp
-      run: |
+        id: set_timestamp
-        cd modules/ml-pipeline/src/
+        run: |
-        docker build . --file Prediction.Dockerfile --tag prediction_test
+          # Values appended to GITHUB_ENV only become visible in later steps,
           # so keep a shell-local copy for the log line below.
           timestamp="$(date +%Y%m%d)"
           echo "timestamp=${timestamp}" >> "$GITHUB_ENV"
           echo "Generated timestamp: ${timestamp}"
-    - name: Run Prediction docker container
+      - name: Upload sample row dataset to S3
-      run: |
+        env:
-        docker run prediction_test
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          cd modules/ml-pipeline/src/pipeline/data/prepared_data/
          aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet
      - name: Build Lambda docker Image
        run: |
          docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
      - name: Run lambda docker container
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          docker run -d -p 9000:8080 \
            -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
            -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
            -e RUNTIME_ENVIRONMENT=dev \
            -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
      - name: Test Lambda endpoint
        run: |
          sleep 2
          curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
            -H "Content-Type: application/json" \
            -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"warm\\\": true}\"}"
      - name: Get Lambda logs
        run: |
          docker logs $(docker ps -al -q)
      - name: Test Lambda endpoint again
        run: |
          sleep 2
          curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
            -H "Content-Type: application/json" \
            -d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
      - name: Get Lambda logs
        run: |
          docker logs $(docker ps -al -q)
      - name: Stop Lambda container
        run: |
          # "lambda_test" is the image tag, not a container name (the container is
          # started without --name), so resolve the running container by its image.
          docker stop $(docker ps -q --filter ancestor=lambda_test) || echo "Container already stopped"
      - name: Remove uploaded sample row dataset from S3
        if: always()
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
  # Pulls the DVC-tracked artefacts from the "experiments" remote, builds the image
  # defined by Prediction.Dockerfile and runs the resulting container once.
  # What the container itself verifies is defined by that Dockerfile (not shown here).
  Verify-Model:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): the AWS credentials do not look necessary for a pip install —
      # confirm and drop them from this step if unused.
      - name: Install packages to retrieve artifacts
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          pip install --upgrade pip
          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
      # Fetch the artefacts before the image build, which uses modules/ml-pipeline/src as its context.
      - name: Retrieve artifacts (dvc.lock)
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          cd modules/ml-pipeline/src/pipeline
          dvc pull -r experiments
      # The build context is modules/ml-pipeline/src/, so the pulled artefacts are available to the build.
      - name: Build Prediction docker Image
        run: |
          cd modules/ml-pipeline/src/
          docker build . --file Prediction.Dockerfile --tag prediction_test
      # Runs in the foreground with no AWS credentials or other environment passed to the container.
      - name: Run Prediction docker container
        run: |
          docker run prediction_test
  Trigger-CML:
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
+      - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
+      - name: Install packages to retrieve artifacts
-      run: |
+        run: |
-        pip install --upgrade pip
+          pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+          pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
-    - name: Retrieve artifacts (dvc.lock)
+      - name: Retrieve artifacts (dvc.lock)
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
+        run: |
-        cd modules/ml-pipeline/src/pipeline
+          cd modules/ml-pipeline/src/pipeline
-        dvc pull -r experiments
+          dvc pull -r experiments
-    - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v4
-    - uses: iterative/setup-cml@v1
+      - uses: iterative/setup-cml@v1
-    - name: Generate report
+      - name: Generate report
-      env:
+        env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-        REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        TARGET_BRANCH: ${{ github.base_ref }}
+          TARGET_BRANCH: ${{ github.base_ref }}
-      run: |
+        run: |
-        cd modules/ml-pipeline/src/pipeline
+          cd modules/ml-pipeline/src/pipeline
-        echo "## Model metrics" > report.md
+          echo "## Model metrics" > report.md
-        # Compare metrics to master
+          # Compare metrics to the target branch of the pull request
-        git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
+          git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
-        dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
+          dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
-        echo "## Scenario comparison" >> report.md
+          echo "## Scenario comparison" >> report.md
-        cat metrics/scenario_table.md >> report.md
+          cat metrics/scenario_table.md >> report.md
-        echo "" >> report.md
+          echo "" >> report.md
-        echo "## Scenario metrics" >> report.md
+          echo "## Scenario metrics" >> report.md
-        cat metrics/scenario_metrics.md >> report.md
+          cat metrics/scenario_metrics.md >> report.md
-        cml comment create report.md
+          cml comment create report.md
-        # echo "## Residuals plot from model" >> report.md
+          # echo "## Residuals plot from model" >> report.md
-        # metrics_location=$(find . -maxdepth 10 -name "residuals.png")
+          # metrics_location=$(find . -maxdepth 10 -name "residuals.png")
-        # echo $metrics_location
+          # echo $metrics_location
-        # cd $metric_location
+          # cd $metric_location
-        # echo "![](./residuals.png)" >> report.md
+          # echo "![](./residuals.png)" >> report.md
--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@ -8,25 +8,65 @@
        "active": true
    },
    "sap": {
-        "version": "v0.14.0",
+        "version": "v0.17.5",
        "stage": {
-            "dev": "v0.14.0"
+            "dev": "v0.17.5"
        },
        "registered": true,
        "active": true
    },
    "heat": {
-        "version": "v0.5.0",
+        "version": "v0.8.0",
        "stage": {
-            "dev": "v0.5.0"
+            "dev": "v0.8.0"
        },
        "registered": true,
        "active": true
    },
    "carbon": {
-        "version": "v0.5.0",
+        "version": "v0.8.0",
        "stage": {
-            "dev": "v0.5.0"
+            "dev": "v0.7.0"
        },
        "registered": true,
        "active": true
    },
    "hotwater": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "heating": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "lighting": {
        "version": "v1.0.0",
        "stage": {
            "dev": "v1.0.0"
        },
        "registered": true,
        "active": true
    },
    "hotwaterkwh": {
        "version": "v1.3.0",
        "stage": {
            "dev": "v1.3.0"
        },
        "registered": true,
        "active": true
    },
    "heatingkwh": {
        "version": "v1.5.0",
        "stage": {
            "dev": "v1.5.0"
        },
        "registered": true,
        "active": true
--- a/README.md
+++ b/README.md
@ -83,3 +83,13 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d
 ```
 This will send a POST request to the running Lambda function and pass in the required data as JSON.
 To exercise the Lambda's `testing` or `warm` invocation modes, use:
 ```bash
 curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": \"true\"}"}'
 ```
 or
 ```bash
 curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"warm\": \"true\"}"}'
 ```
--- a/deployment/Dockerfile.prediction.lambda
+++ b/deployment/Dockerfile.prediction.lambda
@ -1,19 +1,24 @@
-FROM public.ecr.aws/lambda/python:3.10
+FROM public.ecr.aws/lambda/python:3.12
 # Set the working directory
 WORKDIR ${LAMBDA_TASK_ROOT}
-ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
+ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
 ENV MPLCONFIGDIR="/tmp/matplotlib"
 # Environment variables
 ARG RUNTIME_ENVIRONMENT
 ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
 # Install necessary build tools - required to test locally
-RUN yum install -y gcc python3-devel gcc-c++
+RUN dnf install -y gcc python3-devel gcc-c++
 # Install python packages
 COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt
-RUN pip install --no-cache-dir -r ./requirements.txt
+
 RUN pip install uv
 RUN uv pip install -r requirements.txt --system
 # RUN pip install --no-cache-dir -r ./requirements.txt
 # Copy the project code
 COPY modules/ml-pipeline/src/pipeline ./pipeline
@ -22,4 +27,4 @@ COPY deployment/handlers/prediction_app.py ./pipeline/prediction_app.py
 WORKDIR ${LAMBDA_TASK_ROOT}/pipeline
-CMD [ "prediction_app.handler" ]
+CMD [ "prediction_app.handler" ]
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@ -47,6 +47,30 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
        return False
 def warming_up_invocation(
    model,
    model_filepath: str,
 ):
    """
    Serve a warm-up invocation by running one synthetic prediction.

    Loads the model from ``model_filepath``, builds a single all-zero row with
    one column per entry in ``model.model.original_features`` and passes it to
    ``model.predict``. The prediction result is discarded and nothing is
    returned.
    """
    import numpy as np
    import pandas as pd
    model.load_model(model_filepath)
    feature_names = model.model.original_features
    zero_row = np.zeros((1, len(feature_names)))
    synthetic_input = pd.DataFrame(zero_row, columns=feature_names)
    # Uses the wrapper's default predict path rather than selecting a specific
    # sub-model by name.
    model.predict(data=synthetic_input)
 def handler(event, context):
    """
    Take in event and trigger the prediction pipeline
@ -66,9 +90,6 @@ def handler(event, context):
        created_at = body["created_at"]
        # TODO: Implement the loading of the model and prediction
        storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
        logger.info(f"--- Initiate MLModel ---")
        build_model_params = settings.build_model
@ -78,6 +99,32 @@ def handler(event, context):
        model = model_factory(build_model_params["model_type"])
        model_filepath = build_model_params["model_save_filepath"]
        if "warm" in body:
            logger.info("Warm up invocation - synthetic prediction")
            warming_up_invocation(model=model, model_filepath=model_filepath)
            return {
                "statusCode": 200,
                "body": json.dumps(
                    {
                        "message": "Successfully warmed up invocation",
                    }
                ),
            }
        if "testing" in body:
            logger.info(
                "Testing invocation for CI/CD - save file to same location in S3"
            )
            storage_filepath = body["file_location"].replace(
                ".parquet", "_output.parquet"
            )
        else:
            storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
        logger.info(f"--- Initiate Input DataClient ---")
        input_dataclient = dataclient_factory(
            dataclient_type="aws-s3",
@ -95,7 +142,7 @@ def handler(event, context):
            output_dataclient=output_dataclient,
            model=model,
            target=feature_process_params["feature_processor_config"]["target"],
-            model_filepath=build_model_params["model_save_filepath"],
+            model_filepath=model_filepath,
            test_data_filepath=body["file_location"],
            predictions_output_filepath=storage_filepath,
            predictions_column_name=generate_predictions_params[
--- a/deployment/serverless.yml
+++ b/deployment/serverless.yml
@ -51,3 +51,4 @@ functions:
          path: /predict
          method: POST
    timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
    memorySize: 3008
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@ -1,7 +1,8 @@
 export PYENV_ROOT=$(HOME)/.pyenv
 export PATH := $(PYENV_ROOT)/bin:$(PATH)
-PYTHON_VERSION ?= 3.10.12
+PYTHON_VERSION ?= 3.12.12
 CONDA_ENV=dev_env_pipeline
 CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
 .PHONY: init
 init: dev-conda
@ -12,11 +13,15 @@ dev-conda:
 	# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
 	conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
 	conda init bash
-	conda run -v -n ${CONDA_ENV} pip install --upgrade pip
+	${CONDA_ACTIVATE} ${CONDA_ENV} && \
-	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt
+		which pip && \
-	conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
+		pip install --upgrade pip && \
-	conda run -v -n ${CONDA_ENV} pre-commit install
+		pip install uv && \
-	conda run -v -n ${CONDA_ENV} pip install ipykernel
+		uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
 		uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
 		pre-commit install && \
 		uv pip install ipykernel && \
 		conda install llvm-openmp -y
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
 	echo "conda activate ${CONDA_ENV}"
@ -33,4 +38,4 @@ dev-pyenv:
 .PHONY: dvc-init
 dvc-init:
-	. .dev_env_pipeline/bin/activate && dvc init --subdir
+	. .dev_env_pipeline/bin/activate && dvc init --subdir
--- a/modules/ml-pipeline/src/Prediction.Dockerfile
+++ b/modules/ml-pipeline/src/Prediction.Dockerfile
@ -1,16 +1,21 @@
 # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
-FROM python:3.10.12-slim
+FROM python:3.12.12-slim
 RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
 COPY pipeline/requirements/predictions/requirements.txt requirements.txt
 RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
+
 RUN pip install uv
 RUN uv pip install -r requirements.txt --system
 # RUN pip install -r requirements.txt
 # Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
 COPY pipeline/ /home/pipeline/
 WORKDIR /home/pipeline/
-CMD [ "python", "3_generate_predictions.py"]
+CMD [ "python", "3_generate_predictions.py"]
--- a/modules/ml-pipeline/src/pipeline/1_prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/1_prepare_data.py
@ -29,6 +29,7 @@ data_filepath = prepare_data_params["data_filepath"]
 train_proportion = prepare_data_params["train_proportion"]
 output_train_filepath = prepare_data_params["output_train_filepath"]
 output_test_filepath = prepare_data_params["output_test_filepath"]
 sample_test_filepath = prepare_data_params["sample_test_filepath"]
 feature_processor_config = feature_process_params["feature_processor_config"]
 logger.info(f"--- Initiate DataClient ---")
@ -99,6 +100,10 @@ def prepare_data(
    logger.info("--- Outputting data ---")
    output_dataclient.save_data(
        obj=data.sample(1), location=sample_test_filepath, save_config=None
    )
    output_dataclient.save_data(
        obj=train, location=output_train_filepath, save_config=None
    )
--- a/modules/ml-pipeline/src/pipeline/4_generate_metrics.py
+++ b/modules/ml-pipeline/src/pipeline/4_generate_metrics.py
@ -4,9 +4,7 @@ After the model is built, we can evaluate its performance
 """
 import os
 import yaml
 import pandas as pd
 from pathlib import Path
 from core.interface.InterfaceModels import MLModel
 from core.interface.InterfaceMetrics import MLMetrics
 from core.interface.InterfaceDataClient import DataClient
--- a/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
+++ b/modules/ml-pipeline/src/pipeline/5_generate_scenarios.py
@ -99,6 +99,12 @@ def generate_scenario_predictions(
            ]
        )
    # TEMPORARY FIX: ADD is_post_sap10_starting and is_post_sap10_ending if not present
    if "is_post_sap10_starting" not in scenario_data.columns:
        scenario_data["is_post_sap10_starting"] = False
    if "is_post_sap10_ending" not in scenario_data.columns:
        scenario_data["is_post_sap10_ending"] = False
    logger.info("--- Loading Model ---")
    model.load_model(model_filepath)
--- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml
@ -14,9 +14,23 @@ default:
      output_filepath: ./data/model/allmodels/
      problem_type: regression
      eval_metric: mean_squared_error #mean_absolute_error
-      time_limit: 1800
+      time_limit: 3600
      presets: medium_quality
-      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
+      excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT', 'FASTAI']
-      infer_limit: 0.05
+      infer_limit: 1
      infer_limit_batch_size: 10000
      fit_strategy: "parallel"
      ag_args_ensemble: {'num_folds_parallel': 2}
      num_gpus: 0
      hyperparameters:
        {
        'NN_TORCH': [{}],
        'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}],
        # 'GBM': [{}],
        'CAT': [{}],
        'XGB': [{}],
        'FASTAI': [{}],
        'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
        'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
        'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
        }
--- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py
@ -18,30 +18,60 @@ def remove_starting_columns(df):
    return df
-def remove_floor_height_ending(df):
+def keep_negative_heat_change(df):
-    # df.describe(percentiles=[0.005,0.99])['FLOOR_HEIGHT_ENDING']
+    df = df[df["heat_demand_change"] < 0]
    # shows bottom 0.5 percentile is 1.665
    # So keep anything above this
    df = df[df["floor_height_ending"] > 1.665].reset_index(drop=True)
    print("we in here")
    return df
-def remove_minimum_habitable_room_size(df):
+def keep_negative_carbon_change(df):
-    # Need minimum of 6.5m per habitable room
+    df = df[df["carbon_change"] < 0]
    df = df[
        df["total_floor_area_ending"] / df["number_habitable_rooms"] > 6.5
    ].reset_index(drop=True)
    return df
-def keep_flats(df):
+# TODO: Move to ETL pipeline
-    df = df[df["property_type"] == "Flat"]
+def remove_unreasonable_habitable_rooms(df):
    """
    Assumption is that proportion of floor area to habitable rooms should be at least 6.5m2
    """
    minimum_room_size_index = (
        df["total_floor_area_ending"] / df["number_habitable_rooms"] >= 6.5
    )
    df = df[minimum_room_size_index]
    return df
-def keep_non_zero_rdsap(df):
+def remove_top_1_percent_heat_demand_starting(df):
-    df = df[df["rdsap_change"] != 0]
+    # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%']
    threshold_value = 860
    df = df[df["heat_demand_starting"] < threshold_value]
    return df
 def remove_negative_heat_demand_starting(df):
    """
    Keep only the rows whose ``heat_demand_starting`` is strictly above zero.
    Rows equal to zero are dropped as well as negative ones, and the index of
    the surviving rows is left as it was (no reset).
    """
    lower_bound = 0
    is_positive = df["heat_demand_starting"].gt(lower_bound)
    return df.loc[is_positive]
 # def remove_top_1_percent_heat_demand_ending(df):
 #     # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%']
 #     threshold_value = 593
 #     df = df[df["heat_demand_ending"] < threshold_value]
 #     return df
 def remove_negative_heat_demand_ending(df):
    """
    Keep only the rows whose ``heat_demand_ending`` is strictly above zero.
    Zero-valued rows are filtered out together with negative ones; the
    original index labels are preserved on the rows that remain.
    """
    ending_demand = df["heat_demand_ending"]
    positive_rows = ending_demand > 0
    return df[positive_rows]
 def remove_top_1_percent_carbon(df):
    """
    Keep only the rows whose ``carbon_starting`` is strictly below 18.
    The cut-off is hard-coded; presumably it stands in for the 99th percentile
    previously read from df.describe(percentiles=[0.99])['CARBON_STARTING']['99%']
    (confirm against the current dataset). The index is not reset.
    """
    carbon_cutoff = 18
    below_cutoff = df["carbon_starting"].lt(carbon_cutoff)
    return df.loc[below_cutoff]
@ -54,10 +84,14 @@ def keep_non_zero_rdsap(df):
 #     return df
 business_logic = {
-    # "keep_non_zero_rdsap": keep_non_zero_rdsap,
+    "remove_unreasonable_habitable_rooms": remove_unreasonable_habitable_rooms,
-    # "keep_flats": keep_flats,
+    "keep_negative_heat_change": keep_negative_heat_change,
-    # "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,
+    "keep_negative_carbon_change": keep_negative_carbon_change,
-    # "remove_floor_height_ending": remove_floor_height_ending
+    "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand_starting,
    "remove_negative_heat_demand_starting": remove_negative_heat_demand_starting,
    # "remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending,
    "remove_negative_heat_demand_ending": remove_negative_heat_demand_ending,
    "remove_top_1_percent_carbon": remove_top_1_percent_carbon,
    # "remove_starting_columns": remove_starting_columns
    # "keep_ENDING_COLUMNS": keep_ending_columns
 }
--- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
+++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py
@ -1,6 +1,7 @@
 """
 After predictions, we may want to apply some post processing to the predictions
 """
 import pandas as pd
@ -13,10 +14,11 @@ def clip_predictions_to_minimum_value(
    predictions_df = pd.concat([data, predictions], axis=1)
    # We expect all prediction to be atleast one point improvement
    replace_index = (
-        predictions_df["sap_starting"] + minimum_value > predictions_df["predictions"]
+        predictions_df["predictions"]
        > predictions_df["heat_demand_starting"] - minimum_value
    )
    predictions_df.loc[replace_index, "predictions"] = (
-        predictions_df.loc[replace_index, "sap_starting"] + minimum_value
+        predictions_df.loc[replace_index, "heat_demand_starting"] - minimum_value
    )
    predictions_new = predictions_df["predictions"]
@ -30,6 +32,6 @@ def clip_predictions_to_minimum_value(
 post_prediction_logic = {
-    "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
+    # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
    # "round_predictions": round_predictions
 }
--- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml
@ -8,6 +8,6 @@ default:
      # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
      # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
      # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
-      - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
+      # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
    comparison_output_filepath: ./metrics/scenario_table.md
    metrics_output_filepath: ./metrics/scenario_metrics.md
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@ -12,32 +12,163 @@ default:
      AWS_ACCESS_KEY_ID: minio
      AWS_SECRET_ACCESS_KEY: minio123
      ENDPOINT_URL: http://localhost:9000
-    local:
+    local: null
      null
  prepare_data:
    input_dataclient_type: aws-s3
    output_dataclient_type: local
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-06-09-10-36-53/dataset_rooms.parquet
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet
+    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
-    # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet
+    data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
    data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
    train_proportion: 0.9
    output_train_filepath: ./data/prepared_data/train.parquet
    output_test_filepath: ./data/prepared_data/test.parquet
    sample_test_filepath: ./data/prepared_data/sample_test.parquet
  feature_processor:
    feature_processor_type: dataframe
    feature_processor_config:
      subsample_amount: null
      subsample_seed: 0
-      target: sap_ending
+      target: heat_demand_starting
      identifier_columns: ["uprn"]
-      # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
+      drop_columns:
-      drop_columns: [
+        [
-        "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
+          "heat_demand_ending",
-        'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
+          "potential_energy_efficiency",
-        'number_habitable_rooms', 'number_heated_rooms']
+          "environment_impact_potential",
          "energy_consumption_potential",
          "co2_emissions_potential",
          "heat_demand_change",
          "carbon_change",
          "rdsap_change",
          "sap_starting",
          "sap_ending",
          "carbon_starting",
          "carbon_ending",
          "days_to_starting",
          "days_to_ending",
          "number_habitable_rooms_starting",
          "number_habitable_rooms_ending",
          "number_heated_rooms_starting",
          "number_heated_rooms_ending",
          "number_habitable_rooms",
          "number_heated_rooms",
          "lighting_cost_starting",
          "lighting_cost_ending",
          "heating_cost_starting",
          "heating_cost_ending",
          "hot_water_cost_starting",
          "hot_water_cost_ending",
          "floor_thermal_transmittance",
          "floor_thermal_transmittance_ending",
          "lodgement_date_starting",
          "lodgement_date_ending",
          "walls_thermal_transmittance_ending",
          "walls_thermal_transmittance_unit_ending",
          "is_filled_cavity_ending",
          "is_as_built_ending",
          "walls_is_assumed_ending",
          "is_park_home_ending",
          "walls_insulation_thickness_ending",
          "external_insulation_ending",
          "internal_insulation_ending",
          "floor_insulation_thickness_ending",
          "roof_thermal_transmittance_ending",
          "is_at_rafters_ending",
          "roof_insulation_thickness_ending",
          "heater_type_ending",
          "system_type_ending",
          "thermostat_characteristics_ending",
          "heating_scope_ending",
          "energy_recovery_ending",
          "hotwater_tariff_type_ending",
          "extra_features_ending",
          "chp_systems_ending",
          "distribution_system_ending",
          "no_system_present_ending",
          "appliance_ending",
          "has_radiators_ending",
          "has_fan_coil_units_ending",
          "has_pipes_in_screed_above_insulation_ending",
          "has_pipes_in_insulated_timber_floor_ending",
          "has_pipes_in_concrete_slab_ending",
          "has_boiler_ending",
          "has_air_source_heat_pump_ending",
          "has_room_heaters_ending",
          "has_electric_storage_heaters_ending",
          "has_warm_air_ending",
          "has_electric_underfloor_heating_ending",
          "has_electric_ceiling_heating_ending",
          "has_community_scheme_ending",
          "has_ground_source_heat_pump_ending",
          "has_no_system_present_ending",
          "has_portable_electric_heaters_ending",
          "has_water_source_heat_pump_ending",
          "has_electric_heat_pump_ending",
          "has_micro-cogeneration_ending",
          "has_solar_assisted_heat_pump_ending",
          "has_exhaust_source_heat_pump_ending",
          "has_community_heat_pump_ending",
          "has_hot-water-only_ending",
          "has_electric_ending",
          "has_mains_gas_ending",
          "has_wood_logs_ending",
          "has_coal_ending",
          "has_oil_ending",
          "has_wood_pellets_ending",
          "has_anthracite_ending",
          "has_dual_fuel_mineral_and_wood_ending",
          "has_smokeless_fuel_ending",
          "has_lpg_ending",
          "has_b30k_ending",
          "has_mineral_and_wood_ending",
          "has_dual_fuel_appliance_ending",
          "has_electricaire_ending",
          "has_assumed_for_most_rooms_ending",
          "has_underfloor_heating_ending",
          "thermostatic_control_ending",
          "charging_system_ending",
          "switch_system_ending",
          "no_control_ending",
          "dhw_control_ending",
          "community_heating_ending",
          "multiple_room_thermostats_ending",
          "auxiliary_systems_ending",
          "trvs_ending",
          "rate_control_ending",
          "glazing_type_ending",
          "fuel_type_ending",
          "main-fuel_tariff_type_ending",
          "is_community_ending",
          "no_individual_heating_or_community_network_ending",
          "complex_fuel_type_ending",
          "mechanical_ventilation_ending",
          "secondheat_description_ending",
          "glazed_type_ending",
          "multi_glaze_proportion_ending",
          "low_energy_lighting_ending",
          "number_open_fireplaces_ending",
          "solar_water_heating_flag_ending",
          "photo_supply_ending",
          "transaction_type_ending",
          "energy_tariff_ending",
          "extension_count_ending",
          "total_floor_area_ending",
          "floor_height_ending",
          "hot_water_energy_eff_ending",
          "floor_energy_eff_ending",
          "windows_energy_eff_ending",
          "walls_energy_eff_ending",
          "sheating_energy_eff_ending",
          "roof_energy_eff_ending",
          "mainheat_energy_eff_ending",
          "mainheatc_energy_eff_ending",
          "lighting_energy_eff_ending",
          "is_post_sap10_ending",
          "estimated_perimeter_ending",
        ]
      # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
      retain_features: null
      # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
      #  'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
@ -78,4 +209,4 @@ default:
 dev:
  generate_predictions:
-      input_dataclient_type: aws-s3
+    input_dataclient_type: aws-s3
--- a/modules/ml-pipeline/src/pipeline/core/MLModels.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py
@ -1,4 +1,4 @@
-""""
+"""
 Implementations of MLModels, all of which will have four methods to:
 - Load model
 - Save Model
@ -11,9 +11,6 @@ import joblib
 import pandas as pd
 from pathlib import Path
 from typing import Union, List
 from sklearn import linear_model
 from sklearn.svm import SVR
 from autogluon.tabular import TabularDataset, TabularPredictor
 from core.interface.InterfaceModels import MLModel
 from core.Logger import logger
@ -69,6 +66,8 @@ class SKLearnLinearRegression:
        """
        Method to train a model
        """
        from sklearn import linear_model
        self.model = linear_model.LinearRegression()
        x_train = data.iloc[:, data.columns != target]
@ -117,6 +116,7 @@ class SKLearnSVMRegression:
        """
        Method to train a model
        """
        from sklearn.svm import SVR
        validate_dict_keys(
            list(model_hyperparameters.keys()),
@ -152,12 +152,17 @@ class AutogluonAutoML:
        "infer_limit",
        "infer_limit_batch_size",
        "ag_args_ensemble",
        "fit_strategy",
        "num_gpus",
        "hyperparameters",
    ]
    def load_model(self, path: Union[Path, str]) -> None:
        """
        Method to load a model
        """
        from autogluon.tabular import TabularPredictor
        filepath = str(path)
        self.model = TabularPredictor.load(path=filepath)
@ -183,6 +188,10 @@ class AutogluonAutoML:
        """
        Method to train a model
        """
        from autogluon.tabular import TabularDataset, TabularPredictor
        # Force Parallel Model fitting
        os.environ["AG_FORCE_PARALLEL"] = "True"
        validate_dict_keys(
            keys_1=list(model_hyperparameters.keys()),
@ -209,6 +218,9 @@ class AutogluonAutoML:
            infer_limit=model_hyperparameters["infer_limit"],
            infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
            ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
            fit_strategy=model_hyperparameters["fit_strategy"],
            num_gpus=model_hyperparameters["num_gpus"],
            hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
        )
    def predict(
--- a/modules/ml-pipeline/src/pipeline/dvc.lock
+++ b/modules/ml-pipeline/src/pipeline/dvc.lock
@ -16,15 +16,22 @@ stages:
    deps:
    - path: 1_prepare_data.py
      hash: md5
-      md5: 11a3b8bfdfe199ab7ecc39ccc5652649
+      md5: a5ce162e1c402c0f811a80ef78cf4dd5
-      size: 4298
+      size: 4481
    params:
      configs/settings.yaml:
        default.feature_processor.feature_processor_config.drop_columns:
        - heat_demand_ending
        - potential_energy_efficiency
        - environment_impact_potential
        - energy_consumption_potential
        - co2_emissions_potential
        - heat_demand_change
        - carbon_change
        - rdsap_change
-        - heat_demand_ending
+        - sap_starting
        - sap_ending
        - carbon_starting
        - carbon_ending
        - days_to_starting
        - days_to_ending
@ -34,24 +41,140 @@ stages:
        - number_heated_rooms_ending
        - number_habitable_rooms
        - number_heated_rooms
        - lighting_cost_starting
        - lighting_cost_ending
        - heating_cost_starting
        - heating_cost_ending
        - hot_water_cost_starting
        - hot_water_cost_ending
        - floor_thermal_transmittance
        - floor_thermal_transmittance_ending
        - lodgement_date_starting
        - lodgement_date_ending
        - walls_thermal_transmittance_ending
        - walls_thermal_transmittance_unit_ending
        - is_filled_cavity_ending
        - is_as_built_ending
        - walls_is_assumed_ending
        - is_park_home_ending
        - walls_insulation_thickness_ending
        - external_insulation_ending
        - internal_insulation_ending
        - floor_insulation_thickness_ending
        - roof_thermal_transmittance_ending
        - is_at_rafters_ending
        - roof_insulation_thickness_ending
        - heater_type_ending
        - system_type_ending
        - thermostat_characteristics_ending
        - heating_scope_ending
        - energy_recovery_ending
        - hotwater_tariff_type_ending
        - extra_features_ending
        - chp_systems_ending
        - distribution_system_ending
        - no_system_present_ending
        - appliance_ending
        - has_radiators_ending
        - has_fan_coil_units_ending
        - has_pipes_in_screed_above_insulation_ending
        - has_pipes_in_insulated_timber_floor_ending
        - has_pipes_in_concrete_slab_ending
        - has_boiler_ending
        - has_air_source_heat_pump_ending
        - has_room_heaters_ending
        - has_electric_storage_heaters_ending
        - has_warm_air_ending
        - has_electric_underfloor_heating_ending
        - has_electric_ceiling_heating_ending
        - has_community_scheme_ending
        - has_ground_source_heat_pump_ending
        - has_no_system_present_ending
        - has_portable_electric_heaters_ending
        - has_water_source_heat_pump_ending
        - has_electric_heat_pump_ending
        - has_micro-cogeneration_ending
        - has_solar_assisted_heat_pump_ending
        - has_exhaust_source_heat_pump_ending
        - has_community_heat_pump_ending
        - has_hot-water-only_ending
        - has_electric_ending
        - has_mains_gas_ending
        - has_wood_logs_ending
        - has_coal_ending
        - has_oil_ending
        - has_wood_pellets_ending
        - has_anthracite_ending
        - has_dual_fuel_mineral_and_wood_ending
        - has_smokeless_fuel_ending
        - has_lpg_ending
        - has_b30k_ending
        - has_mineral_and_wood_ending
        - has_dual_fuel_appliance_ending
        - has_electricaire_ending
        - has_assumed_for_most_rooms_ending
        - has_underfloor_heating_ending
        - thermostatic_control_ending
        - charging_system_ending
        - switch_system_ending
        - no_control_ending
        - dhw_control_ending
        - community_heating_ending
        - multiple_room_thermostats_ending
        - auxiliary_systems_ending
        - trvs_ending
        - rate_control_ending
        - glazing_type_ending
        - fuel_type_ending
        - main-fuel_tariff_type_ending
        - is_community_ending
        - no_individual_heating_or_community_network_ending
        - complex_fuel_type_ending
        - mechanical_ventilation_ending
        - secondheat_description_ending
        - glazed_type_ending
        - multi_glaze_proportion_ending
        - low_energy_lighting_ending
        - number_open_fireplaces_ending
        - solar_water_heating_flag_ending
        - photo_supply_ending
        - transaction_type_ending
        - energy_tariff_ending
        - extension_count_ending
        - total_floor_area_ending
        - floor_height_ending
        - hot_water_energy_eff_ending
        - floor_energy_eff_ending
        - windows_energy_eff_ending
        - walls_energy_eff_ending
        - sheating_energy_eff_ending
        - roof_energy_eff_ending
        - mainheat_energy_eff_ending
        - mainheatc_energy_eff_ending
        - lighting_energy_eff_ending
        - is_post_sap10_ending
        - estimated_perimeter_ending
        default.feature_processor.feature_processor_config.retain_features:
        default.feature_processor.feature_processor_config.subsample_amount:
        default.feature_processor.feature_processor_config.subsample_seed: 0
-        default.feature_processor.feature_processor_config.target: sap_ending
+        default.feature_processor.feature_processor_config.target: 
          heat_demand_starting
        default.feature_processor.feature_processor_type: dataframe
-        default.prepare_data.data_filepath:
+        default.prepare_data.data_filepath: 
-          s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
+          s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
        default.prepare_data.input_dataclient_type: aws-s3
        default.prepare_data.output_dataclient_type: local
-        default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
+        default.prepare_data.output_test_filepath: 
-        default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
+          ./data/prepared_data/test.parquet
        default.prepare_data.output_train_filepath: 
          ./data/prepared_data/train.parquet
        default.prepare_data.train_proportion: 0.9
    outs:
    - path: data/prepared_data/
      hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
+      md5: c293fbc1658af932f0d09cdce25acf67.dir
-      size: 45056059
+      size: 21779190
-      nfiles: 2
+      nfiles: 3
  build_model:
    cmd: python 2_build_model.py
    deps:
@ -61,9 +184,9 @@ stages:
      size: 4820
    - path: data/prepared_data
      hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
+      md5: c293fbc1658af932f0d09cdce25acf67.dir
-      size: 45056059
+      size: 21779190
-      nfiles: 2
+      nfiles: 3
    params:
      configs/build_model.yaml:
        default:
@ -79,7 +202,7 @@ stages:
              output_filepath: ./data/model/allmodels/
              problem_type: regression
              eval_metric: mean_squared_error
-              time_limit: 1800
+              time_limit: 3600
              presets: medium_quality
              excluded_model_types:
              - RF
@ -87,25 +210,94 @@ stages:
              - NN_TORCH
              - KNN
              - XT
-              infer_limit: 0.05
+              - FASTAI
              infer_limit: 1
              infer_limit_batch_size: 10000
              fit_strategy: parallel
              ag_args_ensemble:
                num_folds_parallel: 2
              num_gpus: 0
              hyperparameters:
                NN_TORCH:
                - {}
                GBM:
                - extra_trees: true
                  ag_args:
                    name_suffix: XT
                - {}
                - learning_rate: 0.03
                  num_leaves: 128
                  feature_fraction: 0.9
                  min_data_in_leaf: 3
                  ag_args:
                    name_suffix: Large
                    priority: 0
                CAT:
                - {}
                XGB:
                - {}
                FASTAI:
                - {}
                RF:
                - criterion: gini
                  ag_args:
                    name_suffix: Gini
                    problem_types:
                    - binary
                    - multiclass
                - criterion: entropy
                  ag_args:
                    name_suffix: Entr
                    problem_types:
                    - binary
                    - multiclass
                - criterion: squared_error
                  ag_args:
                    name_suffix: MSE
                    problem_types:
                    - regression
                    - quantile
                XT:
                - criterion: gini
                  ag_args:
                    name_suffix: Gini
                    problem_types:
                    - binary
                    - multiclass
                - criterion: entropy
                  ag_args:
                    name_suffix: Entr
                    problem_types:
                    - binary
                    - multiclass
                - criterion: squared_error
                  ag_args:
                    name_suffix: MSE
                    problem_types:
                    - regression
                    - quantile
                KNN:
                - weights: uniform
                  ag_args:
                    name_suffix: Unif
                - weights: distance
                  ag_args:
                    name_suffix: Dist
    outs:
    - path: data/fit_predictions/
      hash: md5
-      md5: d9c9afc05e8780db47c0548b19bf7d19.dir
+      md5: 6c4de55effeb468e37ee3db3838109db.dir
-      size: 3349989
+      size: 2976628
      nfiles: 1
    - path: data/model/
      hash: md5
-      md5: 13c3100e1486c27a83a8a47491077842.dir
+      md5: 2ff63da0312853b1fd9338cac62ba0b0.dir
-      size: 773523079
+      size: 592460869
-      nfiles: 36
+      nfiles: 31
    - path: metrics/fit_metrics.json
      hash: md5
-      md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a
+      md5: c00465e99e9368afdb3302a52fca99b9
-      size: 224
+      size: 223
  generate_predictions:
    cmd: python 3_generate_predictions.py
    deps:
@ -115,44 +307,46 @@ stages:
      size: 2464
    - path: data/model
      hash: md5
-      md5: 13c3100e1486c27a83a8a47491077842.dir
+      md5: 2ff63da0312853b1fd9338cac62ba0b0.dir
-      size: 773523079
+      size: 592460869
-      nfiles: 36
+      nfiles: 31
    - path: data/prepared_data
      hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
+      md5: c293fbc1658af932f0d09cdce25acf67.dir
-      size: 45056059
+      size: 21779190
-      nfiles: 2
+      nfiles: 3
    params:
      configs/settings.yaml:
        default.generate_predictions.input_dataclient_type: local
        default.generate_predictions.output_dataclient_type: local
        default.generate_predictions.predictions_column_name: predictions
-        default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
+        default.generate_predictions.predictions_output_filepath: 
-        default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
+          ./data/predictions/predictions.parquet
        default.generate_predictions.test_data_filepath: 
          ./data/prepared_data/test.parquet
    outs:
    - path: data/predictions/
      hash: md5
-      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
+      md5: a960cadf88d5f38cc55942781a2db51e.dir
-      size: 463197
+      size: 392728
      nfiles: 1
  generate_metrics:
    cmd: python 4_generate_metrics.py
    deps:
    - path: 4_generate_metrics.py
      hash: md5
-      md5: 4fedb86d89d528f0a6597934ba3890a0
+      md5: d61bb524f706917f6a3eb72b1ab8bc61
-      size: 3484
+      size: 3447
    - path: data/predictions
      hash: md5
-      md5: 5d07bcebf3160a72bb18dfd79106e85c.dir
+      md5: a960cadf88d5f38cc55942781a2db51e.dir
-      size: 463197
+      size: 392728
      nfiles: 1
    - path: data/prepared_data
      hash: md5
-      md5: 80c9e138146a1d96b9d16091c207e2e8.dir
+      md5: c293fbc1658af932f0d09cdce25acf67.dir
-      size: 45056059
+      size: 21779190
-      nfiles: 2
+      nfiles: 3
    params:
      configs/settings.yaml:
        default.generate_metrics.dataclient_type: local
@ -161,30 +355,29 @@ stages:
    outs:
    - path: metrics/metrics.json
      hash: md5
-      md5: 3e08df02fd5c5d094bcf936e1338d596
+      md5: c0241381a23b29831b18be3f063f75fd
-      size: 223
+      size: 218
  generate_scenerio_metrics:
    cmd: python 5_generate_scenarios.py
    deps:
    - path: 5_generate_scenarios.py
      hash: md5
-      md5: 40506749fefd926d47c60ff5b16db307
+      md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
-      size: 5337
+      size: 5658
    params:
      configs/scenarios.yaml:
        default.scenarios:
          input_dataclient_type: aws-s3
          output_dataclient_type: local
          scenario_data_filepaths:
          - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
          comparison_output_filepath: ./metrics/scenario_table.md
          metrics_output_filepath: ./metrics/scenario_metrics.md
    outs:
    - path: metrics/scenario_metrics.md
      hash: md5
-      md5: fa4d6d7bbd7818613800da5f8f37ea96
+      md5: d41d8cd98f00b204e9800998ecf8427e
-      size: 363
+      size: 0
    - path: metrics/scenario_table.md
      hash: md5
-      md5: d6baf100a1623cc2467c2f8221d314c9
+      md5: d41d8cd98f00b204e9800998ecf8427e
-      size: 2133
+      size: 0
--- a/modules/ml-pipeline/src/pipeline/eda.py
+++ b/modules/ml-pipeline/src/pipeline/eda.py
@ -1,6 +1,7 @@
 """
 Doing some eda on dataset
 """
 # Look at response variable
 from matplotlib import pyplot as plt
@ -38,7 +39,6 @@ train_df[[target, "SAP_STARTING"]].plot(y=target, x="SAP_STARTING", style="o")
 train_df[[target, "HEAT_DEMAND_STARTING"]].plot(
    x=target, y="HEAT_DEMAND_STARTING", style="o"
 )
 # Both make sense: i.e. the higher the sap, the lower we predict and the higher the heat demand, the higher we predict
 # Load the autogluon model and check feature importance
@ -176,6 +176,8 @@ plot_permutation_importance(exp, fig_kw={"figwidth": 7, "figheight": 6})
 #
 #
 from core.MLMetrics import metrics_factory
 from core.MLModels import model_factory
 from core.DataClient import dataclient_factory
 import pandas as pd
@ -216,6 +218,12 @@ mix_df["residual"] = abs(mix_df[predictions_column_name] - mix_df[target])
 mix_df = mix_df.sort_values("residual", ascending=False)
 cosine_similarity_df = mix_df[mix_df.columns.difference(["predictions", "residual"])]
 metrics = metrics_factory("Regression")
 metrics.generate_metrics(mix_df["predictions"], mix_df["HEAT_DEMAND_ENDING"])
 cosine_similarity_df = mix_df[
    mix_df.columns.difference(["predictions", "residual", "SAP_ENDING"])
 ]
 from sklearn.metrics.pairwise import cosine_similarity
 row_index = 0
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt
@ -1,7 +1,7 @@
-joblib==1.3.2
+joblib==1.5.2
-boto3==1.28.17
+boto3==1.40.61
-pandas==2.1.4
+pandas==2.3.3
-autogluon.tabular[all]==1.0.0
+autogluon.tabular[all]==1.4.0
-dynaconf==3.2.1
+dynaconf==3.2.12
-pyarrow==13.0.0
+pyarrow==20.0.0
-pre-commit==3.3.3
+pre-commit==4.3.0
--- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt
@ -1,7 +1,7 @@
-joblib==1.3.2
+joblib==1.5.2
-boto3==1.28.17
+boto3==1.40.61
-pandas==2.1.4
+pandas==2.3.3
-autogluon.tabular[all]==1.0.0
+autogluon.tabular[all]==1.4.0
-dynaconf==3.2.1
+dynaconf==3.2.12
-pyarrow==13.0.0
+pyarrow==20.0.0
-PyYAML==6.0.1
+PyYAML==6.0.3
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt
@ -1,10 +1,10 @@
-joblib==1.3.2
+joblib==1.5.2
-boto3==1.28.17
+boto3==1.40.61
-pandas==2.1.4
+pandas==2.3.3
-autogluon.tabular[all]==1.0.0
+autogluon.tabular[all]==1.4.0
-ray==2.6.3
+ray==2.44.1
-dynaconf==3.2.1
+dynaconf==3.2.12
-alibi==0.9.5
+# alibi
-shap==0.42.1
+shap==0.49.1
-pyarrow==13.0.0
+pyarrow==20.0.0
-pre-commit==3.3.3
+pre-commit==4.3.0
--- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt
@ -1,4 +1,4 @@
-boto3==1.28.41
+boto3==1.40.61
-pandas==2.1.4
+pandas==2.3.3
-autogluon.tabular[all]==1.0.0
+autogluon.tabular[all]==1.4.0
-dynaconf==3.2.1
+dynaconf==3.2.12
--- a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
+++ b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
@ -1,4 +1,4 @@
-dvc==3.51.0
+dvc==3.66.0
-dvc-s3==3.2.0
+dvc-s3==3.2.2
-gto==1.7.1
+gto==1.9.0
-pyOpenSSL==23.3.0
+pyOpenSSL==23.3.0
Author	SHA1	Message	Date
KhalimCK	55af9dc77d	Merge pull request #168 from Hestia-Homes/heat_baseline-dev-model Initial model for heat_baseline	2026-03-13 18:38:19 +00:00
Michael Duong	e7b4bbe524	amend the deployment files	2026-01-09 10:41:17 +00:00
Michael Duong	a99501abe0	initial model for heat baseline	2026-01-09 10:39:36 +00:00
Github-Bot	6deceb5ede	Update Registry	2025-11-05 15:24:44 +00:00
Github-Bot	ac268ee80a	Update Registry	2025-11-05 15:24:02 +00:00
quandanrepo	4924759e8b	Merge pull request #162 from Hestia-Homes/heat-dev-update new heat model, lambda changes, general fixes	2025-11-05 15:23:23 +00:00
Michael Duong	5d2b37866f	new heat model, lambda changes, general fixes	2025-11-04 23:19:23 +00:00
Github-Bot	b8947331c4	Update Registry	2024-10-08 15:41:04 +00:00
Github-Bot	b34b42f5e7	Update Registry	2024-10-08 15:40:25 +00:00
KhalimCK	a409accbfb	Merge pull request #120 from Hestia-Homes/heat-dev-model Heat dev model	2024-10-08 16:39:52 +01:00
Michael Duong	343d508cad	add carbon model with september data	2024-10-06 21:38:34 +01:00
Michael Duong	a5574cd1e7	try new data from epc with adjusted feature processing	2024-06-09 11:28:34 +01:00
Michael Duong	8cfe43b22d	reduce strictness of the lower threshold to below 0	2024-06-08 10:05:38 +01:00
Michael Duong	6921f7d2a9	Merge branch 'heat-dev' of github.com:Hestia-Homes/ML into heat-dev-model	2024-06-06 23:15:34 +01:00
Michael Duong	ddf24b3cbc	add clipping for both starting and ending for top 1 and 99 percent	2024-06-06 23:15:05 +01:00
Github-Bot	55d4d6df7f	Update Registry	2024-05-31 12:41:07 +00:00
Github-Bot	0cc734def7	Update Registry	2024-05-31 12:40:24 +00:00
KhalimCK	f6c775fdcf	Merge pull request #118 from Hestia-Homes/heat-dev-model Heat dev model	2024-05-31 13:39:45 +01:00
Michael Duong	e695a10c14	remove the scenario data as we don't have these eprs	2024-05-30 21:04:32 +01:00
Michael Duong	8650840058	remove the scenario data as we don't have these eprs	2024-05-30 20:59:19 +01:00
Michael Duong	2298895a39	run a new heat model for new data	2024-05-30 20:54:03 +01:00
Michael Duong	45e21383fe	run a new heat model for new data	2024-05-30 20:53:23 +01:00
Github-Bot	ff032f122f	Update Registry	2024-03-28 17:23:00 +00:00
Github-Bot	a798385639	Update Registry	2024-03-28 17:22:22 +00:00
KhalimCK	6fa2625250	Merge pull request #109 from Hestia-Homes/heat-dev-model Heat dev model	2024-03-28 17:21:46 +00:00
Michael Duong	5415cc972d	add new model	2024-03-28 17:00:08 +00:00
Michael Duong	bc29731c69	add new model	2024-03-28 16:58:42 +00:00
Michael Duong	d8ff8cc16a	add new model	2024-03-28 16:52:23 +00:00
Michael Duong	8e6b1c2690	use new data for heat	2024-03-28 16:26:26 +00:00
Github-Bot	5290a0c769	Update Registry	2024-01-30 10:38:50 +00:00
Github-Bot	11d2be463e	Update Registry	2024-01-30 10:38:06 +00:00
KhalimCK	e8dea4c105	Merge pull request #95 from Hestia-Homes/heat-dev-model Heat dev model	2024-01-30 10:37:20 +00:00
Michael Duong	7d44b82583	Merge branch 'heat-dev' of github.com:Hestia-Homes/ML into heat-dev-model	2024-01-29 20:37:53 +00:00
Michael Duong	66ff6e1e22	Using all permutation data with all data used in training, nteral cross validation	2024-01-29 20:37:13 +00:00
Github-Bot	273dcdad31	Update Registry	2024-01-18 10:38:15 +00:00
Github-Bot	4b81ce9374	Update Registry	2024-01-18 10:37:20 +00:00
KhalimCK	469f77d8fb	Merge pull request #93 from Hestia-Homes/heat-dev-model Heat dev model	2024-01-18 10:36:22 +00:00
Michael Duong	55da3d0339	Merge branch 'heat-dev' of github.com:Hestia-Homes/ML into heat-dev-model	2024-01-18 00:14:36 +00:00
Michael Duong	66f54a92e2	train new 600 second model with new data	2024-01-18 00:14:20 +00:00
Github-Bot	ba1971498c	Update Registry	2023-11-28 15:02:13 +00:00
Github-Bot	2cb28616bb	Update Registry	2023-11-28 15:01:27 +00:00
quandanrepo	7554988070	Merge pull request #87 from Hestia-Homes/heat-dev-model add restriction to datast	2023-11-28 15:00:46 +00:00
Michael Duong	9271df34e0	add restriction to datast	2023-11-28 14:51:55 +00:00
Github-Bot	7f984e6cbf	Update Registry	2023-11-27 22:18:17 +00:00
Github-Bot	d8d5a66537	Update Registry	2023-11-27 22:17:29 +00:00
quandanrepo	676539e6a7	Merge pull request #86 from Hestia-Homes/heat-dev-model Heat dev model	2023-11-27 22:16:44 +00:00
quandanrepo	890ca15193	Merge branch 'heat-dev' into heat-dev-model	2023-11-27 22:09:53 +00:00
Michael Duong	5a9eb608bd	commit first heat-model	2023-11-27 22:06:18 +00:00
Michael Duong	f4f8dc2bf2	Merge branch 'master' of github.com:Hestia-Homes/ML into heat-dev-model	2023-11-27 21:51:03 +00:00
Github-Bot	2d331736a4	Update Registry	2023-10-10 12:47:01 +00:00
Github-Bot	7d685caaf5	Update Registry	2023-10-10 12:46:02 +00:00
quandanrepo	dffb01bf8e	Merge pull request #67 from Hestia-Homes/heat-dev-model Heat dev model	2023-10-10 13:45:23 +01:00
Michael Duong	d2a7615e3b	Merge branch 'master' of github.com:Hestia-Homes/ML into heat-dev-model	2023-10-10 12:33:51 +00:00
Michael Duong	4c6c5330d8	add new model, new branch	2023-10-10 12:33:44 +00:00
Michael Duong	9e7d0fa538	add new model	2023-10-10 12:32:25 +00:00
Michael Duong	ad2c266727	initial model for heat-dev	2023-10-09 17:52:47 +00:00