Compare commits

...

59 commits

Author SHA1 Message Date
KhalimCK
44d59916c5
Merge pull request #167 from Hestia-Homes/carbon_baseline-dev-model
update the workflow files
2026-03-19 18:48:12 +00:00
Khalim Conn-Kowlessar
0740ccfcf5 Cleaning stackname for sls delpoy 2026-03-19 17:35:43 +00:00
Khalim Conn-Kowlessar
ba3d4ab2ea added boto3 to version control requirements 2026-03-19 16:28:44 +00:00
Michael Duong
6fbfdacf85 amend versions for version control packages 2026-01-09 10:06:31 +00:00
Michael Duong
a62c2f09fd update the workflow files 2026-01-09 10:02:49 +00:00
Michael Duong
637ea9d5e2 initial commit for carbon_baseline 2026-01-09 09:57:31 +00:00
Github-Bot
08efb32e04 Update Registry 2025-11-05 15:24:57 +00:00
Github-Bot
be1331f8a9 Update Registry 2025-11-05 15:24:17 +00:00
quandanrepo
0e5efd4e06
Merge pull request #161 from Hestia-Homes/carbon-dev-update
model with new data, moew cicd tests, lambda with more memory
2025-11-05 15:23:43 +00:00
Michael Duong
75c40a4a3c model with new data, moew cicd tests, lambda with more memory 2025-11-04 22:38:56 +00:00
Github-Bot
341bfb5aa2 Update Registry 2024-10-08 15:40:33 +00:00
Github-Bot
77232475bc Update Registry 2024-10-08 15:39:55 +00:00
KhalimCK
9fe171ad9b
Merge pull request #146 from Hestia-Homes/carbon-dev-model
new carbon model with september data
2024-10-08 16:39:15 +01:00
Michael Duong
2fa6a51c98 new carbon model with september data 2024-10-06 15:50:00 +01:00
Github-Bot
eb4efdbb2a Update Registry 2024-05-31 12:40:23 +00:00
Github-Bot
ee7b72214a Update Registry 2024-05-31 12:39:44 +00:00
KhalimCK
ddcc67d049
Merge pull request #119 from Hestia-Homes/carbon-dev-model
Carbon dev model
2024-05-31 13:39:04 +01:00
Michael Duong
c7aedcde04 add new model for new data 2024-05-30 21:44:21 +01:00
Michael Duong
c89ae0f38a fixed merge conflict 2024-05-30 21:13:46 +01:00
Michael Duong
132cafebde fixed merge conflict 2024-05-30 21:13:25 +01:00
Github-Bot
b1e8ed1fd4 Update Registry 2024-03-28 16:23:19 +00:00
Github-Bot
ca71cbb3b0 Update Registry 2024-03-28 16:22:37 +00:00
KhalimCK
c7edb7c611
Merge pull request #107 from Hestia-Homes/carbon-dev-model
Carbon dev model
2024-03-28 16:21:52 +00:00
Michael Duong
bb3af26c3f add binary to prediction docker, change requiremnets 2024-03-28 16:06:43 +00:00
Michael Duong
78bf0a490d use 0.9 training data 2024-03-27 23:43:07 +00:00
Michael Duong
2da24aa017 run carbon model with new data 2024-03-27 23:13:29 +00:00
Michael Duong
c0dc934be6 run carbon model with new data 2024-03-27 23:10:36 +00:00
Github-Bot
869a276d67 Update Registry 2024-01-30 10:39:26 +00:00
Github-Bot
96765cee05 Update Registry 2024-01-30 10:38:43 +00:00
KhalimCK
f99c0aee2c
Merge pull request #96 from Hestia-Homes/carbon-dev-model
Carbon dev model
2024-01-30 10:38:05 +00:00
Michael Duong
76d414417a Merge branch 'carbon-dev' of github.com:Hestia-Homes/ML into carbon-dev-model 2024-01-30 10:26:43 +00:00
Michael Duong
1887a52230 use new modesl with carbon model 2024-01-30 10:26:28 +00:00
Github-Bot
9880ebed4c Update Registry 2024-01-18 10:38:17 +00:00
Github-Bot
5d23992d05 Update Registry 2024-01-18 10:37:29 +00:00
KhalimCK
d4836e02cb
Merge pull request #92 from Hestia-Homes/carbon-dev-model
Carbon dev model
2024-01-18 10:36:46 +00:00
Michael Duong
9b29e838af update requirements for dvc 2024-01-17 23:45:07 +00:00
Michael Duong
79a55ba8b5 train 600 second model on new data 2024-01-17 23:35:50 +00:00
Michael Duong
e78a4bb30e Merge branch 'carbon-dev' of github.com:Hestia-Homes/ML into carbon-dev-model 2024-01-17 23:12:26 +00:00
Michael Duong
ae53499742 add keep only non negative carbon change to carbon model 2023-12-22 09:51:57 +00:00
Github-Bot
db29bece80 Update Registry 2023-11-28 15:27:34 +00:00
Github-Bot
65335468b4 Update Registry 2023-11-28 15:26:50 +00:00
quandanrepo
53afbd26d8
Merge pull request #88 from Hestia-Homes/carbon-dev-model
Carbon dev model
2023-11-28 15:26:04 +00:00
Michael Duong
718003b3d9 Merge branch 'carbon-dev' of github.com:Hestia-Homes/ML into carbon-dev-model 2023-11-28 15:14:09 +00:00
Michael Duong
888bfc30c6 Merge branch 'master' of github.com:Hestia-Homes/ML into carbon-dev-model 2023-11-28 15:13:50 +00:00
Michael Duong
2b1e8b912b restrict dataset 2023-11-28 15:13:42 +00:00
Github-Bot
62f2f83b0a Update Registry 2023-11-27 19:22:00 +00:00
Github-Bot
03322a13e7 Update Registry 2023-11-27 19:21:22 +00:00
KhalimCK
5f3d9efa92
Merge pull request #85 from Hestia-Homes/carbon-dev-model
Carbon dev model
2023-11-27 19:20:40 +00:00
Michael Duong
f29d6af6a2 change readme 2023-11-27 19:13:23 +00:00
Michael Duong
7afc4b06b2 Merge branch 'master' of github.com:Hestia-Homes/ML into carbon-dev-model 2023-11-27 19:12:40 +00:00
Michael Duong
217fb3dca8 add inference speed check 2023-11-27 18:52:47 +00:00
Michael Duong
9a04ffde3b Merge branch 'master' of github.com:Hestia-Homes/ML into carbon-dev-model 2023-11-27 18:30:10 +00:00
Michael Duong
e6c7b2f58c Merge branch 'carbon-dev' of github.com:Hestia-Homes/ML into carbon-dev-model 2023-10-12 08:39:24 +00:00
Michael Duong
f2cc32f4b4 using good model 4000s 2023-10-12 08:38:55 +00:00
Github-Bot
2f9092f447 Update Registry 2023-10-11 15:48:52 +00:00
Github-Bot
bb2db16f61 Update Registry 2023-10-11 15:48:04 +00:00
quandanrepo
5aaebd7f44
Merge pull request #71 from Hestia-Homes/carbon-dev-model
400 second model
2023-10-11 16:47:13 +01:00
Michael Duong
680e879503 400 second model 2023-10-11 15:38:55 +00:00
Michael Duong
f4e91162ec initial model 2023-10-11 13:23:54 +00:00
26 changed files with 968 additions and 379 deletions

View file

@ -2,7 +2,17 @@ name: Sap Change Model Deploy
on: on:
push: push:
branches: [ sap-dev, sap-prod, heat-dev, heat-prod, carbon-dev, carbon-prod] branches:
[
  sap-dev,
  sap-prod,
  heat-dev,
  heat-prod,
  carbon-dev,
  carbon-prod,
  carbon_baseline-dev,
  # Keep the underscore in the model prefix: the stack-name step takes the
  # first "-"-separated field of the branch name, so "carbon-baseline-prod"
  # would resolve to the same stack as the carbon model.
  carbon_baseline-prod,
]
jobs: jobs:
deploy: deploy:
@ -31,8 +41,8 @@ jobs:
- name: set secret prefix which is used across multiple steps - name: set secret prefix which is used across multiple steps
id: secret_prefix id: secret_prefix
run: | run: |
# Convert branch name to uppercase and replace hyphens with underscores # Convert branch name to uppercase and replace hyphens with underscores
echo "::set-output name=secret_prefix::$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')" echo "::set-output name=secret_prefix::$(echo "${{ github.ref_name }}" | tr 'a-z-' 'A-Z_')"
- name: Set domain name - name: Set domain name
id: set_domain id: set_domain
@ -55,7 +65,8 @@ jobs:
id: set_stack_name id: set_stack_name
run: | run: |
# Take branch prefix and add "model" for stack name # Take branch prefix and add "model" for stack name
stack_name=$( echo ${{ github.ref_name }} | awk -F"-" '{print $1}' | sed 's/$/model/g') # stack_name=$( echo ${{ github.ref_name }} | awk -F"-" '{print $1}' | sed 's/$/model/g')
stack_name=$(echo ${{ github.ref_name }} | awk -F"-" '{gsub("_", "", $1); print $1}' | sed 's/$/model/g')
if [ -z "${stack_name}" ]; then if [ -z "${stack_name}" ]; then
echo "::set-output name=stack_name::" echo "::set-output name=stack_name::"
else else
@ -116,7 +127,7 @@ jobs:
env: env:
RUNTIME_ENVIRONMENT: ${{ steps.set_runtime_environment.outputs.runtime_environment }} RUNTIME_ENVIRONMENT: ${{ steps.set_runtime_environment.outputs.runtime_environment }}
PREDICTIONS_BUCKET: ${{ steps.set_s3_buckets.outputs.predictions_bucket }} PREDICTIONS_BUCKET: ${{ steps.set_s3_buckets.outputs.predictions_bucket }}
DATA_BUCKET: ${{ steps.set_s3_buckets.outputs.data_bucket }} DATA_BUCKET: ${{ steps.set_s3_buckets.outputs.data_bucket }}
DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }} DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }}
ECR_URI: ${{ steps.set_ecr_credentials.outputs.ecr_uri }} ECR_URI: ${{ steps.set_ecr_credentials.outputs.ecr_uri }}
GITHUB_SHA: ${{ github.sha }} GITHUB_SHA: ${{ github.sha }}

View file

@ -13,6 +13,7 @@ on:
- "sap-dev" - "sap-dev"
- "heat-dev" - "heat-dev"
- "carbon-dev" - "carbon-dev"
- "carbon_baseline-dev"
permissions: write-all permissions: write-all
@ -21,166 +22,171 @@ jobs:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }} if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Install packages to register model - name: Install packages to register model
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Register Model - name: Register Model
run: | run: |
REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot" git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com" git config user.email "Github-Bot@no-reply.com"
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false
if [ -z "${latest_version}" ]; then if [ -z "${latest_version}" ]; then
increment_version="1.0.0" increment_version="1.0.0"
else else
increment_version=$(echo ${latest_version} | awk 'BEGIN { increment_version=$(echo ${latest_version} | awk 'BEGIN {
FS="\\." # Set the field separator to a period FS="\\." # Set the field separator to a period
OFS="." # Set the output field separator to a period OFS="." # Set the output field separator to a period
} }
{ {
major = $1 + 1 # Increment the major version major = $1 + 1 # Increment the major version
print major, "0", "0" # Print the new version print major, "0", "0" # Print the new version
}') }')
fi fi
new_tag=${REGISTER_MODEL_NAME}@v${increment_version} new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
git tag -a ${new_tag} -m "Registering new Major Version" git tag -a ${new_tag} -m "Registering new Major Version"
git push origin ${new_tag} git push origin ${new_tag}
gto show --json > MODEL_REGISTRY.md gto show --json > MODEL_REGISTRY.md
git add . git add .
git commit -m "Update Registry" git commit -m "Update Registry"
git push git push
Register-Minor-Model-Dev: Register-Minor-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }} if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Install packages to register model - name: Install packages to register model
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Register Model - name: Register Model
run: | run: |
REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot" git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com" git config user.email "Github-Bot@no-reply.com"
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
if [ -z "${latest_version}" ]; then if [ -z "${latest_version}" ]; then
increment_version="0.1.0" increment_version="0.1.0"
else else
increment_version=$(echo ${latest_version} | awk 'BEGIN { increment_version=$(echo ${latest_version} | awk 'BEGIN {
FS="\\." # Set the field separator to a period FS="\\." # Set the field separator to a period
OFS="." # Set the output field separator to a period OFS="." # Set the output field separator to a period
} }
{ {
minor = $2 + 1 # Increment the minor version minor = $2 + 1 # Increment the minor version
print $1, minor, "0" # Print the new version print $1, minor, "0" # Print the new version
}') }')
fi fi
new_tag=${REGISTER_MODEL_NAME}@v${increment_version} new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
git tag -a ${new_tag} -m "Registering new Minor Version" git tag -a ${new_tag} -m "Registering new Minor Version"
git push origin ${new_tag} git push origin ${new_tag}
gto show --json > MODEL_REGISTRY.md gto show --json > MODEL_REGISTRY.md
git add . git add .
git commit -m "Update Registry" git commit -m "Update Registry"
git push git push
Register-Patch-Model-Dev: Register-Patch-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }} if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }}
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Install packages to register model - name: Install packages to register model
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Register Model - name: Register Model
run: | run: |
REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot" git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com" git config user.email "Github-Bot@no-reply.com"
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}')
if [ -z "${latest_version}" ]; then if [ -z "${latest_version}" ]; then
increment_version="0.0.1" increment_version="0.0.1"
else else
increment_version=$(echo ${latest_version} | awk 'BEGIN { increment_version=$(echo ${latest_version} | awk 'BEGIN {
FS="\\." # Set the field separator to a period FS="\\." # Set the field separator to a period
OFS="." # Set the output field separator to a period OFS="." # Set the output field separator to a period
} }
{ {
patch = $3 + 1 # Increment the patch version patch = $3 + 1 # Increment the patch version
print $1, $2, patch # Print the new version print $1, $2, patch # Print the new version
}') }')
fi fi
new_tag=${REGISTER_MODEL_NAME}@v${increment_version} new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
git tag -a ${new_tag} -m "Registering new Patch Version" git tag -a ${new_tag} -m "Registering new Patch Version"
git push origin ${new_tag} git push origin ${new_tag}
gto show --json > MODEL_REGISTRY.md gto show --json > MODEL_REGISTRY.md
git add . git add .
git commit -m "Update Registry" git commit -m "Update Registry"
git push git push
Promote-Artefacts-To-Dev: Promote-Artefacts-To-Dev:
if: github.event.pull_request.merged == true if: github.event.pull_request.merged == true
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install packages to retrieve artifacts - name: Install packages to retrieve artifacts
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Retrieve artifacts (dvc.lock) - name: Retrieve artifacts (dvc.lock)
env: env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: | run: |
cd modules/ml-pipeline/src/pipeline cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments dvc pull -r experiments
- name: Push artifacts to Dev - name: Push artifacts to Dev
env: env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: | run: |
cd modules/ml-pipeline/src/pipeline cd modules/ml-pipeline/src/pipeline
dvc push -r dev dvc push -r dev
Register-New-Model-Dev: Register-New-Model-Dev:
needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] needs:
[
Register-Major-Model-Dev,
Register-Minor-Model-Dev,
Register-Patch-Model-Dev,
]
if: | if: |
always() && always() &&
(needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') &&
@ -189,50 +195,50 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Install packages to register model - name: Install packages to register model
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Register Model - name: Register Model
env: env:
TARGET_BRANCH: ${{ github.base_ref }} TARGET_BRANCH: ${{ github.base_ref }}
run: | run: |
REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot" git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com" git config user.email "Github-Bot@no-reply.com"
latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}') latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
if [ -z "${latest_dev_version}" ]; then if [ -z "${latest_dev_version}" ]; then
increment_version="1" increment_version="1"
else else
increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}') increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}')
fi fi
new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version} new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}') latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
echo ${new_tag} echo ${new_tag}
commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}') commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}')
git checkout ${commit_hash} git checkout ${commit_hash}
# git pull #Get new model registry md file changes # git pull #Get new model registry md file changes
git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}" git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}"
git push origin ${new_tag} git push origin ${new_tag}
git checkout ${TARGET_BRANCH} git checkout ${TARGET_BRANCH}
git fetch --all git fetch --all
git pull git pull
gto show --json > MODEL_REGISTRY.md gto show --json > MODEL_REGISTRY.md
git add . git add .
git commit -m "Update Registry" git commit -m "Update Registry"
git push origin ${TARGET_BRANCH} git push origin ${TARGET_BRANCH}

View file

@ -5,22 +5,21 @@ on:
# branches: # branches:
# - "model-**" # - "model-**"
pull_request: pull_request:
branches: ["sap-dev", "heat-dev", "carbon-dev"] branches: ["sap-dev", "heat-dev", "carbon-dev", "carbon_baseline-dev"]
label: label:
types: ["created", "edited"] types: ["created", "edited"]
permissions: write-all permissions: write-all
jobs: jobs:
Check-Label: Check-Label:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: yogevbd/enforce-label-action@2.1.0 - uses: yogevbd/enforce-label-action@2.1.0
with: with:
REQUIRED_LABELS_ANY: "major,minor,patch" REQUIRED_LABELS_ANY: "major,minor,patch"
REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']" REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['major','minor','patch']"
BANNED_LABELS: "banned" BANNED_LABELS: "banned"
# No-Label: # No-Label:
# if: ${{ github.event.label.name != 'major' }} || ${{ github.event.label.name != 'minor' }} || ${{ github.event.label.name != 'patch' }} # if: ${{ github.event.label.name != 'major' }} || ${{ github.event.label.name != 'minor' }} || ${{ github.event.label.name != 'patch' }}
@ -32,86 +31,157 @@ jobs:
# echo "Please choose one of these tags: 'major', 'major', 'patch'" # echo "Please choose one of these tags: 'major', 'major', 'patch'"
# exit(1) # exit(1)
Verify-Model: Verify-Lambda:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install packages to retrieve artifacts - name: Install packages to retrieve artifacts
env: env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Retrieve artifacts (dvc.lock)
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments
- name: Set timestamp
  id: set_timestamp
  run: |
    # Values appended to GITHUB_ENV only become visible in *later* steps, so
    # keep a local shell variable for the log line below.
    # The run id is appended so that two workflow runs on the same day do not
    # share (and later delete) the same S3 prefix.
    timestamp="$(date +%Y%m%d)-${GITHUB_RUN_ID}"
    echo "timestamp=${timestamp}" >> "$GITHUB_ENV"
    echo "Generated timestamp: ${timestamp}"
- name: Upload sample row dataset to S3
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline/data/prepared_data/
aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet
- name: Build Lambda docker Image
run: |
docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
- name: Run lambda docker container
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
docker run -d -p 9000:8080 \
-e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
-e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
-e RUNTIME_ENVIRONMENT=dev \
-e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
- name: Test Lambda endpoint
run: |
sleep 2
curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
-H "Content-Type: application/json" \
-d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"warm\\\": true}\"}"
- name: Get Lambda logs
run: |
docker logs $(docker ps -al -q)
- name: Test Lambda endpoint again
run: |
sleep 2
curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
-H "Content-Type: application/json" \
-d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
- name: Get Lambda logs
run: |
docker logs $(docker ps -al -q)
- name: Stop Lambda container
  run: |
    # "lambda_test" is the image tag, not a container name (the container is
    # started without --name), so look the running container up by its image.
    container_ids=$(docker ps -q --filter "ancestor=lambda_test")
    if [ -n "${container_ids}" ]; then
      # Best-effort: a failure to stop must not fail the job.
      docker stop ${container_ids} || echo "Container already stopped"
    else
      echo "Container already stopped"
    fi
- name: Remove uploaded sample row dataset from S3
if: always()
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
- name: Retrieve artifacts (dvc.lock) Verify-Model:
env: runs-on: ubuntu-latest
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments
- name: Build Prediction docker Image steps:
run: | - uses: actions/checkout@v3
cd modules/ml-pipeline/src/ - name: Install packages to retrieve artifacts
docker build . --file Prediction.Dockerfile --tag prediction_test env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Run Prediction docker container - name: Retrieve artifacts (dvc.lock)
run: | env:
docker run prediction_test AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments
- name: Build Prediction docker Image
run: |
cd modules/ml-pipeline/src/
docker build . --file Prediction.Dockerfile --tag prediction_test
- name: Run Prediction docker container
run: |
docker run prediction_test
Trigger-CML: Trigger-CML:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install packages to retrieve artifacts - name: Install packages to retrieve artifacts
run: | run: |
pip install --upgrade pip pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Retrieve artifacts (dvc.lock) - name: Retrieve artifacts (dvc.lock)
env: env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: | run: |
cd modules/ml-pipeline/src/pipeline cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments dvc pull -r experiments
- uses: actions/setup-python@v4 - uses: actions/setup-python@v4
- uses: iterative/setup-cml@v1 - uses: iterative/setup-cml@v1
- name: Generate report - name: Generate report
env: env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
TARGET_BRANCH: ${{ github.base_ref }} TARGET_BRANCH: ${{ github.base_ref }}
run: | run: |
cd modules/ml-pipeline/src/pipeline cd modules/ml-pipeline/src/pipeline
echo "## Model metrics" > report.md echo "## Model metrics" > report.md
# Compare metrics to master # Compare metrics to master
git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH} git fetch --depth=1 origin ${TARGET_BRANCH}:${TARGET_BRANCH}
dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md dvc metrics diff --md --all ${TARGET_BRANCH} >> report.md
echo "## Scenario comparison" >> report.md echo "## Scenario comparison" >> report.md
cat metrics/scenario_table.md >> report.md cat metrics/scenario_table.md >> report.md
echo "" >> report.md echo "" >> report.md
echo "## Scenario metrics" >> report.md echo "## Scenario metrics" >> report.md
cat metrics/scenario_metrics.md >> report.md cat metrics/scenario_metrics.md >> report.md
cml comment create report.md cml comment create report.md
# echo "## Residuals plot from model" >> report.md # echo "## Residuals plot from model" >> report.md
# metrics_location=$(find . -maxdepth 10 -name "residuals.png") # metrics_location=$(find . -maxdepth 10 -name "residuals.png")
# echo $metrics_location # echo $metrics_location
# cd $metric_location # cd $metric_location
# echo "![](./residuals.png)" >> report.md # echo "![](./residuals.png)" >> report.md

View file

@ -8,25 +8,65 @@
"active": true "active": true
}, },
"sap": { "sap": {
"version": "v0.14.0", "version": "v0.17.5",
"stage": { "stage": {
"dev": "v0.14.0" "dev": "v0.17.5"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"heat": { "heat": {
"version": "v0.5.0", "version": "v0.8.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.8.0"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"carbon": { "carbon": {
"version": "v0.5.0", "version": "v0.8.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.8.0"
},
"registered": true,
"active": true
},
"hotwater": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"heating": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"lighting": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"hotwaterkwh": {
"version": "v1.3.0",
"stage": {
"dev": "v1.3.0"
},
"registered": true,
"active": true
},
"heatingkwh": {
"version": "v1.5.0",
"stage": {
"dev": "v1.5.0"
}, },
"registered": true, "registered": true,
"active": true "active": true

View file

@ -83,3 +83,13 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d
``` ```
This will send a POST request to the running Lambda function and pass in the required data as JSON. This will send a POST request to the running Lambda function and pass in the required data as JSON.
To exercise the Lambda's `testing` or `warm` invocation paths, use:
```bash
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": \"true\"}"}'
```
or
```bash
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"warm\": \"true\"}"}'
```

View file

@ -1,19 +1,24 @@
FROM public.ecr.aws/lambda/python:3.10 FROM public.ecr.aws/lambda/python:3.12
# Set the working directory # Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT} WORKDIR ${LAMBDA_TASK_ROOT}
ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}" ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
ENV MPLCONFIGDIR="/tmp/matplotlib"
# Environment variables # Environment variables
ARG RUNTIME_ENVIRONMENT ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT} ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
# Install necessary build tools - required to test locally # Install necessary build tools - required to test locally
RUN yum install -y gcc python3-devel gcc-c++ RUN dnf install -y gcc python3-devel gcc-c++
# Install python packages # Install python packages
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r ./requirements.txt
RUN pip install uv
RUN uv pip install -r requirements.txt --system
# RUN pip install --no-cache-dir -r ./requirements.txt
# Copy the project code # Copy the project code
COPY modules/ml-pipeline/src/pipeline ./pipeline COPY modules/ml-pipeline/src/pipeline ./pipeline

View file

@ -47,6 +47,30 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
return False return False
def warming_up_invocation(
    model,
    model_filepath: str,
):
    """
    Handle a warm-up invocation.

    Loads the model from ``model_filepath`` and then runs a single prediction
    on a synthetic one-row frame of zeros whose columns are
    ``model.model.original_features``. The prediction result is discarded and
    nothing is returned.
    """
    import pandas as pd
    import numpy as np

    model.load_model(model_filepath)

    feature_names = model.model.original_features
    zero_row = np.zeros((1, len(feature_names)))
    synthetic_input = pd.DataFrame(zero_row, columns=feature_names)

    # A model-specific warm-up (predicting with "NeuralNetFastAI" when it is
    # among the fitted model names) was sketched here previously but is
    # disabled; the wrapper's default predict path is used instead.
    model.predict(data=synthetic_input)
def handler(event, context): def handler(event, context):
""" """
Take in event and trigger the prediction pipeline Take in event and trigger the prediction pipeline
@ -66,9 +90,6 @@ def handler(event, context):
created_at = body["created_at"] created_at = body["created_at"]
# TODO: Implement the loading of the model and prediction # TODO: Implement the loading of the model and prediction
storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
logger.info(f"--- Initiate MLModel ---") logger.info(f"--- Initiate MLModel ---")
build_model_params = settings.build_model build_model_params = settings.build_model
@ -78,6 +99,32 @@ def handler(event, context):
model = model_factory(build_model_params["model_type"]) model = model_factory(build_model_params["model_type"])
model_filepath = build_model_params["model_save_filepath"]
if "warm" in body:
logger.info("Warm up invocation - synthetic prediction")
warming_up_invocation(model=model, model_filepath=model_filepath)
return {
"statusCode": 200,
"body": json.dumps(
{
"message": "Successfully warmed up invocation",
}
),
}
if "testing" in body:
logger.info(
"Testing invocation for CI/CD - save file to same location in S3"
)
storage_filepath = body["file_location"].replace(
".parquet", "_output.parquet"
)
else:
storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
logger.info(f"--- Initiate Input DataClient ---") logger.info(f"--- Initiate Input DataClient ---")
input_dataclient = dataclient_factory( input_dataclient = dataclient_factory(
dataclient_type="aws-s3", dataclient_type="aws-s3",
@ -95,7 +142,7 @@ def handler(event, context):
output_dataclient=output_dataclient, output_dataclient=output_dataclient,
model=model, model=model,
target=feature_process_params["feature_processor_config"]["target"], target=feature_process_params["feature_processor_config"]["target"],
model_filepath=build_model_params["model_save_filepath"], model_filepath=model_filepath,
test_data_filepath=body["file_location"], test_data_filepath=body["file_location"],
predictions_output_filepath=storage_filepath, predictions_output_filepath=storage_filepath,
predictions_column_name=generate_predictions_params[ predictions_column_name=generate_predictions_params[

View file

@ -51,3 +51,4 @@ functions:
path: /predict path: /predict
method: POST method: POST
timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
memorySize: 3008

View file

@ -1,7 +1,8 @@
export PYENV_ROOT=$(HOME)/.pyenv export PYENV_ROOT=$(HOME)/.pyenv
export PATH := $(PYENV_ROOT)/bin:$(PATH) export PATH := $(PYENV_ROOT)/bin:$(PATH)
PYTHON_VERSION ?= 3.10.12 PYTHON_VERSION ?= 3.12.12
CONDA_ENV=dev_env_pipeline CONDA_ENV=dev_env_pipeline
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
.PHONY: init .PHONY: init
init: dev-conda init: dev-conda
@ -12,11 +13,15 @@ dev-conda:
# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously" # conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
conda init bash conda init bash
conda run -v -n ${CONDA_ENV} pip install --upgrade pip ${CONDA_ACTIVATE} ${CONDA_ENV} && \
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt which pip && \
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt pip install --upgrade pip && \
conda run -v -n ${CONDA_ENV} pre-commit install pip install uv && \
conda run -v -n ${CONDA_ENV} pip install ipykernel uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
pre-commit install && \
uv pip install ipykernel && \
conda install llvm-openmp -y
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND" echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "conda activate ${CONDA_ENV}" echo "conda activate ${CONDA_ENV}"
@ -33,4 +38,4 @@ dev-pyenv:
.PHONY: dvc-init .PHONY: dvc-init
dvc-init: dvc-init:
. .dev_env_pipeline/bin/activate && dvc init --subdir . .dev_env_pipeline/bin/activate && dvc init --subdir

View file

@ -1,16 +1,21 @@
# Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow) # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
FROM python:3.10.12-slim FROM python:3.12.12-slim
RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
COPY pipeline/requirements/predictions/requirements.txt requirements.txt COPY pipeline/requirements/predictions/requirements.txt requirements.txt
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install uv
RUN uv pip install -r requirements.txt --system
# RUN pip install -r requirements.txt
# Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script # Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
COPY pipeline/ /home/pipeline/ COPY pipeline/ /home/pipeline/
WORKDIR /home/pipeline/ WORKDIR /home/pipeline/
CMD [ "python", "3_generate_predictions.py"] CMD [ "python", "3_generate_predictions.py"]

View file

@ -1,3 +1,3 @@
# The generic reproducible ML-pipeline # The generic reproducible ML-pipeline!
Pipeline required to build a model to produce an output, that gets hashed via DVC Pipeline required to build a model to produce an output, that gets hashed via DVC

View file

@ -1,3 +1,4 @@
# Ignore dynaconf secret files # Ignore dynaconf secret files
.secrets.* .secrets.*
example.py

View file

@ -29,6 +29,7 @@ data_filepath = prepare_data_params["data_filepath"]
train_proportion = prepare_data_params["train_proportion"] train_proportion = prepare_data_params["train_proportion"]
output_train_filepath = prepare_data_params["output_train_filepath"] output_train_filepath = prepare_data_params["output_train_filepath"]
output_test_filepath = prepare_data_params["output_test_filepath"] output_test_filepath = prepare_data_params["output_test_filepath"]
sample_test_filepath = prepare_data_params["sample_test_filepath"]
feature_processor_config = feature_process_params["feature_processor_config"] feature_processor_config = feature_process_params["feature_processor_config"]
logger.info(f"--- Initiate DataClient ---") logger.info(f"--- Initiate DataClient ---")
@ -99,6 +100,10 @@ def prepare_data(
logger.info("--- Outputting data ---") logger.info("--- Outputting data ---")
output_dataclient.save_data(
obj=data.sample(1), location=sample_test_filepath, save_config=None
)
output_dataclient.save_data( output_dataclient.save_data(
obj=train, location=output_train_filepath, save_config=None obj=train, location=output_train_filepath, save_config=None
) )

View file

@ -99,6 +99,12 @@ def generate_scenario_predictions(
] ]
) )
# TEMPORARY FIX: ADD is_post_sap10_starting and is_post_sap10_ending if not present
if "is_post_sap10_starting" not in scenario_data.columns:
scenario_data["is_post_sap10_starting"] = False
if "is_post_sap10_ending" not in scenario_data.columns:
scenario_data["is_post_sap10_ending"] = False
logger.info("--- Loading Model ---") logger.info("--- Loading Model ---")
model.load_model(model_filepath) model.load_model(model_filepath)

View file

@ -14,9 +14,23 @@ default:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error eval_metric: mean_squared_error #mean_absolute_error
time_limit: 1800 time_limit: 3600
presets: medium_quality presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT'] excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT', 'FASTAI']
infer_limit: 0.05 infer_limit: 1
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
fit_strategy: "parallel"
ag_args_ensemble: {'num_folds_parallel': 2} ag_args_ensemble: {'num_folds_parallel': 2}
num_gpus: 0
hyperparameters:
{
'NN_TORCH': [{}],
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}],
# 'GBM': [{}],
'CAT': [{}],
'XGB': [{}],
'FASTAI': [{}],
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}

View file

@ -18,30 +18,44 @@ def remove_starting_columns(df):
return df return df
def remove_floor_height_ending(df): def keep_negative_heat_change(df):
# df.describe(percentiles=[0.005,0.99])['FLOOR_HEIGHT_ENDING'] df = df[df["heat_demand_change"] < 0]
# shows bottom 0.5 percentile is 1.665
# So keep anything above this
df = df[df["floor_height_ending"] > 1.665].reset_index(drop=True)
print("we in here")
return df return df
def remove_minimum_habitable_room_size(df): def keep_non_negative_carbon_ending(df):
# Need minimum of 6.5m per habitable room df = df[df["carbon_ending"] > 0]
df = df[
df["total_floor_area_ending"] / df["number_habitable_rooms"] > 6.5
].reset_index(drop=True)
return df return df
def keep_flats(df): def keep_negative_carbon_change(df):
df = df[df["property_type"] == "Flat"] df = df[df["carbon_change"] < 0]
return df return df
def keep_non_zero_rdsap(df): # TODO: Move to ETL pipeline
df = df[df["rdsap_change"] != 0] def remove_unreasonable_habitable_rooms(df):
"""
Assumption is that proportion of floor area to habitable rooms should be at least 6.5m2
"""
minimum_room_size_index = (
df["total_floor_area_ending"] / df["number_habitable_rooms"] >= 6.5
)
df = df[minimum_room_size_index]
return df
def remove_top_1_percent_heat_demand(df, threshold_value=860):
    """
    Keep only rows whose ``heat_demand_starting`` is strictly below a cut-off.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``heat_demand_starting`` column.
    threshold_value : numeric, default 860
        Exclusive upper bound. Rows with ``heat_demand_starting`` greater than
        or equal to this value are dropped.
        NOTE(review): 860 is presumably the 99th percentile of the training
        data at the time of writing (see the describe() call below) - confirm
        whenever the dataset is refreshed.

    Returns
    -------
    pandas.DataFrame
        The filtered rows; the original index labels are kept (no reset).
    """
    # The default was originally derived with:
    # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%']
    df = df[df["heat_demand_starting"] < threshold_value]
    return df
def remove_top_1_percent_carbon(df, threshold_value=18):
    """
    Keep only rows whose ``carbon_starting`` is strictly below a cut-off.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``carbon_starting`` column.
    threshold_value : numeric, default 18
        Exclusive upper bound. Rows with ``carbon_starting`` greater than or
        equal to this value are dropped.
        NOTE(review): 18 is presumably the 99th percentile of the training
        data at the time of writing (see the describe() call below) - confirm
        whenever the dataset is refreshed.

    Returns
    -------
    pandas.DataFrame
        The filtered rows; the original index labels are kept (no reset).
    """
    # The default was originally derived with:
    # threshold_value = df.describe(percentiles=[0.99])['CARBON_STARTING']['99%']
    df = df[df["carbon_starting"] < threshold_value]
    return df
@ -54,10 +68,12 @@ def keep_non_zero_rdsap(df):
# return df # return df
business_logic = { business_logic = {
# "keep_non_zero_rdsap": keep_non_zero_rdsap, "remove_unreasonable_habitable_rooms": remove_unreasonable_habitable_rooms,
# "keep_flats": keep_flats, "keep_negative_heat_change": keep_negative_heat_change,
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size, "keep_negative_carbon_change": keep_negative_carbon_change,
# "remove_floor_height_ending": remove_floor_height_ending "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand,
"remove_top_1_percent_carbon": remove_top_1_percent_carbon,
"keep_non_negative_carbon_ending": keep_non_negative_carbon_ending,
# "remove_starting_columns": remove_starting_columns # "remove_starting_columns": remove_starting_columns
# "keep_ENDING_COLUMNS": keep_ending_columns # "keep_ENDING_COLUMNS": keep_ending_columns
} }

View file

@ -1,23 +1,24 @@
""" """
After predictions, we may want to apply some post processing to the predictions After predictions, we may want to apply some post processing to the predictions
""" """
import pandas as pd import pandas as pd
def clip_predictions_to_minimum_value( def clip_predictions_to_minimum_value(
data: pd.DataFrame, predictions: pd.Series, minimum_value: int = 0 data: pd.DataFrame,
predictions: pd.Series,
) -> pd.Series: ) -> pd.Series:
series_name = predictions.name series_name = predictions.name
predictions.name = "predictions" predictions.name = "predictions"
predictions = predictions.astype(data["carbon_starting"].dtype)
predictions_df = pd.concat([data, predictions], axis=1) predictions_df = pd.concat([data, predictions], axis=1)
# We expect all prediction to be atleast one point improvement # We expect all prediction to be atleast one point improvement
replace_index = ( replace_index = predictions_df["predictions"] > predictions_df["carbon_starting"]
predictions_df["sap_starting"] + minimum_value > predictions_df["predictions"] predictions_df.loc[replace_index, "predictions"] = predictions_df.loc[
) replace_index, "carbon_starting"
predictions_df.loc[replace_index, "predictions"] = ( ]
predictions_df.loc[replace_index, "sap_starting"] + minimum_value
)
predictions_new = predictions_df["predictions"] predictions_new = predictions_df["predictions"]
predictions_new.name = series_name predictions_new.name = series_name
@ -30,6 +31,6 @@ def clip_predictions_to_minimum_value(
post_prediction_logic = { post_prediction_logic = {
"clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
# "round_predictions": round_predictions # "round_predictions": round_predictions
} }

View file

@ -8,6 +8,6 @@ default:
# - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -12,32 +12,168 @@ default:
AWS_ACCESS_KEY_ID: minio AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123 AWS_SECRET_ACCESS_KEY: minio123
ENDPOINT_URL: http://localhost:9000 ENDPOINT_URL: http://localhost:9000
local: local: null
null
prepare_data: prepare_data:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
train_proportion: 0.9 train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet output_test_filepath: ./data/prepared_data/test.parquet
sample_test_filepath: ./data/prepared_data/sample_test.parquet
feature_processor: feature_processor:
feature_processor_type: dataframe feature_processor_type: dataframe
feature_processor_config: feature_processor_config:
subsample_amount: null subsample_amount: null
subsample_seed: 0 subsample_seed: 0
target: sap_ending target: carbon_starting
identifier_columns: ["uprn"] identifier_columns: ["uprn"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "sap_ending"]
drop_columns: [ drop_columns:
"heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending", [
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', "carbon_ending",
'number_habitable_rooms', 'number_heated_rooms'] "potential_energy_efficiency",
"environment_impact_potential",
"energy_consumption_potential",
"co2_emissions_potential",
"heat_demand_change",
"carbon_change",
"rdsap_change",
"heat_demand_starting",
"heat_demand_ending",
"sap_starting",
"sap_ending",
"days_to_starting",
"days_to_ending",
"number_habitable_rooms_starting",
"number_habitable_rooms_ending",
"number_heated_rooms_starting",
"number_heated_rooms_ending",
"number_habitable_rooms",
"number_heated_rooms",
"lighting_cost_starting",
"lighting_cost_ending",
"heating_cost_starting",
"heating_cost_ending",
"hot_water_cost_starting",
"hot_water_cost_ending",
"floor_thermal_transmittance",
"floor_thermal_transmittance_ending",
"lodgement_date_starting",
"lodgement_date_ending",
"potential_energy_efficiency",
"environment_impact_potential",
"energy_consumption_potential",
"co2_emissions_potential",
"walls_thermal_transmittance_ending",
"walls_thermal_transmittance_unit_ending",
"is_filled_cavity_ending",
"is_as_built_ending",
"walls_is_assumed_ending",
"is_park_home_ending",
"walls_insulation_thickness_ending",
"external_insulation_ending",
"internal_insulation_ending",
"floor_insulation_thickness_ending",
"roof_thermal_transmittance_ending",
"is_at_rafters_ending",
"roof_insulation_thickness_ending",
"heater_type_ending",
"system_type_ending",
"thermostat_characteristics_ending",
"heating_scope_ending",
"energy_recovery_ending",
"hotwater_tariff_type_ending",
"extra_features_ending",
"chp_systems_ending",
"distribution_system_ending",
"no_system_present_ending",
"appliance_ending",
"has_radiators_ending",
"has_fan_coil_units_ending",
"has_pipes_in_screed_above_insulation_ending",
"has_pipes_in_insulated_timber_floor_ending",
"has_pipes_in_concrete_slab_ending",
"has_boiler_ending",
"has_air_source_heat_pump_ending",
"has_room_heaters_ending",
"has_electric_storage_heaters_ending",
"has_warm_air_ending",
"has_electric_underfloor_heating_ending",
"has_electric_ceiling_heating_ending",
"has_community_scheme_ending",
"has_ground_source_heat_pump_ending",
"has_no_system_present_ending",
"has_portable_electric_heaters_ending",
"has_water_source_heat_pump_ending",
"has_electric_heat_pump_ending",
"has_micro-cogeneration_ending",
"has_solar_assisted_heat_pump_ending",
"has_exhaust_source_heat_pump_ending",
"has_community_heat_pump_ending",
"has_hot-water-only_ending",
"has_electric_ending",
"has_mains_gas_ending",
"has_wood_logs_ending",
"has_coal_ending",
"has_oil_ending",
"has_wood_pellets_ending",
"has_anthracite_ending",
"has_dual_fuel_mineral_and_wood_ending",
"has_smokeless_fuel_ending",
"has_lpg_ending",
"has_b30k_ending",
"has_mineral_and_wood_ending",
"has_dual_fuel_appliance_ending",
"has_electricaire_ending",
"has_assumed_for_most_rooms_ending",
"has_underfloor_heating_ending",
"thermostatic_control_ending",
"charging_system_ending",
"switch_system_ending",
"no_control_ending",
"dhw_control_ending",
"community_heating_ending",
"multiple_room_thermostats_ending",
"auxiliary_systems_ending",
"trvs_ending",
"rate_control_ending",
"glazing_type_ending",
"fuel_type_ending",
"main-fuel_tariff_type_ending",
"is_community_ending",
"no_individual_heating_or_community_network_ending",
"complex_fuel_type_ending",
"mechanical_ventilation_ending",
"secondheat_description_ending",
"glazed_type_ending",
"multi_glaze_proportion_ending",
"low_energy_lighting_ending",
"number_open_fireplaces_ending",
"solar_water_heating_flag_ending",
"photo_supply_ending",
"transaction_type_ending",
"energy_tariff_ending",
"extension_count_ending",
"total_floor_area_ending",
"floor_height_ending",
"hot_water_energy_eff_ending",
"floor_energy_eff_ending",
"windows_energy_eff_ending",
"walls_energy_eff_ending",
"sheating_energy_eff_ending",
"roof_energy_eff_ending",
"mainheat_energy_eff_ending",
"mainheatc_energy_eff_ending",
"lighting_energy_eff_ending",
"is_post_sap10_ending",
"estimated_perimeter_ending",
]
# retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
retain_features: null retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
@ -78,4 +214,4 @@ default:
dev: dev:
generate_predictions: generate_predictions:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3

View file

@ -1,4 +1,4 @@
"""" """ "
Implementations of MLModels, all of which will have four methods to: Implementations of MLModels, all of which will have four methods to:
- Load model - Load model
- Save Model - Save Model
@ -11,9 +11,6 @@ import joblib
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
from typing import Union, List from typing import Union, List
from sklearn import linear_model
from sklearn.svm import SVR
from autogluon.tabular import TabularDataset, TabularPredictor
from core.interface.InterfaceModels import MLModel from core.interface.InterfaceModels import MLModel
from core.Logger import logger from core.Logger import logger
@ -69,6 +66,8 @@ class SKLearnLinearRegression:
""" """
Method to train a model Method to train a model
""" """
from sklearn import linear_model
self.model = linear_model.LinearRegression() self.model = linear_model.LinearRegression()
x_train = data.iloc[:, data.columns != target] x_train = data.iloc[:, data.columns != target]
@ -117,6 +116,7 @@ class SKLearnSVMRegression:
""" """
Method to train a model Method to train a model
""" """
from sklearn.svm import SVR
validate_dict_keys( validate_dict_keys(
list(model_hyperparameters.keys()), list(model_hyperparameters.keys()),
@ -152,12 +152,17 @@ class AutogluonAutoML:
"infer_limit", "infer_limit",
"infer_limit_batch_size", "infer_limit_batch_size",
"ag_args_ensemble", "ag_args_ensemble",
"fit_strategy",
"num_gpus",
"hyperparameters",
] ]
def load_model(self, path: Union[Path, str]) -> None: def load_model(self, path: Union[Path, str]) -> None:
""" """
Method to load a model Method to load a model
""" """
from autogluon.tabular import TabularPredictor
filepath = str(path) filepath = str(path)
self.model = TabularPredictor.load(path=filepath) self.model = TabularPredictor.load(path=filepath)
@ -183,6 +188,10 @@ class AutogluonAutoML:
""" """
Method to train a model Method to train a model
""" """
from autogluon.tabular import TabularDataset, TabularPredictor
# Force Parallel Model fitting
os.environ["AG_FORCE_PARALLEL"] = "True"
validate_dict_keys( validate_dict_keys(
keys_1=list(model_hyperparameters.keys()), keys_1=list(model_hyperparameters.keys()),
@ -209,6 +218,9 @@ class AutogluonAutoML:
infer_limit=model_hyperparameters["infer_limit"], infer_limit=model_hyperparameters["infer_limit"],
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"], infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"], ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
fit_strategy=model_hyperparameters["fit_strategy"],
num_gpus=model_hyperparameters["num_gpus"],
hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
) )
def predict( def predict(

View file

@ -16,16 +16,23 @@ stages:
deps: deps:
- path: 1_prepare_data.py - path: 1_prepare_data.py
hash: md5 hash: md5
md5: 11a3b8bfdfe199ab7ecc39ccc5652649 md5: a5ce162e1c402c0f811a80ef78cf4dd5
size: 4298 size: 4481
params: params:
configs/settings.yaml: configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns: default.feature_processor.feature_processor_config.drop_columns:
- carbon_ending
- potential_energy_efficiency
- environment_impact_potential
- energy_consumption_potential
- co2_emissions_potential
- heat_demand_change - heat_demand_change
- carbon_change - carbon_change
- rdsap_change - rdsap_change
- heat_demand_starting
- heat_demand_ending - heat_demand_ending
- carbon_ending - sap_starting
- sap_ending
- days_to_starting - days_to_starting
- days_to_ending - days_to_ending
- number_habitable_rooms_starting - number_habitable_rooms_starting
@ -34,24 +41,144 @@ stages:
- number_heated_rooms_ending - number_heated_rooms_ending
- number_habitable_rooms - number_habitable_rooms
- number_heated_rooms - number_heated_rooms
- lighting_cost_starting
- lighting_cost_ending
- heating_cost_starting
- heating_cost_ending
- hot_water_cost_starting
- hot_water_cost_ending
- floor_thermal_transmittance
- floor_thermal_transmittance_ending
- lodgement_date_starting
- lodgement_date_ending
- potential_energy_efficiency
- environment_impact_potential
- energy_consumption_potential
- co2_emissions_potential
- walls_thermal_transmittance_ending
- walls_thermal_transmittance_unit_ending
- is_filled_cavity_ending
- is_as_built_ending
- walls_is_assumed_ending
- is_park_home_ending
- walls_insulation_thickness_ending
- external_insulation_ending
- internal_insulation_ending
- floor_insulation_thickness_ending
- roof_thermal_transmittance_ending
- is_at_rafters_ending
- roof_insulation_thickness_ending
- heater_type_ending
- system_type_ending
- thermostat_characteristics_ending
- heating_scope_ending
- energy_recovery_ending
- hotwater_tariff_type_ending
- extra_features_ending
- chp_systems_ending
- distribution_system_ending
- no_system_present_ending
- appliance_ending
- has_radiators_ending
- has_fan_coil_units_ending
- has_pipes_in_screed_above_insulation_ending
- has_pipes_in_insulated_timber_floor_ending
- has_pipes_in_concrete_slab_ending
- has_boiler_ending
- has_air_source_heat_pump_ending
- has_room_heaters_ending
- has_electric_storage_heaters_ending
- has_warm_air_ending
- has_electric_underfloor_heating_ending
- has_electric_ceiling_heating_ending
- has_community_scheme_ending
- has_ground_source_heat_pump_ending
- has_no_system_present_ending
- has_portable_electric_heaters_ending
- has_water_source_heat_pump_ending
- has_electric_heat_pump_ending
- has_micro-cogeneration_ending
- has_solar_assisted_heat_pump_ending
- has_exhaust_source_heat_pump_ending
- has_community_heat_pump_ending
- has_hot-water-only_ending
- has_electric_ending
- has_mains_gas_ending
- has_wood_logs_ending
- has_coal_ending
- has_oil_ending
- has_wood_pellets_ending
- has_anthracite_ending
- has_dual_fuel_mineral_and_wood_ending
- has_smokeless_fuel_ending
- has_lpg_ending
- has_b30k_ending
- has_mineral_and_wood_ending
- has_dual_fuel_appliance_ending
- has_electricaire_ending
- has_assumed_for_most_rooms_ending
- has_underfloor_heating_ending
- thermostatic_control_ending
- charging_system_ending
- switch_system_ending
- no_control_ending
- dhw_control_ending
- community_heating_ending
- multiple_room_thermostats_ending
- auxiliary_systems_ending
- trvs_ending
- rate_control_ending
- glazing_type_ending
- fuel_type_ending
- main-fuel_tariff_type_ending
- is_community_ending
- no_individual_heating_or_community_network_ending
- complex_fuel_type_ending
- mechanical_ventilation_ending
- secondheat_description_ending
- glazed_type_ending
- multi_glaze_proportion_ending
- low_energy_lighting_ending
- number_open_fireplaces_ending
- solar_water_heating_flag_ending
- photo_supply_ending
- transaction_type_ending
- energy_tariff_ending
- extension_count_ending
- total_floor_area_ending
- floor_height_ending
- hot_water_energy_eff_ending
- floor_energy_eff_ending
- windows_energy_eff_ending
- walls_energy_eff_ending
- sheating_energy_eff_ending
- roof_energy_eff_ending
- mainheat_energy_eff_ending
- mainheatc_energy_eff_ending
- lighting_energy_eff_ending
- is_post_sap10_ending
- estimated_perimeter_ending
default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_config.target:
carbon_starting
default.feature_processor.feature_processor_type: dataframe default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath: default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet default.prepare_data.output_test_filepath:
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath:
./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 0.9 default.prepare_data.train_proportion: 0.9
outs: outs:
- path: data/prepared_data/ - path: data/prepared_data/
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: e40fba0aee32d4b7120d7cbe633040b0.dir
size: 45056059 size: 21607047
nfiles: 2 nfiles: 3
build_model: build_model:
cmd: python 2_build_model.py cmd: python 2_build_model.py
deps: deps:
@ -61,9 +188,9 @@ stages:
size: 4820 size: 4820
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: e40fba0aee32d4b7120d7cbe633040b0.dir
size: 45056059 size: 21607047
nfiles: 2 nfiles: 3
params: params:
configs/build_model.yaml: configs/build_model.yaml:
default: default:
@ -79,7 +206,7 @@ stages:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error eval_metric: mean_squared_error
time_limit: 1800 time_limit: 3600
presets: medium_quality presets: medium_quality
excluded_model_types: excluded_model_types:
- RF - RF
@ -87,25 +214,94 @@ stages:
- NN_TORCH - NN_TORCH
- KNN - KNN
- XT - XT
infer_limit: 0.05 - FASTAI
infer_limit: 1
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
fit_strategy: parallel
ag_args_ensemble: ag_args_ensemble:
num_folds_parallel: 2 num_folds_parallel: 2
num_gpus: 0
hyperparameters:
NN_TORCH:
- {}
GBM:
- extra_trees: true
ag_args:
name_suffix: XT
- {}
- learning_rate: 0.03
num_leaves: 128
feature_fraction: 0.9
min_data_in_leaf: 3
ag_args:
name_suffix: Large
priority: 0
CAT:
- {}
XGB:
- {}
FASTAI:
- {}
RF:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
XT:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
KNN:
- weights: uniform
ag_args:
name_suffix: Unif
- weights: distance
ag_args:
name_suffix: Dist
outs: outs:
- path: data/fit_predictions/ - path: data/fit_predictions/
hash: md5 hash: md5
md5: d9c9afc05e8780db47c0548b19bf7d19.dir md5: 81df2583046d6611c871df31b943ccbe.dir
size: 3349989 size: 2733019
nfiles: 1 nfiles: 1
- path: data/model/ - path: data/model/
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: bfd74e6b160e063b0d9e1f40b3bd0e26.dir
size: 773523079 size: 598129590
nfiles: 36 nfiles: 31
- path: metrics/fit_metrics.json - path: metrics/fit_metrics.json
hash: md5 hash: md5
md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a md5: e344820734a823327e3cc8c43bf2f6bc
size: 224 size: 212
generate_predictions: generate_predictions:
cmd: python 3_generate_predictions.py cmd: python 3_generate_predictions.py
deps: deps:
@ -115,26 +311,28 @@ stages:
size: 2464 size: 2464
- path: data/model - path: data/model
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: bfd74e6b160e063b0d9e1f40b3bd0e26.dir
size: 773523079 size: 598129590
nfiles: 36 nfiles: 31
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: e40fba0aee32d4b7120d7cbe633040b0.dir
size: 45056059 size: 21607047
nfiles: 2 nfiles: 3
params: params:
configs/settings.yaml: configs/settings.yaml:
default.generate_predictions.input_dataclient_type: local default.generate_predictions.input_dataclient_type: local
default.generate_predictions.output_dataclient_type: local default.generate_predictions.output_dataclient_type: local
default.generate_predictions.predictions_column_name: predictions default.generate_predictions.predictions_column_name: predictions
default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet default.generate_predictions.predictions_output_filepath:
default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet ./data/predictions/predictions.parquet
default.generate_predictions.test_data_filepath:
./data/prepared_data/test.parquet
outs: outs:
- path: data/predictions/ - path: data/predictions/
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: f4cf2a393b36cf49a0722889344a0016.dir
size: 463197 size: 392930
nfiles: 1 nfiles: 1
generate_metrics: generate_metrics:
cmd: python 4_generate_metrics.py cmd: python 4_generate_metrics.py
@ -145,14 +343,14 @@ stages:
size: 3484 size: 3484
- path: data/predictions - path: data/predictions
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: f4cf2a393b36cf49a0722889344a0016.dir
size: 463197 size: 392930
nfiles: 1 nfiles: 1
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: e40fba0aee32d4b7120d7cbe633040b0.dir
size: 45056059 size: 21607047
nfiles: 2 nfiles: 3
params: params:
configs/settings.yaml: configs/settings.yaml:
default.generate_metrics.dataclient_type: local default.generate_metrics.dataclient_type: local
@ -161,30 +359,29 @@ stages:
outs: outs:
- path: metrics/metrics.json - path: metrics/metrics.json
hash: md5 hash: md5
md5: 3e08df02fd5c5d094bcf936e1338d596 md5: 3d87652cffab141e1d5b4307761ec788
size: 223 size: 223
generate_scenerio_metrics: generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py cmd: python 5_generate_scenarios.py
deps: deps:
- path: 5_generate_scenarios.py - path: 5_generate_scenarios.py
hash: md5 hash: md5
md5: 40506749fefd926d47c60ff5b16db307 md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
size: 5337 size: 5658
params: params:
configs/scenarios.yaml: configs/scenarios.yaml:
default.scenarios: default.scenarios:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
scenario_data_filepaths: scenario_data_filepaths:
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md metrics_output_filepath: ./metrics/scenario_metrics.md
outs: outs:
- path: metrics/scenario_metrics.md - path: metrics/scenario_metrics.md
hash: md5 hash: md5
md5: fa4d6d7bbd7818613800da5f8f37ea96 md5: d41d8cd98f00b204e9800998ecf8427e
size: 363 size: 0
- path: metrics/scenario_table.md - path: metrics/scenario_table.md
hash: md5 hash: md5
md5: d6baf100a1623cc2467c2f8221d314c9 md5: d41d8cd98f00b204e9800998ecf8427e
size: 2133 size: 0

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12
pyarrow==13.0.0 pyarrow==20.0.0
pre-commit==3.3.3 pre-commit==4.3.0

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12
pyarrow==13.0.0 pyarrow==20.0.0
PyYAML==6.0.1 PyYAML==6.0.3

View file

@ -1,10 +1,10 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
ray==2.6.3 ray==2.44.1
dynaconf==3.2.1 dynaconf==3.2.12
alibi==0.9.5 # alibi
shap==0.42.1 shap==0.49.1
pyarrow==13.0.0 pyarrow==20.0.0
pre-commit==3.3.3 pre-commit==4.3.0

View file

@ -1,4 +1,4 @@
boto3==1.28.41 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12

View file

@ -1,4 +1,5 @@
dvc==3.51.0 dvc==3.66.0
dvc-s3==3.2.0 dvc-s3==3.2.2
gto==1.7.1 gto==1.9.0
pyOpenSSL==23.3.0 pyOpenSSL==23.3.0
boto3==1.40.61