Compare commits

...

94 commits

Author SHA1 Message Date
quandanrepo
7be5cf8de3
Merge pull request #171 from Hestia-Homes/sap_baseline-dev-model
Sap baseline dev model
2026-01-12 18:40:16 +00:00
Michael Duong
c4b402a720 Merge branch 'sap_baseline-dev' of github.com:Hestia-Homes/ML into sap_baseline-dev-model 2026-01-12 18:38:55 +00:00
Michael Duong
295ed96371 add logging for disk space 2026-01-12 18:38:44 +00:00
Github-Bot
39211b161a Update Registry 2026-01-12 17:58:30 +00:00
Github-Bot
faaaf66f1f Update Registry 2026-01-12 17:57:56 +00:00
quandanrepo
5e5aa8009a
Merge pull request #170 from Hestia-Homes/sap_baseline-dev-model
Sap baseline dev model
2026-01-12 17:57:21 +00:00
Michael Duong
fc8f41db6f Merge branch 'sap_baseline-dev' of github.com:Hestia-Homes/ML into sap_baseline-dev-model 2026-01-12 17:54:16 +00:00
Michael Duong
bcb8c6a2a1 remove quotes 2026-01-12 17:54:05 +00:00
Github-Bot
e430d1bb17 Update Registry 2026-01-12 16:58:29 +00:00
Github-Bot
0fdd416687 Update Registry 2026-01-12 16:57:56 +00:00
quandanrepo
11db6b4f5e
Merge pull request #169 from Hestia-Homes/sap_baseline-dev-model
Sap baseline dev model
2026-01-12 16:57:23 +00:00
Michael Duong
6dfb81095c add boto3 2026-01-12 16:55:04 +00:00
Michael Duong
6b3d9a4fa5 Merge branch 'sap_baseline-dev' of github.com:Hestia-Homes/ML into sap_baseline-dev-model 2026-01-12 16:53:15 +00:00
Michael Duong
3f8c0f91d5 add boto3 2026-01-12 16:53:02 +00:00
Github-Bot
c8b338d907 Update Registry 2026-01-12 16:48:22 +00:00
Github-Bot
41b8a4752f Update Registry 2026-01-12 16:47:40 +00:00
quandanrepo
002540a3d6
Merge pull request #166 from Hestia-Homes/sap_baseline-dev-model
Sap baseline dev model - initial te
2026-01-12 16:47:09 +00:00
Michael Duong
4bec54c7fa amend the deploy file 2026-01-09 10:05:08 +00:00
Michael Duong
1ed368f507 remove carbon and heating starting columns 2026-01-09 09:33:59 +00:00
Michael Duong
be5d014759 change model name 2026-01-09 09:05:51 +00:00
Michael Duong
953abe63e9 update dvc 2026-01-08 23:13:00 +00:00
Michael Duong
0b23e04b89 test workflow 2026-01-08 23:06:33 +00:00
Michael Duong
c30755ffed add workflow 2026-01-08 23:04:56 +00:00
Michael Duong
767997c38f test simple model 2026-01-08 22:59:09 +00:00
Michael Duong
e07839249a Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev 2026-01-08 22:11:17 +00:00
Github-Bot
171089410c Update Registry 2025-11-04 18:37:49 +00:00
Github-Bot
2e1eec8f93 Update Registry 2025-11-04 18:37:12 +00:00
quandanrepo
15f3cb9d9b
Merge pull request #160 from Hestia-Homes/sap-dev-update-2
use serverless max memory of 3008
2025-11-04 18:36:38 +00:00
Michael Duong
1ca096bd44 use serverless max memory of 3008 2025-11-04 18:35:13 +00:00
Github-Bot
ec29ef093a Update Registry 2025-11-04 18:16:55 +00:00
Github-Bot
13fbdebc8b Update Registry 2025-11-04 18:16:21 +00:00
quandanrepo
c1a1229ea9
Merge pull request #159 from Hestia-Homes/sap-dev-update-2
update the serverless.yml file
2025-11-04 18:15:47 +00:00
Michael Duong
e20c6d1bae fix location 2025-11-04 18:13:40 +00:00
Michael Duong
1e2968763c update the serverless.yml file 2025-11-04 18:10:28 +00:00
Github-Bot
2df774c1b1 Update Registry 2025-11-04 17:35:26 +00:00
Github-Bot
62deab922b Update Registry 2025-11-04 17:34:51 +00:00
quandanrepo
7091143b33
Merge pull request #158 from Hestia-Homes/sap-dev-update-2
Sap dev update 2
2025-11-04 17:34:19 +00:00
Michael Duong
3e38b572ce remove the specific fast ai prediction 2025-11-04 16:52:24 +00:00
Michael Duong
9cd9208e3b try without fast ai 2025-11-04 16:50:49 +00:00
Github-Bot
3231c0724a Update Registry 2025-11-04 16:24:47 +00:00
Github-Bot
c1dda84206 Update Registry 2025-11-04 16:24:08 +00:00
quandanrepo
98182c029f
Merge pull request #157 from Hestia-Homes/sap-dev-update-2
trigger only one model to warm up, speed up dataframe generation
2025-11-04 16:23:31 +00:00
Michael Duong
175bf87a95 trigger only one model to warm up, speed up dataframe generation 2025-11-04 16:16:43 +00:00
Github-Bot
3b9699038a Update Registry 2025-11-04 14:45:59 +00:00
Github-Bot
284cfca3a9 Update Registry 2025-11-04 14:45:21 +00:00
quandanrepo
cf7143ff0b
Merge pull request #156 from Hestia-Homes/sap-dev-update-2
change the MPLCONFIGDIR to /tmp/matplotlib
2025-11-04 14:44:46 +00:00
Michael Duong
454e86db9c change the MPLCONFIGDIR to /tmp/matplotlib 2025-11-04 14:34:58 +00:00
Github-Bot
da1d815919 Update Registry 2025-11-04 13:25:33 +00:00
Github-Bot
9fa44d8aca Update Registry 2025-11-04 13:24:56 +00:00
quandanrepo
a1d1ced6c2
Merge pull request #154 from Hestia-Homes/sap-dev-update-2
update gto
2025-11-04 13:24:22 +00:00
Michael Duong
630c3586ff update gto 2025-11-04 13:16:17 +00:00
KhalimCK
e78e1bc1cf
Merge pull request #153 from Hestia-Homes/sap-dev-update-2
Sap dev update 2
2025-11-04 13:09:33 +00:00
Michael Duong
ed36032519 add info to readme 2025-11-03 23:18:29 +00:00
Michael Duong
87b3c27986 clean up prediction app and add logging 2025-11-03 23:11:26 +00:00
Michael Duong
b333b80d5c add a row of fake data 2025-11-03 22:58:28 +00:00
Michael Duong
d7370248a2 use model columns as data 2025-11-03 22:40:35 +00:00
Michael Duong
2c8f014258 remove testing from first one 2025-11-03 18:11:49 +00:00
Michael Duong
a20d0e6762 add dummy invoke 2025-11-03 18:05:33 +00:00
Michael Duong
e0fc65ec8a add conditional imports 2025-11-03 17:47:20 +00:00
Michael Duong
d331ee1649 try multiple invocations 2025-11-03 17:38:38 +00:00
Michael Duong
a0a3d222d8 new model 2025-11-03 17:24:49 +00:00
Michael Duong
6220cd17d3 faster inference 2025-11-03 14:43:26 +00:00
Michael Duong
541f2b2689 change libomp to conda install instead of brew due to segmentation errors, update back to 1.4 2025-11-03 14:03:19 +00:00
Michael Duong
bdc177baa9 roll back to autogluon 1.3.0 due to stabiulity issue 2025-11-02 22:49:36 +00:00
Michael Duong
91d6455cdf update pyarrow version 2025-11-02 17:32:27 +00:00
Michael Duong
43aacd80be update to autogluon 1.4 2025-11-02 17:26:40 +00:00
Michael Duong
2c735737a8 use correct escaping 2025-11-02 14:24:58 +00:00
Michael Duong
56eace2b7f correct the quotations 2025-11-02 14:03:35 +00:00
Michael Duong
ad26148bbc test static folder 2025-11-02 13:54:16 +00:00
Michael Duong
51f2c07b74 add delay to deleting of s3 file 2025-11-02 13:46:57 +00:00
Michael Duong
c3a7866df4 make changes to request body 2025-11-02 13:34:34 +00:00
Michael Duong
96bfeb92f9 test curling 2025-11-02 13:24:54 +00:00
Michael Duong
e04f6125e0 add single row dataset for testing 2025-11-02 13:11:33 +00:00
Michael Duong
ab3b2bb1d0 adjust change yum to dnf 2025-11-02 12:03:24 +00:00
Michael Duong
d4b70ecc7c adjust build location 2025-11-02 12:00:55 +00:00
Michael Duong
a083934ffb changed docker images python version and add github action 2025-11-02 11:53:44 +00:00
Michael Duong
fcdf5228d6 fix gitignore 2025-11-02 11:42:29 +00:00
Michael Duong
7b001f3abf update to python 3.12 and autogluon 1.3 2025-11-02 11:38:40 +00:00
Michael Duong
88c5b6c93a add new model, with latest data 2025-09-15 14:56:53 +01:00
Github-Bot
ce1ebb6174 Update Registry 2024-10-16 16:03:39 +00:00
Github-Bot
5a67ba1e15 Update Registry 2024-10-16 16:03:02 +00:00
KhalimCK
c76913e31b
Merge pull request #150 from Hestia-Homes/sap-dev-floorthermal
Sap dev floorthermal
2024-10-16 17:02:27 +01:00
Michael Duong
dbcb839be8 add new scenario data 2024-10-09 21:47:15 +01:00
Michael Duong
5166493eda model with no floor_thermal starting and ending 2024-10-09 16:25:04 +01:00
Michael Duong
9f784f72f6 Merge branch 'sap-dev' of github.com:Hestia-Homes/ML into sap-dev-model 2024-10-09 11:47:48 +01:00
Michael Duong
037c1c9e5f sap model with changed u values data 2024-10-09 11:47:28 +01:00
Github-Bot
db5b795ce7 Update Registry 2024-10-08 14:32:04 +00:00
Github-Bot
f9625e4575 Update Registry 2024-10-08 14:31:27 +00:00
KhalimCK
c12af458d4
Merge pull request #145 from Hestia-Homes/sap-dev-model
add model with new data
2024-10-08 15:30:49 +01:00
Michael Duong
c19eece16c add new scenario data to run 2024-10-08 15:10:04 +01:00
Michael Duong
9ed90fec16 add scenario data 2024-10-07 16:40:31 +01:00
Michael Duong
9d0ae55f15 add scenario data 2024-10-07 16:36:47 +01:00
Michael Duong
8858052b1f add fix for pip issues 2024-10-06 11:16:40 +01:00
Michael Duong
3c47d59fb9 add model with new data 2024-10-04 23:17:15 +01:00
23 changed files with 943 additions and 353 deletions

View file

@ -2,7 +2,17 @@ name: Sap Change Model Deploy
on: on:
push: push:
branches: [ sap-dev, sap-prod, heat-dev, heat-prod, carbon-dev, carbon-prod] branches:
[
sap-dev,
sap-prod,
heat-dev,
heat-prod,
carbon-dev,
carbon-prod,
sap_baseline-dev,
sap_baseline-prod,
]
jobs: jobs:
deploy: deploy:
@ -85,10 +95,19 @@ jobs:
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }} aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2 aws-region: eu-west-2
- name: Check Disk Usage
run: |
df -h /
du -sh /* 2>/dev/null | sort -hr | head -10
uname -m # Bonus: Confirms x64 vs. aarch64
- name: DVC Pull - name: DVC Pull
run: | run: |
cd modules/ml-pipeline/src/pipeline cd modules/ml-pipeline/src/pipeline
dvc pull -r ${{ steps.set_runtime_environment.outputs.runtime_environment }} dvc pull -r ${{ steps.set_runtime_environment.outputs.runtime_environment }}
cd data/model
ls
rm -r allmodels
- name: Setup Docker - name: Setup Docker
uses: docker/setup-buildx-action@v1 uses: docker/setup-buildx-action@v1

View file

@ -13,6 +13,7 @@ on:
- "sap-dev" - "sap-dev"
- "heat-dev" - "heat-dev"
- "carbon-dev" - "carbon-dev"
- "sap_baseline-dev"
permissions: write-all permissions: write-all
@ -180,7 +181,12 @@ jobs:
dvc push -r dev dvc push -r dev
Register-New-Model-Dev: Register-New-Model-Dev:
needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] needs:
[
Register-Major-Model-Dev,
Register-Minor-Model-Dev,
Register-Patch-Model-Dev,
]
if: | if: |
always() && always() &&
(needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') &&

View file

@ -5,14 +5,13 @@ on:
# branches: # branches:
# - "model-**" # - "model-**"
pull_request: pull_request:
branches: ["sap-dev", "heat-dev", "carbon-dev"] branches: ["sap-dev", "heat-dev", "carbon-dev", "sap_baseline-dev"]
label: label:
types: ["created", "edited"] types: ["created", "edited"]
permissions: write-all permissions: write-all
jobs: jobs:
Check-Label: Check-Label:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@ -32,8 +31,91 @@ jobs:
# echo "Please choose one of these tags: 'major', 'major', 'patch'" # echo "Please choose one of these tags: 'major', 'major', 'patch'"
# exit(1) # exit(1)
Verify-Model: Verify-Lambda:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install packages to retrieve artifacts
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt
- name: Retrieve artifacts (dvc.lock)
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments
- name: Set timestamp
  id: set_timestamp
  run: |
    # Compute the date stamp once in a local shell variable. Values appended
    # to $GITHUB_ENV only become visible in *subsequent* steps, so echoing
    # ${timestamp} straight after the append (without a local variable)
    # would print an empty string.
    timestamp="$(date +%Y%m%d)"
    # Export for the later steps that build the S3 sample-data path.
    echo "timestamp=${timestamp}" >> "$GITHUB_ENV"
    echo "Generated timestamp: ${timestamp}"
- name: Upload sample row dataset to S3
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
cd modules/ml-pipeline/src/pipeline/data/prepared_data/
aws s3 cp sample_test.parquet s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet
- name: Build Lambda docker Image
run: |
docker build . --file ./deployment/Dockerfile.prediction.lambda --tag lambda_test
- name: Run lambda docker container
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
docker run -d -p 9000:8080 \
-e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
-e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
-e RUNTIME_ENVIRONMENT=dev \
-e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
- name: Test Lambda endpoint
run: |
sleep 2
curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
-H "Content-Type: application/json" \
-d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"warm\\\": true}\"}"
- name: Get Lambda logs
run: |
docker logs $(docker ps -al -q)
- name: Test Lambda endpoint again
run: |
sleep 2
curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
-H "Content-Type: application/json" \
-d "{\"body\": \"{\\\"file_location\\\": \\\"s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/sample_test.parquet\\\", \\\"property_id\\\": 1, \\\"portfolio_id\\\": 4, \\\"created_at\\\": \\\"now\\\", \\\"testing\\\": true}\"}"
- name: Get Lambda logs
run: |
docker logs $(docker ps -al -q)
- name: Stop Lambda container
  run: |
    # "lambda_test" is the image tag from the build step, not a container
    # name (the container is started without --name), so look the running
    # container(s) up by the image they were created from and stop those.
    container_ids="$(docker ps -q --filter "ancestor=lambda_test")"
    if [ -n "${container_ids}" ]; then
      docker stop ${container_ids}
    else
      echo "Container already stopped"
    fi
- name: Remove uploaded sample row dataset from S3
if: always()
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
aws s3 rm --recursive s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}/
Verify-Model:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@ -64,7 +146,6 @@ jobs:
docker run prediction_test docker run prediction_test
Trigger-CML: Trigger-CML:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:

View file

@ -8,25 +8,73 @@
"active": true "active": true
}, },
"sap": { "sap": {
"version": "v0.14.0", "version": "v0.17.5",
"stage": { "stage": {
"dev": "v0.14.0" "dev": "v0.17.5"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"heat": { "heat": {
"version": "v0.5.0", "version": "v0.8.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.8.0"
}, },
"registered": true, "registered": true,
"active": true "active": true
}, },
"carbon": { "carbon": {
"version": "v0.5.0", "version": "v0.8.0",
"stage": { "stage": {
"dev": "v0.5.0" "dev": "v0.8.0"
},
"registered": true,
"active": true
},
"hotwater": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"heating": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"lighting": {
"version": "v1.0.0",
"stage": {
"dev": "v1.0.0"
},
"registered": true,
"active": true
},
"hotwaterkwh": {
"version": "v1.4.0",
"stage": {
"dev": "v1.4.0"
},
"registered": true,
"active": true
},
"heatingkwh": {
"version": "v1.6.0",
"stage": {
"dev": "v1.6.0"
},
"registered": true,
"active": true
},
"sap_baseline": {
"version": "v1.0.2",
"stage": {
"dev": "v1.0.2"
}, },
"registered": true, "registered": true,
"active": true "active": true

View file

@ -83,3 +83,13 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d
``` ```
This will send a POST request to the running Lambda function and pass in the required data as JSON. This will send a POST request to the running Lambda function and pass in the required data as JSON.
To send a testing invocation to the Lambda, use:
```bash
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"testing\": \"true\"}"}'
```
or, for a warm-up invocation:
```bash
curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\", \"warm\": \"true\"}"}'
```

View file

@ -1,19 +1,24 @@
FROM public.ecr.aws/lambda/python:3.10 FROM public.ecr.aws/lambda/python:3.12
# Set the working directory # Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT} WORKDIR ${LAMBDA_TASK_ROOT}
ENV PYTHONPATH "${PYTHONPATH}:${LAMBDA_TASK_ROOT}" ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"
ENV MPLCONFIGDIR="/tmp/matplotlib"
# Environment variables # Environment variables
ARG RUNTIME_ENVIRONMENT ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT} ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}
# Install necessary build tools - required to test locally # Install necessary build tools - required to test locally
RUN yum install -y gcc python3-devel gcc-c++ RUN dnf install -y gcc python3-devel gcc-c++
# Install python packages # Install python packages
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r ./requirements.txt
RUN pip install uv
RUN uv pip install -r requirements.txt --system
# RUN pip install --no-cache-dir -r ./requirements.txt
# Copy the project code # Copy the project code
COPY modules/ml-pipeline/src/pipeline ./pipeline COPY modules/ml-pipeline/src/pipeline ./pipeline

View file

@ -47,6 +47,30 @@ def upload_dataframe_to_s3(df, bucket, s3_file_name):
return False return False
def warming_up_invocation(model, model_filepath: str):
    """
    Run a warm-up invocation: load the model and predict on one synthetic row.

    The model is loaded from ``model_filepath``; a single all-zero row is then
    built with one column per entry of ``model.model.original_features`` and
    passed to ``model.predict``. The prediction result is discarded and the
    function returns ``None``.

    :param model: object exposing ``load_model(path)``, ``predict(data=...)``
        and a ``model.original_features`` sequence of column names
    :param model_filepath: location handed to ``model.load_model``
    """
    # pandas / numpy are imported inside the function rather than at module level.
    import numpy as np
    import pandas as pd

    model.load_model(model_filepath)

    feature_names = model.model.original_features
    zero_row = np.zeros((1, len(feature_names)))
    synthetic_frame = pd.DataFrame(zero_row, columns=feature_names)

    # A previous variant targeted the "NeuralNetFastAI" sub-model explicitly
    # when it was present; the generic predict call is used for every model now.
    model.predict(data=synthetic_frame)
def handler(event, context): def handler(event, context):
""" """
Take in event and trigger the prediction pipeline Take in event and trigger the prediction pipeline
@ -66,9 +90,6 @@ def handler(event, context):
created_at = body["created_at"] created_at = body["created_at"]
# TODO: Implement the loading of the model and prediction # TODO: Implement the loading of the model and prediction
storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
logger.info(f"--- Initiate MLModel ---") logger.info(f"--- Initiate MLModel ---")
build_model_params = settings.build_model build_model_params = settings.build_model
@ -78,6 +99,32 @@ def handler(event, context):
model = model_factory(build_model_params["model_type"]) model = model_factory(build_model_params["model_type"])
model_filepath = build_model_params["model_save_filepath"]
if "warm" in body:
logger.info("Warm up invocation - synthetic prediction")
warming_up_invocation(model=model, model_filepath=model_filepath)
return {
"statusCode": 200,
"body": json.dumps(
{
"message": "Successfully warmed up invocation",
}
),
}
if "testing" in body:
logger.info(
"Testing invocation for CI/CD - save file to same location in S3"
)
storage_filepath = body["file_location"].replace(
".parquet", "_output.parquet"
)
else:
storage_filepath = f"s3://{PREDICTIONS_BUCKET}/{portfolio_id}/{property_id}/{created_at}.parquet"
logger.info(f"--- Initiate Input DataClient ---") logger.info(f"--- Initiate Input DataClient ---")
input_dataclient = dataclient_factory( input_dataclient = dataclient_factory(
dataclient_type="aws-s3", dataclient_type="aws-s3",
@ -95,7 +142,7 @@ def handler(event, context):
output_dataclient=output_dataclient, output_dataclient=output_dataclient,
model=model, model=model,
target=feature_process_params["feature_processor_config"]["target"], target=feature_process_params["feature_processor_config"]["target"],
model_filepath=build_model_params["model_save_filepath"], model_filepath=model_filepath,
test_data_filepath=body["file_location"], test_data_filepath=body["file_location"],
predictions_output_filepath=storage_filepath, predictions_output_filepath=storage_filepath,
predictions_column_name=generate_predictions_params[ predictions_column_name=generate_predictions_params[

View file

@ -51,3 +51,4 @@ functions:
path: /predict path: /predict
method: POST method: POST
timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed
memorySize: 3008

View file

@ -1,7 +1,8 @@
export PYENV_ROOT=$(HOME)/.pyenv export PYENV_ROOT=$(HOME)/.pyenv
export PATH := $(PYENV_ROOT)/bin:$(PATH) export PATH := $(PYENV_ROOT)/bin:$(PATH)
PYTHON_VERSION ?= 3.10.12 PYTHON_VERSION ?= 3.12.12
CONDA_ENV=dev_env_pipeline CONDA_ENV=dev_env_pipeline
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
.PHONY: init .PHONY: init
init: dev-conda init: dev-conda
@ -12,11 +13,15 @@ dev-conda:
# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously" # conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
conda init bash conda init bash
conda run -v -n ${CONDA_ENV} pip install --upgrade pip ${CONDA_ACTIVATE} ${CONDA_ENV} && \
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt which pip && \
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt pip install --upgrade pip && \
conda run -v -n ${CONDA_ENV} pre-commit install pip install uv && \
conda run -v -n ${CONDA_ENV} pip install ipykernel uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
pre-commit install && \
uv pip install ipykernel && \
conda install llvm-openmp -y
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND" echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "conda activate ${CONDA_ENV}" echo "conda activate ${CONDA_ENV}"

View file

@ -1,12 +1,17 @@
# Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow) # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
FROM python:3.10.12-slim FROM python:3.12.12-slim
RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev
COPY pipeline/requirements/predictions/requirements.txt requirements.txt COPY pipeline/requirements/predictions/requirements.txt requirements.txt
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN pip install uv
RUN uv pip install -r requirements.txt --system
# RUN pip install -r requirements.txt
# Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script # Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
COPY pipeline/ /home/pipeline/ COPY pipeline/ /home/pipeline/

View file

@ -29,6 +29,7 @@ data_filepath = prepare_data_params["data_filepath"]
train_proportion = prepare_data_params["train_proportion"] train_proportion = prepare_data_params["train_proportion"]
output_train_filepath = prepare_data_params["output_train_filepath"] output_train_filepath = prepare_data_params["output_train_filepath"]
output_test_filepath = prepare_data_params["output_test_filepath"] output_test_filepath = prepare_data_params["output_test_filepath"]
sample_test_filepath = prepare_data_params["sample_test_filepath"]
feature_processor_config = feature_process_params["feature_processor_config"] feature_processor_config = feature_process_params["feature_processor_config"]
logger.info(f"--- Initiate DataClient ---") logger.info(f"--- Initiate DataClient ---")
@ -99,6 +100,10 @@ def prepare_data(
logger.info("--- Outputting data ---") logger.info("--- Outputting data ---")
output_dataclient.save_data(
obj=data.sample(1), location=sample_test_filepath, save_config=None
)
output_dataclient.save_data( output_dataclient.save_data(
obj=train, location=output_train_filepath, save_config=None obj=train, location=output_train_filepath, save_config=None
) )

View file

@ -99,6 +99,12 @@ def generate_scenario_predictions(
] ]
) )
# TEMPORARY FIX: ADD is_post_sap10_starting and is_post_sap10_ending if not present
if "is_post_sap10_starting" not in scenario_data.columns:
scenario_data["is_post_sap10_starting"] = False
if "is_post_sap10_ending" not in scenario_data.columns:
scenario_data["is_post_sap10_ending"] = False
logger.info("--- Loading Model ---") logger.info("--- Loading Model ---")
model.load_model(model_filepath) model.load_model(model_filepath)

View file

@ -14,9 +14,23 @@ default:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error eval_metric: mean_squared_error #mean_absolute_error
time_limit: 1800 time_limit: 3600
presets: medium_quality presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT'] excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT', 'FASTAI']
infer_limit: 0.05 infer_limit: 1
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
fit_strategy: "parallel"
ag_args_ensemble: {'num_folds_parallel': 2} ag_args_ensemble: {'num_folds_parallel': 2}
num_gpus: 0
hyperparameters:
{
'NN_TORCH': [{}],
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}],
# 'GBM': [{}],
'CAT': [{}],
'XGB': [{}],
'FASTAI': [{}],
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}

View file

@ -1,6 +1,7 @@
""" """
After predictions, we may want to apply some post processing to the predictions After predictions, we may want to apply some post processing to the predictions
""" """
import pandas as pd import pandas as pd
@ -30,6 +31,6 @@ def clip_predictions_to_minimum_value(
post_prediction_logic = { post_prediction_logic = {
"clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value,
# "round_predictions": round_predictions # "round_predictions": round_predictions
} }

View file

@ -3,11 +3,10 @@ default:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
scenario_data_filepaths: scenario_data_filepaths:
# - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/07-10-2024-16-26-06/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/08-10-2024-15-07-33/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/08-10-2024-22-18-44/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/09-10-2024-18-21-08/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -12,32 +12,165 @@ default:
AWS_ACCESS_KEY_ID: minio AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123 AWS_SECRET_ACCESS_KEY: minio123
ENDPOINT_URL: http://localhost:9000 ENDPOINT_URL: http://localhost:9000
local: local: null
null
prepare_data: prepare_data:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
train_proportion: 0.9 train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet output_test_filepath: ./data/prepared_data/test.parquet
sample_test_filepath: ./data/prepared_data/sample_test.parquet
feature_processor: feature_processor:
feature_processor_type: dataframe feature_processor_type: dataframe
feature_processor_config: feature_processor_config:
subsample_amount: null subsample_amount: null
subsample_seed: 0 subsample_seed: 0
target: sap_ending target: sap_starting
identifier_columns: ["uprn"] identifier_columns: ["uprn"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
drop_columns: [ drop_columns:
"heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending", [
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', "sap_ending",
'number_habitable_rooms', 'number_heated_rooms'] "potential_energy_efficiency",
"environment_impact_potential",
"energy_consumption_potential",
"co2_emissions_potential",
"heat_demand_change",
"carbon_change",
"rdsap_change",
"heat_demand_starting",
"heat_demand_ending",
"carbon_starting",
"carbon_ending",
"days_to_starting",
"days_to_ending",
"number_habitable_rooms_starting",
"number_habitable_rooms_ending",
"number_heated_rooms_starting",
"number_heated_rooms_ending",
"number_habitable_rooms",
"number_heated_rooms",
"lighting_cost_starting",
"lighting_cost_ending",
"heating_cost_starting",
"heating_cost_ending",
"hot_water_cost_starting",
"hot_water_cost_ending",
"floor_thermal_transmittance",
"floor_thermal_transmittance_ending",
"lodgement_date_starting",
"lodgement_date_ending",
"walls_thermal_transmittance_ending",
"walls_thermal_transmittance_unit_ending",
"is_filled_cavity_ending",
"is_as_built_ending",
"walls_is_assumed_ending",
"is_park_home_ending",
"walls_insulation_thickness_ending",
"external_insulation_ending",
"internal_insulation_ending",
"floor_insulation_thickness_ending",
"roof_thermal_transmittance_ending",
"is_at_rafters_ending",
"roof_insulation_thickness_ending",
"heater_type_ending",
"system_type_ending",
"thermostat_characteristics_ending",
"heating_scope_ending",
"energy_recovery_ending",
"hotwater_tariff_type_ending",
"extra_features_ending",
"chp_systems_ending",
"distribution_system_ending",
"no_system_present_ending",
"appliance_ending",
"has_radiators_ending",
"has_fan_coil_units_ending",
"has_pipes_in_screed_above_insulation_ending",
"has_pipes_in_insulated_timber_floor_ending",
"has_pipes_in_concrete_slab_ending",
"has_boiler_ending",
"has_air_source_heat_pump_ending",
"has_room_heaters_ending",
"has_electric_storage_heaters_ending",
"has_warm_air_ending",
"has_electric_underfloor_heating_ending",
"has_electric_ceiling_heating_ending",
"has_community_scheme_ending",
"has_ground_source_heat_pump_ending",
"has_no_system_present_ending",
"has_portable_electric_heaters_ending",
"has_water_source_heat_pump_ending",
"has_electric_heat_pump_ending",
"has_micro-cogeneration_ending",
"has_solar_assisted_heat_pump_ending",
"has_exhaust_source_heat_pump_ending",
"has_community_heat_pump_ending",
"has_hot-water-only_ending",
"has_electric_ending",
"has_mains_gas_ending",
"has_wood_logs_ending",
"has_coal_ending",
"has_oil_ending",
"has_wood_pellets_ending",
"has_anthracite_ending",
"has_dual_fuel_mineral_and_wood_ending",
"has_smokeless_fuel_ending",
"has_lpg_ending",
"has_b30k_ending",
"has_mineral_and_wood_ending",
"has_dual_fuel_appliance_ending",
"has_electricaire_ending",
"has_assumed_for_most_rooms_ending",
"has_underfloor_heating_ending",
"thermostatic_control_ending",
"charging_system_ending",
"switch_system_ending",
"no_control_ending",
"dhw_control_ending",
"community_heating_ending",
"multiple_room_thermostats_ending",
"auxiliary_systems_ending",
"trvs_ending",
"rate_control_ending",
"glazing_type_ending",
"fuel_type_ending",
"main-fuel_tariff_type_ending",
"is_community_ending",
"no_individual_heating_or_community_network_ending",
"complex_fuel_type_ending",
"mechanical_ventilation_ending",
"secondheat_description_ending",
"glazed_type_ending",
"multi_glaze_proportion_ending",
"low_energy_lighting_ending",
"number_open_fireplaces_ending",
"solar_water_heating_flag_ending",
"photo_supply_ending",
"transaction_type_ending",
"energy_tariff_ending",
"extension_count_ending",
"total_floor_area_ending",
"floor_height_ending",
"hot_water_energy_eff_ending",
"floor_energy_eff_ending",
"windows_energy_eff_ending",
"walls_energy_eff_ending",
"sheating_energy_eff_ending",
"roof_energy_eff_ending",
"mainheat_energy_eff_ending",
"mainheatc_energy_eff_ending",
"lighting_energy_eff_ending",
"is_post_sap10_ending",
"estimated_perimeter_ending",
]
retain_features: null retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',

View file

@ -11,9 +11,6 @@ import joblib
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
from typing import Union, List from typing import Union, List
from sklearn import linear_model
from sklearn.svm import SVR
from autogluon.tabular import TabularDataset, TabularPredictor
from core.interface.InterfaceModels import MLModel from core.interface.InterfaceModels import MLModel
from core.Logger import logger from core.Logger import logger
@ -69,6 +66,8 @@ class SKLearnLinearRegression:
""" """
Method to train a model Method to train a model
""" """
from sklearn import linear_model
self.model = linear_model.LinearRegression() self.model = linear_model.LinearRegression()
x_train = data.iloc[:, data.columns != target] x_train = data.iloc[:, data.columns != target]
@ -117,6 +116,7 @@ class SKLearnSVMRegression:
""" """
Method to train a model Method to train a model
""" """
from sklearn.svm import SVR
validate_dict_keys( validate_dict_keys(
list(model_hyperparameters.keys()), list(model_hyperparameters.keys()),
@ -152,12 +152,17 @@ class AutogluonAutoML:
"infer_limit", "infer_limit",
"infer_limit_batch_size", "infer_limit_batch_size",
"ag_args_ensemble", "ag_args_ensemble",
"fit_strategy",
"num_gpus",
"hyperparameters",
] ]
def load_model(self, path: Union[Path, str]) -> None: def load_model(self, path: Union[Path, str]) -> None:
""" """
Method to load a model Method to load a model
""" """
from autogluon.tabular import TabularPredictor
filepath = str(path) filepath = str(path)
self.model = TabularPredictor.load(path=filepath) self.model = TabularPredictor.load(path=filepath)
@ -183,6 +188,10 @@ class AutogluonAutoML:
""" """
Method to train a model Method to train a model
""" """
from autogluon.tabular import TabularDataset, TabularPredictor
# Force Parallel Model fitting
os.environ["AG_FORCE_PARALLEL"] = "True"
validate_dict_keys( validate_dict_keys(
keys_1=list(model_hyperparameters.keys()), keys_1=list(model_hyperparameters.keys()),
@ -209,6 +218,9 @@ class AutogluonAutoML:
infer_limit=model_hyperparameters["infer_limit"], infer_limit=model_hyperparameters["infer_limit"],
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"], infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"], ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
fit_strategy=model_hyperparameters["fit_strategy"],
num_gpus=model_hyperparameters["num_gpus"],
hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
) )
def predict( def predict(

View file

@ -16,15 +16,22 @@ stages:
deps: deps:
- path: 1_prepare_data.py - path: 1_prepare_data.py
hash: md5 hash: md5
md5: 11a3b8bfdfe199ab7ecc39ccc5652649 md5: a5ce162e1c402c0f811a80ef78cf4dd5
size: 4298 size: 4481
params: params:
configs/settings.yaml: configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns: default.feature_processor.feature_processor_config.drop_columns:
- sap_ending
- potential_energy_efficiency
- environment_impact_potential
- energy_consumption_potential
- co2_emissions_potential
- heat_demand_change - heat_demand_change
- carbon_change - carbon_change
- rdsap_change - rdsap_change
- heat_demand_starting
- heat_demand_ending - heat_demand_ending
- carbon_starting
- carbon_ending - carbon_ending
- days_to_starting - days_to_starting
- days_to_ending - days_to_ending
@ -34,24 +41,139 @@ stages:
- number_heated_rooms_ending - number_heated_rooms_ending
- number_habitable_rooms - number_habitable_rooms
- number_heated_rooms - number_heated_rooms
- lighting_cost_starting
- lighting_cost_ending
- heating_cost_starting
- heating_cost_ending
- hot_water_cost_starting
- hot_water_cost_ending
- floor_thermal_transmittance
- floor_thermal_transmittance_ending
- lodgement_date_starting
- lodgement_date_ending
- walls_thermal_transmittance_ending
- walls_thermal_transmittance_unit_ending
- is_filled_cavity_ending
- is_as_built_ending
- walls_is_assumed_ending
- is_park_home_ending
- walls_insulation_thickness_ending
- external_insulation_ending
- internal_insulation_ending
- floor_insulation_thickness_ending
- roof_thermal_transmittance_ending
- is_at_rafters_ending
- roof_insulation_thickness_ending
- heater_type_ending
- system_type_ending
- thermostat_characteristics_ending
- heating_scope_ending
- energy_recovery_ending
- hotwater_tariff_type_ending
- extra_features_ending
- chp_systems_ending
- distribution_system_ending
- no_system_present_ending
- appliance_ending
- has_radiators_ending
- has_fan_coil_units_ending
- has_pipes_in_screed_above_insulation_ending
- has_pipes_in_insulated_timber_floor_ending
- has_pipes_in_concrete_slab_ending
- has_boiler_ending
- has_air_source_heat_pump_ending
- has_room_heaters_ending
- has_electric_storage_heaters_ending
- has_warm_air_ending
- has_electric_underfloor_heating_ending
- has_electric_ceiling_heating_ending
- has_community_scheme_ending
- has_ground_source_heat_pump_ending
- has_no_system_present_ending
- has_portable_electric_heaters_ending
- has_water_source_heat_pump_ending
- has_electric_heat_pump_ending
- has_micro-cogeneration_ending
- has_solar_assisted_heat_pump_ending
- has_exhaust_source_heat_pump_ending
- has_community_heat_pump_ending
- has_hot-water-only_ending
- has_electric_ending
- has_mains_gas_ending
- has_wood_logs_ending
- has_coal_ending
- has_oil_ending
- has_wood_pellets_ending
- has_anthracite_ending
- has_dual_fuel_mineral_and_wood_ending
- has_smokeless_fuel_ending
- has_lpg_ending
- has_b30k_ending
- has_mineral_and_wood_ending
- has_dual_fuel_appliance_ending
- has_electricaire_ending
- has_assumed_for_most_rooms_ending
- has_underfloor_heating_ending
- thermostatic_control_ending
- charging_system_ending
- switch_system_ending
- no_control_ending
- dhw_control_ending
- community_heating_ending
- multiple_room_thermostats_ending
- auxiliary_systems_ending
- trvs_ending
- rate_control_ending
- glazing_type_ending
- fuel_type_ending
- main-fuel_tariff_type_ending
- is_community_ending
- no_individual_heating_or_community_network_ending
- complex_fuel_type_ending
- mechanical_ventilation_ending
- secondheat_description_ending
- glazed_type_ending
- multi_glaze_proportion_ending
- low_energy_lighting_ending
- number_open_fireplaces_ending
- solar_water_heating_flag_ending
- photo_supply_ending
- transaction_type_ending
- energy_tariff_ending
- extension_count_ending
- total_floor_area_ending
- floor_height_ending
- hot_water_energy_eff_ending
- floor_energy_eff_ending
- windows_energy_eff_ending
- walls_energy_eff_ending
- sheating_energy_eff_ending
- roof_energy_eff_ending
- mainheat_energy_eff_ending
- mainheatc_energy_eff_ending
- lighting_energy_eff_ending
- is_post_sap10_ending
- estimated_perimeter_ending
default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_config.target: sap_starting
default.feature_processor.feature_processor_type: dataframe default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath: default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet default.prepare_data.output_test_filepath:
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath:
./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 0.9 default.prepare_data.train_proportion: 0.9
outs: outs:
- path: data/prepared_data/ - path: data/prepared_data/
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: 2e33489c17eff1a60079e26d370a9058.dir
size: 45056059 size: 26013963
nfiles: 2 nfiles: 3
build_model: build_model:
cmd: python 2_build_model.py cmd: python 2_build_model.py
deps: deps:
@ -61,9 +183,9 @@ stages:
size: 4820 size: 4820
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: 2e33489c17eff1a60079e26d370a9058.dir
size: 45056059 size: 26013963
nfiles: 2 nfiles: 3
params: params:
configs/build_model.yaml: configs/build_model.yaml:
default: default:
@ -79,7 +201,7 @@ stages:
output_filepath: ./data/model/allmodels/ output_filepath: ./data/model/allmodels/
problem_type: regression problem_type: regression
eval_metric: mean_squared_error eval_metric: mean_squared_error
time_limit: 1800 time_limit: 3600
presets: medium_quality presets: medium_quality
excluded_model_types: excluded_model_types:
- RF - RF
@ -87,25 +209,94 @@ stages:
- NN_TORCH - NN_TORCH
- KNN - KNN
- XT - XT
infer_limit: 0.05 - FASTAI
infer_limit: 1
infer_limit_batch_size: 10000 infer_limit_batch_size: 10000
fit_strategy: parallel
ag_args_ensemble: ag_args_ensemble:
num_folds_parallel: 2 num_folds_parallel: 2
num_gpus: 0
hyperparameters:
NN_TORCH:
- {}
GBM:
- extra_trees: true
ag_args:
name_suffix: XT
- {}
- learning_rate: 0.03
num_leaves: 128
feature_fraction: 0.9
min_data_in_leaf: 3
ag_args:
name_suffix: Large
priority: 0
CAT:
- {}
XGB:
- {}
FASTAI:
- {}
RF:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
XT:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
KNN:
- weights: uniform
ag_args:
name_suffix: Unif
- weights: distance
ag_args:
name_suffix: Dist
outs: outs:
- path: data/fit_predictions/ - path: data/fit_predictions/
hash: md5 hash: md5
md5: d9c9afc05e8780db47c0548b19bf7d19.dir md5: 701063eff6ee4444971742f05c4de861.dir
size: 3349989 size: 3453673
nfiles: 1 nfiles: 1
- path: data/model/ - path: data/model/
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: 9c3e945f96fc2c95f2962a98af5c04fe.dir
size: 773523079 size: 594127389
nfiles: 36 nfiles: 30
- path: metrics/fit_metrics.json - path: metrics/fit_metrics.json
hash: md5 hash: md5
md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a md5: f1df654cc38d2c73261bfaed0a1ef277
size: 224 size: 223
generate_predictions: generate_predictions:
cmd: python 3_generate_predictions.py cmd: python 3_generate_predictions.py
deps: deps:
@ -115,26 +306,28 @@ stages:
size: 2464 size: 2464
- path: data/model - path: data/model
hash: md5 hash: md5
md5: 13c3100e1486c27a83a8a47491077842.dir md5: 9c3e945f96fc2c95f2962a98af5c04fe.dir
size: 773523079 size: 594127389
nfiles: 36 nfiles: 30
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: 2e33489c17eff1a60079e26d370a9058.dir
size: 45056059 size: 26013963
nfiles: 2 nfiles: 3
params: params:
configs/settings.yaml: configs/settings.yaml:
default.generate_predictions.input_dataclient_type: local default.generate_predictions.input_dataclient_type: local
default.generate_predictions.output_dataclient_type: local default.generate_predictions.output_dataclient_type: local
default.generate_predictions.predictions_column_name: predictions default.generate_predictions.predictions_column_name: predictions
default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet default.generate_predictions.predictions_output_filepath:
default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet ./data/predictions/predictions.parquet
default.generate_predictions.test_data_filepath:
./data/prepared_data/test.parquet
outs: outs:
- path: data/predictions/ - path: data/predictions/
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: 3dfb21b2918e954e657f0c7cbeca308f.dir
size: 463197 size: 479298
nfiles: 1 nfiles: 1
generate_metrics: generate_metrics:
cmd: python 4_generate_metrics.py cmd: python 4_generate_metrics.py
@ -145,14 +338,14 @@ stages:
size: 3484 size: 3484
- path: data/predictions - path: data/predictions
hash: md5 hash: md5
md5: 5d07bcebf3160a72bb18dfd79106e85c.dir md5: 3dfb21b2918e954e657f0c7cbeca308f.dir
size: 463197 size: 479298
nfiles: 1 nfiles: 1
- path: data/prepared_data - path: data/prepared_data
hash: md5 hash: md5
md5: 80c9e138146a1d96b9d16091c207e2e8.dir md5: 2e33489c17eff1a60079e26d370a9058.dir
size: 45056059 size: 26013963
nfiles: 2 nfiles: 3
params: params:
configs/settings.yaml: configs/settings.yaml:
default.generate_metrics.dataclient_type: local default.generate_metrics.dataclient_type: local
@ -161,30 +354,29 @@ stages:
outs: outs:
- path: metrics/metrics.json - path: metrics/metrics.json
hash: md5 hash: md5
md5: 3e08df02fd5c5d094bcf936e1338d596 md5: 365cf4dc2ac89b4df55dc7ae6862d8b1
size: 223 size: 222
generate_scenerio_metrics: generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py cmd: python 5_generate_scenarios.py
deps: deps:
- path: 5_generate_scenarios.py - path: 5_generate_scenarios.py
hash: md5 hash: md5
md5: 40506749fefd926d47c60ff5b16db307 md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
size: 5337 size: 5658
params: params:
configs/scenarios.yaml: configs/scenarios.yaml:
default.scenarios: default.scenarios:
input_dataclient_type: aws-s3 input_dataclient_type: aws-s3
output_dataclient_type: local output_dataclient_type: local
scenario_data_filepaths: scenario_data_filepaths:
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md metrics_output_filepath: ./metrics/scenario_metrics.md
outs: outs:
- path: metrics/scenario_metrics.md - path: metrics/scenario_metrics.md
hash: md5 hash: md5
md5: fa4d6d7bbd7818613800da5f8f37ea96 md5: d41d8cd98f00b204e9800998ecf8427e
size: 363 size: 0
- path: metrics/scenario_table.md - path: metrics/scenario_table.md
hash: md5 hash: md5
md5: d6baf100a1623cc2467c2f8221d314c9 md5: d41d8cd98f00b204e9800998ecf8427e
size: 2133 size: 0

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12
pyarrow==13.0.0 pyarrow==20.0.0
pre-commit==3.3.3 pre-commit==4.3.0

View file

@ -1,7 +1,7 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12
pyarrow==13.0.0 pyarrow==20.0.0
PyYAML==6.0.1 PyYAML==6.0.3

View file

@ -1,10 +1,10 @@
joblib==1.3.2 joblib==1.5.2
boto3==1.28.17 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
ray==2.6.3 ray==2.44.1
dynaconf==3.2.1 dynaconf==3.2.12
alibi==0.9.5 # alibi
shap==0.42.1 shap==0.49.1
pyarrow==13.0.0 pyarrow==20.0.0
pre-commit==3.3.3 pre-commit==4.3.0

View file

@ -1,4 +1,4 @@
boto3==1.28.41 boto3==1.40.61
pandas==2.1.4 pandas==2.3.3
autogluon.tabular[all]==1.0.0 autogluon.tabular[all]==1.4.0
dynaconf==3.2.1 dynaconf==3.2.12

View file

@ -1,4 +1,5 @@
dvc==3.51.0 dvc==3.66.0
dvc-s3==3.2.0 dvc-s3==3.2.2
gto==1.7.1 boto3==1.40.61
gto==1.9.0
pyOpenSSL==23.3.0 pyOpenSSL==23.3.0