From 43aacd80bed83266551a0a13b0c5efe69811a3b5 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 2 Nov 2025 17:26:40 +0000 Subject: [PATCH] update to autogluon 1.4 --- deployment/Dockerfile.prediction.lambda | 1 + modules/ml-pipeline/Makefile | 9 +-- .../src/pipeline/configs/build_model.yaml | 2 +- modules/ml-pipeline/src/pipeline/dvc.lock | 56 +++++++++---------- .../predictions/requirements-dev.txt | 2 +- .../requirements/predictions/requirements.txt | 2 +- .../training/requirements-dev.txt | 4 +- .../requirements/training/requirements.txt | 2 +- 8 files changed, 40 insertions(+), 38 deletions(-) diff --git a/deployment/Dockerfile.prediction.lambda b/deployment/Dockerfile.prediction.lambda index ff4d5d0..ca21a8e 100644 --- a/deployment/Dockerfile.prediction.lambda +++ b/deployment/Dockerfile.prediction.lambda @@ -3,6 +3,7 @@ FROM public.ecr.aws/lambda/python:3.12 # Set the working directory WORKDIR ${LAMBDA_TASK_ROOT} ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}" +ENV MPLCONFIGDIR="${LAMBDA_TASK_ROOT}/tmp/matplotlib" # Environment variables ARG RUNTIME_ENVIRONMENT diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile index ff83c27..09f9f15 100644 --- a/modules/ml-pipeline/Makefile +++ b/modules/ml-pipeline/Makefile @@ -1,7 +1,7 @@ export PYENV_ROOT=$(HOME)/.pyenv export PATH := $(PYENV_ROOT)/bin:$(PATH) PYTHON_VERSION ?= 3.12.12 -CONDA_ENV=dev_env_pipeline_1 +CONDA_ENV=dev_env_pipeline CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate .PHONY: init @@ -16,10 +16,11 @@ dev-conda: ${CONDA_ACTIVATE} ${CONDA_ENV} && \ which pip && \ pip install --upgrade pip && \ - pip install -r src/pipeline/requirements/training/requirements-dev.txt && \ - pip install -r src/pipeline/requirements/version_control/requirements.txt && \ + pip install uv && \ + uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \ + uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \ pre-commit install && \ - pip install ipykernel + uv pip install ipykernel echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND" echo "conda activate ${CONDA_ENV}" diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 69349ba..38c0910 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -14,7 +14,7 @@ default: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 180 + time_limit: 1800 presets: medium_quality excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT'] infer_limit: 0.0005 diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 1a3143a..f05f185 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -61,8 +61,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 02b2c25e488f75c4a676540c127b8930.dir - size: 45890160 + md5: 2feba8772c240b507eb900934efcb8ca.dir + size: 46064555 nfiles: 3 build_model: cmd: python 2_build_model.py @@ -73,8 +73,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 02b2c25e488f75c4a676540c127b8930.dir - size: 45890160 + md5: 2feba8772c240b507eb900934efcb8ca.dir + size: 46064555 nfiles: 3 params: configs/build_model.yaml: @@ -91,7 +91,7 @@ stages: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error - time_limit: 180 + time_limit: 1800 presets: medium_quality excluded_model_types: - RF @@ -107,18 +107,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 7f9a534daf824434262bee89e2ee2cfd.dir - size: 3475064 + md5: 29036f4f42b1fdcab7f9e40a87f38a8c.dir + size: 3474783 nfiles: 1 - path: data/model/ hash: md5 - md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir - size: 414148418 - nfiles: 24 + md5: 77cab231e3d51bbebbae5a7af310c18a.dir + size: 791390619 + nfiles: 34 - path: metrics/fit_metrics.json hash: md5 - md5: 7763f689b46c38ec8f0cc605deac4c2a - size: 221 + md5: 4f39064fb6b31c7c879299621bcea28d + size: 224 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -128,13 +128,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir - size: 414148418 - nfiles: 24 + md5: 77cab231e3d51bbebbae5a7af310c18a.dir + size: 791390619 + nfiles: 34 - path: data/prepared_data hash: md5 - md5: 02b2c25e488f75c4a676540c127b8930.dir - size: 45890160 + md5: 2feba8772c240b507eb900934efcb8ca.dir + size: 46064555 nfiles: 3 params: configs/settings.yaml: @@ -148,8 +148,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 2d9353f60e16d4f85dd4a08a71dce548.dir - size: 483856 + md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir + size: 484314 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -160,13 +160,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 2d9353f60e16d4f85dd4a08a71dce548.dir - size: 483856 + md5: 8dfa69b48586da6b0ef33a6fbedb7c4a.dir + size: 484314 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 02b2c25e488f75c4a676540c127b8930.dir - size: 45890160 + md5: 2feba8772c240b507eb900934efcb8ca.dir + size: 46064555 nfiles: 3 params: configs/settings.yaml: @@ -176,8 +176,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 8a52e3a0047c68b9de5c371a1d406f73 - size: 224 + md5: bf980dad2dc5b97651546b0b755419ae + size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: @@ -197,9 +197,9 @@ stages: outs: - path: metrics/scenario_metrics.md hash: md5 - md5: 666f73f6fdb49484737f1a7edd798727 - size: 363 + md5: 05e2cce8e61d5005398659e9f3465cd6 + size: 356 - path: metrics/scenario_table.md hash: md5 - md5: 71c9fcb9ec304353aba0d7f5c58ca8b2 + md5: 92446d2f3836c6f790d06e3b268b05f3 size: 872 diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt index a70ecf8..30ec95f 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt @@ -1,7 +1,7 @@ joblib==1.5.2 boto3==1.40.61 pandas==2.2.3 -autogluon.tabular[all]==1.3 +autogluon.tabular[all]==1.4 dynaconf==3.2.12 pyarrow==22.0.0 pre-commit==4.3.0 diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt index b9aa74c..63580ab 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt @@ -1,7 +1,7 @@ joblib==1.5.2 boto3==1.40.61 pandas==2.2.3 -autogluon.tabular[all]==1.3 +autogluon.tabular[all]==1.4 dynaconf==3.2.12 pyarrow==22.0.0 PyYAML==6.0.3 diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt index 1e59b59..a503ecf 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt @@ -1,10 +1,10 @@ joblib==1.5.2 boto3==1.40.61 pandas==2.2.3 -autogluon.tabular[all]==1.3 +autogluon.tabular[all]==1.4 ray==2.44.1 dynaconf==3.2.12 alibi==0.5.5 shap==0.49.1 -pyarrow==22.0.0 +pyarrow pre-commit==4.3.0 diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt index 84455e8..6e85ded 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt @@ -1,4 +1,4 @@ boto3==1.40.61 pandas==2.2.3 -autogluon.tabular[all]==1.3 +autogluon.tabular[all]==1.4 dynaconf==3.2.12