Update to Python 3.12 and AutoGluon 1.3

This commit is contained in:
Michael Duong 2025-11-02 11:37:08 +00:00
parent 88c5b6c93a
commit 7b001f3abf
13 changed files with 92 additions and 74 deletions

View file

@ -1,7 +1,8 @@
export PYENV_ROOT=$(HOME)/.pyenv
export PATH := $(PYENV_ROOT)/bin:$(PATH)
PYTHON_VERSION ?= 3.10.12
CONDA_ENV=dev_env_pipeline
PYTHON_VERSION ?= 3.12.12
CONDA_ENV=dev_env_pipeline_1
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda activate
.PHONY: init
init: dev-conda
@ -12,11 +13,13 @@ dev-conda:
# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
conda init bash
conda run -v -n ${CONDA_ENV} pip install --upgrade pip
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/training/requirements-dev.txt
conda run -v -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
conda run -v -n ${CONDA_ENV} pre-commit install
conda run -v -n ${CONDA_ENV} pip install ipykernel
${CONDA_ACTIVATE} ${CONDA_ENV} && \
which pip && \
pip install --upgrade pip && \
pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
pip install -r src/pipeline/requirements/version_control/requirements.txt && \
pre-commit install && \
pip install ipykernel
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "conda activate ${CONDA_ENV}"

View file

@ -99,6 +99,12 @@ def generate_scenario_predictions(
]
)
# TEMPORARY FIX: add the is_post_sap10_starting and is_post_sap10_ending columns, defaulting to False, when they are not present in scenario_data
if "is_post_sap10_starting" not in scenario_data.columns:
scenario_data["is_post_sap10_starting"] = False
if "is_post_sap10_ending" not in scenario_data.columns:
scenario_data["is_post_sap10_ending"] = False
logger.info("--- Loading Model ---")
model.load_model(model_filepath)

View file

@ -17,6 +17,7 @@ default:
time_limit: 1800
presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.05
infer_limit: 0.0005
infer_limit_batch_size: 10000
"fit_strategy": "parallel"
ag_args_ensemble: {'num_folds_parallel': 2}

View file

@ -21,7 +21,8 @@ default:
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet

View file

@ -1,4 +1,4 @@
""""
"""
Implementations of MLModels, all of which will have four methods to:
- Load model
- Save Model
@ -152,6 +152,7 @@ class AutogluonAutoML:
"infer_limit",
"infer_limit_batch_size",
"ag_args_ensemble",
"fit_strategy",
]
def load_model(self, path: Union[Path, str]) -> None:
@ -209,6 +210,7 @@ class AutogluonAutoML:
infer_limit=model_hyperparameters["infer_limit"],
infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
fit_strategy=model_hyperparameters["fit_strategy"],
)
def predict(

View file

@ -49,18 +49,20 @@ stages:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
default.prepare_data.output_test_filepath:
./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath:
./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 0.9
outs:
- path: data/prepared_data/
hash: md5
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -71,8 +73,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
params:
configs/build_model.yaml:
@ -97,25 +99,26 @@ stages:
- NN_TORCH
- KNN
- XT
infer_limit: 0.05
infer_limit: 0.0005
infer_limit_batch_size: 10000
fit_strategy: parallel
ag_args_ensemble:
num_folds_parallel: 2
outs:
- path: data/fit_predictions/
hash: md5
md5: a6196bf08607c43ba6bc637611bb32b0.dir
size: 3491001
md5: 4fa77e3f129d2e6f9ef7222c44978c26.dir
size: 3474669
nfiles: 1
- path: data/model/
hash: md5
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
size: 790430916
nfiles: 36
md5: e27b9216bc7455f8245d5b49f27b2707.dir
size: 753575768
nfiles: 30
- path: metrics/fit_metrics.json
hash: md5
md5: 33421d5e3a2d569dbe6d4486c568a2b7
size: 225
md5: 426a162284ca9e29c043eb1d72e547e6
size: 224
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -125,26 +128,28 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
size: 790430916
nfiles: 36
md5: e27b9216bc7455f8245d5b49f27b2707.dir
size: 753575768
nfiles: 30
- path: data/prepared_data
hash: md5
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
params:
configs/settings.yaml:
default.generate_predictions.input_dataclient_type: local
default.generate_predictions.output_dataclient_type: local
default.generate_predictions.predictions_column_name: predictions
default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
default.generate_predictions.predictions_output_filepath:
./data/predictions/predictions.parquet
default.generate_predictions.test_data_filepath:
./data/prepared_data/test.parquet
outs:
- path: data/predictions/
hash: md5
md5: bd6821db9abc95af8c74aa20effd7f37.dir
size: 487194
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
size: 484524
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -155,13 +160,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: bd6821db9abc95af8c74aa20effd7f37.dir
size: 487194
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
size: 484524
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
params:
configs/settings.yaml:
@ -171,15 +176,15 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 9c2a7802554f5c2f750b2242c6003026
md5: b9ae6d24424f2d5389697577e9076b91
size: 223
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
- path: 5_generate_scenarios.py
hash: md5
md5: 40506749fefd926d47c60ff5b16db307
size: 5337
md5: 872b0c762ce1c8933fcbc5f54d5d4b5d
size: 5658
params:
configs/scenarios.yaml:
default.scenarios:
@ -192,9 +197,9 @@ stages:
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: c01524a0cc2e61151c106d7049af3bf9
md5: 32d78c20d91fedf2f5dbb4162f323e25
size: 356
- path: metrics/scenario_table.md
hash: md5
md5: a995c8ef7ffbe2ca254441150817ae21
md5: 52cbd19566151b0c300f9673252704d2
size: 872

View file

@ -1,4 +1,2 @@
/fit_metrics.json
/metrics.json
/scenario_table.md
/scenario_metrics.md

View file

@ -0,0 +1 @@
{"mean_absolute_error": 1.2158480882644653, "median_absolute_error": 0.8539352416992188, "mean_squared_error": 3.116994857788086, "mean_absolute_percentage_error": 0.01968802697956562, "symmetric_mape": 0.019615056540152054}

View file

@ -0,0 +1 @@
{"mean_absolute_error": 2.121211290359497, "median_absolute_error": 1.3063621520996094, "mean_squared_error": 11.15064525604248, "mean_absolute_percentage_error": 0.03622421622276306, "symmetric_mape": 0.035541225671999285}

View file

@ -1,7 +1,7 @@
joblib==1.3.2
boto3==1.28.17
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1
pyarrow==13.0.0
pre-commit==3.3.3
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
dynaconf==3.2.12
pyarrow==22.0.0
pre-commit==4.3.0

View file

@ -1,7 +1,7 @@
joblib==1.3.2
boto3==1.28.17
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1
pyarrow==13.0.0
PyYAML==6.0.1
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
dynaconf==3.2.12
pyarrow==22.0.0
PyYAML==6.0.3

View file

@ -1,10 +1,10 @@
joblib==1.3.2
boto3==1.28.17
pandas==2.1.4
autogluon.tabular[all]==1.0.0
ray==2.6.3
dynaconf==3.2.1
alibi==0.9.5
shap==0.42.1
pyarrow==13.0.0
pre-commit==3.3.3
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
ray==2.44.1
dynaconf==3.2.12
alibi==0.5.5
shap==0.49.1
pyarrow==22.0.0
pre-commit==4.3.0

View file

@ -1,4 +1,4 @@
boto3==1.28.41
pandas==2.1.4
autogluon.tabular[all]==1.0.0
dynaconf==3.2.1
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
dynaconf==3.2.12