Install libomp via conda (llvm-openmp) instead of brew due to segmentation faults; update autogluon.tabular back to 1.4

This commit is contained in:
Michael Duong 2025-11-03 14:03:19 +00:00
parent bdc177baa9
commit 541f2b2689
8 changed files with 122 additions and 40 deletions

View file

@ -20,7 +20,8 @@ dev-conda:
uv pip install -r src/pipeline/requirements/training/requirements-dev.txt && \
uv pip install -r src/pipeline/requirements/version_control/requirements.txt && \
pre-commit install && \
uv pip install ipykernel
uv pip install ipykernel && \
conda install llvm-openmp -y
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "conda activate ${CONDA_ENV}"

View file

@ -17,8 +17,20 @@ default:
time_limit: 1800
presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.0005
infer_limit: 0.001
infer_limit_batch_size: 10000
fit_strategy: "parallel"
fit_strategy: "sequential"
ag_args_ensemble: {'num_folds_parallel': 2}
num_gpus: auto
hyperparameters:
{
'NN_TORCH': [{}],
'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': 'auto'}}],
# 'GBM': [{}],
'CAT': [{}],
'XGB': [{}],
'FASTAI': [{}],
'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}

View file

@ -154,6 +154,7 @@ class AutogluonAutoML:
"ag_args_ensemble",
"fit_strategy",
"num_gpus",
"hyperparameters",
]
def load_model(self, path: Union[Path, str]) -> None:
@ -215,6 +216,7 @@ class AutogluonAutoML:
ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
fit_strategy=model_hyperparameters["fit_strategy"],
num_gpus=model_hyperparameters["num_gpus"],
hyperparameters=model_hyperparameters["hyperparameters"].to_dict(),
)
def predict(

View file

@ -61,8 +61,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
size: 46092230
md5: ba409a8c79863ddc407786b7aa7a053a.dir
size: 46113237
nfiles: 3
build_model:
cmd: python 2_build_model.py
@ -73,8 +73,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
size: 46092230
md5: ba409a8c79863ddc407786b7aa7a053a.dir
size: 46113237
nfiles: 3
params:
configs/build_model.yaml:
@ -99,27 +99,94 @@ stages:
- NN_TORCH
- KNN
- XT
infer_limit: 0.0005
infer_limit: 0.001
infer_limit_batch_size: 10000
fit_strategy: parallel
fit_strategy: sequential
ag_args_ensemble:
num_folds_parallel: 2
num_gpus: auto
hyperparameters:
NN_TORCH:
- {}
GBM:
- extra_trees: true
ag_args:
name_suffix: XT
- {}
- learning_rate: 0.03
num_leaves: 128
feature_fraction: 0.9
min_data_in_leaf: 3
ag_args:
name_suffix: Large
priority: 0
hyperparameter_tune_kwargs: auto
CAT:
- {}
XGB:
- {}
FASTAI:
- {}
RF:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
XT:
- criterion: gini
ag_args:
name_suffix: Gini
problem_types:
- binary
- multiclass
- criterion: entropy
ag_args:
name_suffix: Entr
problem_types:
- binary
- multiclass
- criterion: squared_error
ag_args:
name_suffix: MSE
problem_types:
- regression
- quantile
KNN:
- weights: uniform
ag_args:
name_suffix: Unif
- weights: distance
ag_args:
name_suffix: Dist
outs:
- path: data/fit_predictions/
hash: md5
md5: 01328a1cc5a1ff35e701a3c44902afc6.dir
size: 3474659
md5: a9361ab31ff8fc08c3e5e3b96cec06d4.dir
size: 3474690
nfiles: 1
- path: data/model/
hash: md5
md5: 70f076a248524dfce60412f83969ae63.dir
size: 760254863
nfiles: 33
md5: 19019e558886b1acd6d29442a47243d0.dir
size: 761937021
nfiles: 34
- path: metrics/fit_metrics.json
hash: md5
md5: 4726c52b2f27650ab1bbf97b5bf61e54
size: 224
md5: 3af168aedf1f81a22024bb8c815f5d12
size: 221
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -129,13 +196,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 70f076a248524dfce60412f83969ae63.dir
size: 760254863
nfiles: 33
md5: 19019e558886b1acd6d29442a47243d0.dir
size: 761937021
nfiles: 34
- path: data/prepared_data
hash: md5
md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
size: 46092230
md5: ba409a8c79863ddc407786b7aa7a053a.dir
size: 46113237
nfiles: 3
params:
configs/settings.yaml:
@ -149,8 +216,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 312f9106eb18d34df75124f0536f0603.dir
size: 484470
md5: a9f32d70a4817df8092e52c5513a445f.dir
size: 484694
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -161,13 +228,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 312f9106eb18d34df75124f0536f0603.dir
size: 484470
md5: a9f32d70a4817df8092e52c5513a445f.dir
size: 484694
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 7b780ea01da913d9d8cadcff73fbde0f.dir
size: 46092230
md5: ba409a8c79863ddc407786b7aa7a053a.dir
size: 46113237
nfiles: 3
params:
configs/settings.yaml:
@ -177,8 +244,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 661388682aa1ca888b256e4667211379
size: 222
md5: 736ef69da7edb94577139ae9ede5ac0d
size: 224
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
@ -198,9 +265,9 @@ stages:
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: 88ebca8dccf907692675301ffe06b10d
md5: adcc78833e7a0824ecb10ad78a646ea8
size: 356
- path: metrics/scenario_table.md
hash: md5
md5: 3ec419e883b812b254b331f055999cc9
md5: 35e704d0499e943c4110f66f1482d2ec
size: 872

View file

@ -1,7 +1,7 @@
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
pandas==2.3.3
autogluon.tabular[all]==1.4.0
dynaconf==3.2.12
pyarrow==20.0.0
pre-commit==4.3.0

View file

@ -1,7 +1,7 @@
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
pandas==2.3.3
autogluon.tabular[all]==1.4.0
dynaconf==3.2.12
pyarrow==20.0.0
PyYAML==6.0.3

View file

@ -1,10 +1,10 @@
joblib==1.5.2
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
pandas==2.3.3
autogluon.tabular[all]==1.4.0
ray==2.44.1
dynaconf==3.2.12
alibi==0.5.5
# alibi
shap==0.49.1
pyarrow==20.0.0
pre-commit==4.3.0

View file

@ -1,4 +1,4 @@
boto3==1.40.61
pandas==2.2.3
autogluon.tabular[all]==1.3
pandas==2.3.3
autogluon.tabular[all]==1.4.0
dynaconf==3.2.12