upgrade autogluon

This commit is contained in:
Michael Duong 2024-03-21 14:41:58 +00:00
parent 7f2f80af22
commit ad2c4d6019
8 changed files with 46 additions and 40 deletions

View file

@@ -14,8 +14,9 @@ default:
 output_filepath: ./data/model/allmodels/
 problem_type: regression
 eval_metric: mean_squared_error #mean_absolute_error
-time_limit: 4000
-presets: medium_quality
+time_limit: 1800
+presets: good_quality
 excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT']
 infer_limit: 0.05
 infer_limit_batch_size: 10000
+ag_args_ensemble: {'num_folds_parallel': 2}

View file

@@ -24,7 +24,7 @@ default:
 # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
 data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
 # data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
-train_proportion: 1
+train_proportion: 0.9
 output_train_filepath: ./data/prepared_data/train.parquet
 output_test_filepath: ./data/prepared_data/test.parquet

View file

@@ -25,7 +25,7 @@ def model_factory(model_type: str) -> MLModel:
 models = {
 "SKLearnLinearRegression": SKLearnLinearRegression(),
 "SKLearnSVMRegression": SKLearnSVMRegression(),
-"AutogluonAutoML": AutogluonAutoML()
+"AutogluonAutoML": AutogluonAutoML(),
 # ADD OTHER MODELS HERE
 }
@@ -151,6 +151,7 @@ class AutogluonAutoML:
 "excluded_model_types",
 "infer_limit",
 "infer_limit_batch_size",
+"ag_args_ensemble",
 ]
 def load_model(self, path: Union[Path, str]) -> None:
@@ -207,6 +208,7 @@ class AutogluonAutoML:
 excluded_model_types=model_hyperparameters["excluded_model_types"],
 infer_limit=model_hyperparameters["infer_limit"],
 infer_limit_batch_size=model_hyperparameters["infer_limit_batch_size"],
+ag_args_ensemble=model_hyperparameters["ag_args_ensemble"],
 )
 def predict(

View file

@@ -27,12 +27,12 @@ stages:
 default.prepare_data.output_dataclient_type: local
 default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
 default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
-default.prepare_data.train_proportion: 1
+default.prepare_data.train_proportion: 0.9
 outs:
 - path: data/prepared_data/
 hash: md5
-md5: 3c77fa10cd1cd503eb4d2540394629f6.dir
-size: 42626894
+md5: 3d1144848fce4ce50f6abfaec5235552.dir
+size: 46392840
 nfiles: 2
 build_model:
 cmd: python 2_build_model.py
@@ -43,8 +43,8 @@ stages:
 size: 4820
 - path: data/prepared_data
 hash: md5
-md5: 3c77fa10cd1cd503eb4d2540394629f6.dir
-size: 42626894
+md5: 3d1144848fce4ce50f6abfaec5235552.dir
+size: 46392840
 nfiles: 2
 params:
 configs/build_model.yaml:
@@ -61,8 +61,8 @@ stages:
 output_filepath: ./data/model/allmodels/
 problem_type: regression
 eval_metric: mean_squared_error
-time_limit: 4000
-presets: medium_quality
+time_limit: 1800
+presets: good_quality
 excluded_model_types:
 - RF
 - FASTAI
@@ -72,21 +72,23 @@ stages:
 - XT
 infer_limit: 0.05
 infer_limit_batch_size: 10000
+ag_args_ensemble:
+num_folds_parallel: 2
 outs:
 - path: data/fit_predictions/
 hash: md5
-md5: e0a11ac6e4adf69d6180c0217c639a0e.dir
-size: 3680908
+md5: 346b6611afbf2070e038bf945249a86e.dir
+size: 3384302
 nfiles: 1
 - path: data/model/
 hash: md5
-md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir
-size: 805896324
-nfiles: 31
+md5: 8e37f21728cd092660bafa8c32dc109f.dir
+size: 423840922
+nfiles: 118
 - path: metrics/fit_metrics.json
 hash: md5
-md5: 0ed5b1141bbb8bc3156e7c056b29f3cd
-size: 225
+md5: d63e1a8d31503055835ac35149554e41
+size: 223
 generate_predictions:
 cmd: python 3_generate_predictions.py
 deps:
@@ -96,13 +98,13 @@ stages:
 size: 2464
 - path: data/model
 hash: md5
-md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir
-size: 805896324
-nfiles: 31
+md5: 8e37f21728cd092660bafa8c32dc109f.dir
+size: 423840922
+nfiles: 118
 - path: data/prepared_data
 hash: md5
-md5: 3c77fa10cd1cd503eb4d2540394629f6.dir
-size: 42626894
+md5: 3d1144848fce4ce50f6abfaec5235552.dir
+size: 46392840
 nfiles: 2
 params:
 configs/settings.yaml:
@@ -114,8 +116,8 @@ stages:
 outs:
 - path: data/predictions/
 hash: md5
-md5: 38707d16ae1e2330cc03f524db9cdd60.dir
-size: 648730
+md5: d148baf508140353d62c16d6ab0fb6b7.dir
+size: 469224
 nfiles: 1
 generate_metrics:
 cmd: python 4_generate_metrics.py
@@ -126,13 +128,13 @@ stages:
 size: 3484
 - path: data/predictions
 hash: md5
-md5: 38707d16ae1e2330cc03f524db9cdd60.dir
-size: 648730
+md5: d148baf508140353d62c16d6ab0fb6b7.dir
+size: 469224
 nfiles: 1
 - path: data/prepared_data
 hash: md5
-md5: 3c77fa10cd1cd503eb4d2540394629f6.dir
-size: 42626894
+md5: 3d1144848fce4ce50f6abfaec5235552.dir
+size: 46392840
 nfiles: 2
 params:
 configs/settings.yaml:
@@ -142,8 +144,8 @@ stages:
 outs:
 - path: metrics/metrics.json
 hash: md5
-md5: 145e7ac84ab4a4407b23695a632b4d91
-size: 226
+md5: 196232f94b563ac525cf65ee5cc6d639
+size: 222
 startup_cleanup:
 cmd: python 0_startup_cleanup.py
 deps:

View file

@@ -1,7 +1,7 @@
 joblib==1.3.2
 boto3==1.28.17
-pandas==1.5.3
-autogluon==0.8.2
+pandas==2.1.4
+autogluon==1.0.0
 dynaconf==3.2.0
 pyarrow==13.0.0
 pre-commit==3.3.3

View file

@@ -1,7 +1,7 @@
 joblib==1.3.2
 boto3==1.28.17
-pandas==1.5.3
-autogluon==0.8.2
+pandas==2.1.4
+autogluon==1.0.0
 dynaconf==3.2.0
 pyarrow==13.0.0
 PyYAML==6.0.1

View file

@@ -1,9 +1,10 @@
 joblib==1.3.2
 boto3==1.28.17
-pandas==1.5.3
-autogluon==0.8.2
+pandas==2.1.4
+autogluon==1.0.0
+ray==2.6.3
 dynaconf==3.2.0
-alibi==0.9.4
+alibi==0.9.5
 shap==0.42.1
 pyarrow==13.0.0
 pre-commit==3.3.3

View file

@@ -1,4 +1,4 @@
 boto3==1.28.41
-pandas==1.5.3
-autogluon==0.8.2
+pandas==2.1.4
+autogluon==1.0.0
 dynaconf==3.2.0