From 6220cd17d3e8f8b961d694c1488a1f9aa9706262 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 3 Nov 2025 14:43:26 +0000 Subject: [PATCH] faster inference --- .../src/pipeline/configs/build_model.yaml | 8 +-- modules/ml-pipeline/src/pipeline/dvc.lock | 57 +++++++++---------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 50122ee..a03f430 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -17,15 +17,15 @@ default: time_limit: 1800 presets: medium_quality excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT'] - infer_limit: 0.001 + infer_limit: 0.0005 infer_limit_batch_size: 10000 - fit_strategy: "sequential" + fit_strategy: "parallel" ag_args_ensemble: {'num_folds_parallel': 2} - num_gpus: auto + num_gpus: 0 hyperparameters: { 'NN_TORCH': [{}], - 'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0, 'hyperparameter_tune_kwargs': 'auto'}}], + 'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, {'learning_rate': 0.03, 'num_leaves': 128, 'feature_fraction': 0.9, 'min_data_in_leaf': 3, 'ag_args': {'name_suffix': 'Large', 'priority': 0,}}], # 'GBM': [{}], 'CAT': [{}], 'XGB': [{}], diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index cffd1b3..673285c 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -61,8 +61,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: ba409a8c79863ddc407786b7aa7a053a.dir - size: 46113237 + md5: bdf32829fa7d4084293e54f73084a25c.dir + size: 46126494 nfiles: 3 build_model: cmd: python 2_build_model.py @@ -73,8 +73,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: ba409a8c79863ddc407786b7aa7a053a.dir - size: 46113237 + md5: bdf32829fa7d4084293e54f73084a25c.dir + size: 46126494 nfiles: 3 params: configs/build_model.yaml: @@ -99,12 +99,12 @@ stages: - NN_TORCH - KNN - XT - infer_limit: 0.001 + infer_limit: 0.0005 infer_limit_batch_size: 10000 - fit_strategy: sequential + fit_strategy: parallel ag_args_ensemble: num_folds_parallel: 2 - num_gpus: auto + num_gpus: 0 hyperparameters: NN_TORCH: - {} @@ -120,7 +120,6 @@ stages: ag_args: name_suffix: Large priority: 0 - hyperparameter_tune_kwargs: auto CAT: - {} XGB: @@ -175,18 +174,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: a9361ab31ff8fc08c3e5e3b96cec06d4.dir - size: 3474690 + md5: bb3a39098652aee04227b8157cffe6dc.dir + size: 3475209 nfiles: 1 - path: data/model/ hash: md5 - md5: 19019e558886b1acd6d29442a47243d0.dir - size: 761937021 - nfiles: 34 + md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir + size: 719633310 + nfiles: 33 - path: metrics/fit_metrics.json hash: md5 - md5: 3af168aedf1f81a22024bb8c815f5d12 - size: 221 + md5: 5b63b6c51867279dea639df1c50b4416 + size: 225 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -196,13 +195,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 19019e558886b1acd6d29442a47243d0.dir - size: 761937021 - nfiles: 34 + md5: 057ecca4ca9b6cabc60d81b36f5efee1.dir + size: 719633310 + nfiles: 33 - path: data/prepared_data hash: md5 - md5: ba409a8c79863ddc407786b7aa7a053a.dir - size: 46113237 + md5: bdf32829fa7d4084293e54f73084a25c.dir + size: 46126494 nfiles: 3 params: configs/settings.yaml: @@ -216,8 +215,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: a9f32d70a4817df8092e52c5513a445f.dir - size: 484694 + md5: 76dd44be410b11c62f701c645a9003a9.dir + size: 484698 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -228,13 +227,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: a9f32d70a4817df8092e52c5513a445f.dir - size: 484694 + md5: 76dd44be410b11c62f701c645a9003a9.dir + size: 484698 nfiles: 1 - path: data/prepared_data hash: md5 - md5: ba409a8c79863ddc407786b7aa7a053a.dir - size: 46113237 + md5: bdf32829fa7d4084293e54f73084a25c.dir + size: 46126494 nfiles: 3 params: configs/settings.yaml: @@ -244,7 +243,7 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 736ef69da7edb94577139ae9ede5ac0d + md5: de4098ee21d5efbf8b769471ae2fa20f size: 224 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py @@ -265,9 +264,9 @@ stages: outs: - path: metrics/scenario_metrics.md hash: md5 - md5: adcc78833e7a0824ecb10ad78a646ea8 + md5: d4f3c1eeee96720270e2467fc50b0b29 size: 356 - path: metrics/scenario_table.md hash: md5 - md5: 35e704d0499e943c4110f66f1482d2ec + md5: 56e39ca1f444b0a97ce894cf9ff90d32 size: 872