diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 75ae2be..5e870b8 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -11,6 +11,6 @@ AutogluonAutoML: output_filepath: ./data/model/autogluonmodel/ problem_type: regression eval_metric: mean_absolute_error - time_limit: 400 - presets: good_quality + time_limit: 60 + presets: medium_quality excluded_model_types: ['KNN'] diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py index 91a4815..180d3a9 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py @@ -17,3 +17,4 @@ def SAP_ENDING(df): new_feature_funcs = {"SAP_ENDING": SAP_ENDING} +# new_feature_funcs = {} diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 501dc10..9748f15 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -5,8 +5,8 @@ stages: deps: - path: prepare_data.py hash: md5 - md5: 934d774e67f38e440b621ce71152f5f6 - size: 5031 + md5: 2648d7d407dca857a1d20a11a88d3d98 + size: 5116 params: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet @@ -15,8 +15,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 3767eec56906f5ac724a3f07433645ef.dir - size: 13442342 + md5: ec064b0274e2e6a0864580a748e6bb6a.dir + size: 24067192 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 5134 - path: data/prepared_data hash: md5 - md5: 3767eec56906f5ac724a3f07433645ef.dir - size: 13442342 + md5: ec064b0274e2e6a0864580a748e6bb6a.dir + size: 24067192 nfiles: 2 params: configs/build_model.yaml: @@ -36,8 +36,8 @@ stages: output_filepath: ./data/model/autogluonmodel/ problem_type: regression eval_metric: mean_absolute_error - time_limit: 400 - presets: good_quality + time_limit: 60 + presets: medium_quality excluded_model_types: - KNN SKLearnLinearRegression: @@ -49,25 +49,25 @@ stages: outs: - path: data/model/ hash: md5 - md5: 7b2f8334c81fb5ff23e42e77741b31d1.dir - size: 118227750 - nfiles: 71 + md5: dc73587056f07735719bfee464a5f898.dir + size: 285397707 + nfiles: 16 - path: metrics/fit_metrics.json hash: md5 - md5: e1c9a16617804f48e8ffac7cec6575ca - size: 185 + md5: f6d03cb197a3d78e61f6fef023ed8d7f + size: 184 generate_predictions: cmd: python generate_predictions.py deps: - path: data/model hash: md5 - md5: 7b2f8334c81fb5ff23e42e77741b31d1.dir - size: 118227750 - nfiles: 71 + md5: dc73587056f07735719bfee464a5f898.dir + size: 285397707 + nfiles: 16 - path: data/prepared_data hash: md5 - md5: 3767eec56906f5ac724a3f07433645ef.dir - size: 13442342 + md5: ec064b0274e2e6a0864580a748e6bb6a.dir + size: 24067192 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -83,21 +83,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: fb7cf3f4a90598ec1e43a1b7a4af3bef.dir - size: 536774 + md5: 3d70862c00e38f383a06c3e4df5ebb38.dir + size: 386958 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: fb7cf3f4a90598ec1e43a1b7a4af3bef.dir - size: 536774 + md5: 3d70862c00e38f383a06c3e4df5ebb38.dir + size: 386958 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 3767eec56906f5ac724a3f07433645ef.dir - size: 13442342 + md5: ec064b0274e2e6a0864580a748e6bb6a.dir + size: 24067192 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -106,14 +106,12 @@ stages: params: configs/generate_metrics.yaml: dataclient_type: local - input_datahandler_type: parquet metrics_output_filepath: ./metrics/metrics.json metrics_type: Regression - output_datahandler_type: json outs: - path: metrics/metrics.json hash: md5 - md5: 852ef4cf2ca5e7f89d70420a9df7a596 + md5: 2507f756ea68768185ebeaf66db2ebbd size: 183 startup_cleanup: cmd: python startup_cleanup.py diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt index e34d5af..d285422 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt @@ -5,5 +5,3 @@ autogluon==0.8.2 alibi==0.9.4 pyarrow==13.0.0 pre-commit==3.3.3 -sphinx==7.2.5 -sphinx_rtd_theme==1.3.0