diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 1ebb62d..4c72487 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -13,7 +13,7 @@ default: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 4000 + time_limit: 400 presets: medium_quality excluded_model_types: ['KNN', 'RF'] infer_limit: 0.05 diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 13851cf..1b07fdf 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -5,8 +5,8 @@ stages: deps: - path: 1_prepare_data.py hash: md5 - md5: c9f030df733e318b80d1fa91b7732f79 - size: 5132 + md5: 896d3d88a4a9f68d174efe71dc089517 + size: 4222 params: configs/settings.yaml: default.feature_processor.feature_processor_config.drop_columns: @@ -20,7 +20,7 @@ stages: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: HEAT_DEMAND_ENDING default.feature_processor.feature_processor_type: dataframe - default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet + default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -29,20 +29,20 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: e0be70d5025e40dd0d655d9949f72130.dir - size: 31800776 + md5: 6f9c63363ad52a836524dbb6fae7a2ac.dir + size: 34480114 nfiles: 2 build_model: cmd: python 2_build_model.py deps: - path: 2_build_model.py hash: md5 - md5: 84699d208874c52accaff61c6af9bb0a - size: 5359 + md5: b824822475c222521516493e68eef9c5 + size: 4149 - path: data/prepared_data hash: md5 - md5: e0be70d5025e40dd0d655d9949f72130.dir - size: 31800776 + md5: 6f9c63363ad52a836524dbb6fae7a2ac.dir + size: 34480114 nfiles: 2 params: configs/build_model.yaml: @@ -58,37 +58,39 @@ stages: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error - time_limit: 4000 + time_limit: 400 presets: medium_quality excluded_model_types: - KNN - RF + infer_limit: 0.05 + infer_limit_batch_size: 10000 outs: - path: data/model/ hash: md5 - md5: 14ca33cde5e86770135f768abaf84978.dir - size: 422447808 - nfiles: 27 + md5: 452eba2d92233e81d321814aacefe5c2.dir + size: 323127043 + nfiles: 24 - path: metrics/fit_metrics.json hash: md5 - md5: 41bfb8d2da8f06d1864d73ce125cc6aa - size: 221 + md5: 888124b56e0c5008a6423e290fc5cc71 + size: 222 generate_predictions: cmd: python 3_generate_predictions.py deps: - path: 3_generate_predictions.py hash: md5 - md5: 5ef2856a5a977304f1ec01f9b4205262 - size: 3028 + md5: 0a70ad4dfe99414a75d1261c75a177b9 + size: 2464 - path: data/model hash: md5 - md5: 14ca33cde5e86770135f768abaf84978.dir - size: 422447808 - nfiles: 27 + md5: 452eba2d92233e81d321814aacefe5c2.dir + size: 323127043 + nfiles: 24 - path: data/prepared_data hash: md5 - md5: e0be70d5025e40dd0d655d9949f72130.dir - size: 31800776 + md5: 6f9c63363ad52a836524dbb6fae7a2ac.dir + size: 34480114 nfiles: 2 params: configs/settings.yaml: @@ -100,25 +102,25 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 40d0c7a7fd4a15add0615e322cf341a0.dir - size: 352151 + md5: f852550a0a51f0c2b120b0680c1a9b54.dir + size: 325890 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py deps: - path: 4_generate_metrics.py hash: md5 - md5: 2c9fb78955a8c19cff0a098976f81d1b - size: 4487 + md5: 567b1acb819e2ff432b989cdbdd4a2bf + size: 3448 - path: data/predictions hash: md5 - md5: 40d0c7a7fd4a15add0615e322cf341a0.dir - size: 352151 + md5: f852550a0a51f0c2b120b0680c1a9b54.dir + size: 325890 nfiles: 1 - path: data/prepared_data hash: md5 - md5: e0be70d5025e40dd0d655d9949f72130.dir - size: 31800776 + md5: 6f9c63363ad52a836524dbb6fae7a2ac.dir + size: 34480114 nfiles: 2 params: configs/settings.yaml: @@ -128,15 +130,15 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 4e023650240e78d6ad761f1db7aac922 - size: 220 + md5: ed3012943593fac4ac7ad9a5499ac18f + size: 219 startup_cleanup: cmd: python 0_startup_cleanup.py deps: - path: 0_startup_cleanup.py hash: md5 - md5: fbb7e3b1b98b517c870f3e1df3e7f695 - size: 1676 + md5: b1b12f6b6393fbf8b83d23684df0a3d4 + size: 1220 params: configs/settings.yaml: default.startup_cleanup.artefacts: ./data