diff --git a/modules/ml-pipeline/src/pipeline/README.md b/modules/ml-pipeline/src/pipeline/README.md index d47f864..d44e220 100644 --- a/modules/ml-pipeline/src/pipeline/README.md +++ b/modules/ml-pipeline/src/pipeline/README.md @@ -37,3 +37,4 @@ Workflow: - This experiment will have the corresponding .dvc files for the hashed model and data - Use version control as normal - git add, git commit etc +- To revert a change, check out the earlier commit with `git checkout {COMMIT_HASH}`, then create a new branch from it with `git switch -c {NEW_BRANCH_NAME}` diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index 1acea2a..fcec7f7 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -16,6 +16,6 @@ default: eval_metric: mean_squared_error #mean_absolute_error time_limit: 4000 presets: medium_quality - excluded_model_types: ['RF', 'NN_TORCH', 'KNN', 'XT', 'CAT', 'FASTAI'] + excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT'] infer_limit: 0.05 infer_limit_batch_size: 10000 diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 9b24faf..19b0a5b 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -22,8 +22,7 @@ default: # data_filepath: s3://retrofit-data-dev/sap_change_model/floor_area_clean_test.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet - # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet - data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_rooms.parquet + data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet # data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet train_proportion: 1 
output_train_filepath: ./data/prepared_data/train.parquet diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 2f513d4..826e654 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -22,7 +22,7 @@ stages: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_type: dataframe - default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_rooms.parquet + default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -31,8 +31,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 8f0f5481075094460ab852ace2fa9b7a.dir - size: 43692138 + md5: 3c77fa10cd1cd503eb4d2540394629f6.dir + size: 42626894 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -43,8 +43,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 8f0f5481075094460ab852ace2fa9b7a.dir - size: 43692138 + md5: 3c77fa10cd1cd503eb4d2540394629f6.dir + size: 42626894 nfiles: 2 params: configs/build_model.yaml: @@ -65,28 +65,28 @@ stages: presets: medium_quality excluded_model_types: - RF + - FASTAI + - CAT - NN_TORCH - KNN - XT - - CAT - - FASTAI infer_limit: 0.05 infer_limit_batch_size: 10000 outs: - path: data/fit_predictions/ hash: md5 - md5: e2a05a84a14d35516a6cda8e0a1e963c.dir - size: 3681005 + md5: e0a11ac6e4adf69d6180c0217c639a0e.dir + size: 3680908 nfiles: 1 - path: data/model/ hash: md5 - md5: 7b0382d001ed2bd7aec5c8112f69d129.dir - size: 793365790 - nfiles: 30 + md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir + size: 805896324 + nfiles: 31 - path: metrics/fit_metrics.json hash: md5 - md5: bcfd8d3bd3af858fa3dc26433bc8cd9e - size: 
224 + md5: 0ed5b1141bbb8bc3156e7c056b29f3cd + size: 225 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -96,13 +96,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 7b0382d001ed2bd7aec5c8112f69d129.dir - size: 793365790 - nfiles: 30 + md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir + size: 805896324 + nfiles: 31 - path: data/prepared_data hash: md5 - md5: 8f0f5481075094460ab852ace2fa9b7a.dir - size: 43692138 + md5: 3c77fa10cd1cd503eb4d2540394629f6.dir + size: 42626894 nfiles: 2 params: configs/settings.yaml: @@ -114,8 +114,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 90b5275b5d9829a42573ade3f5a025d2.dir - size: 648526 + md5: 38707d16ae1e2330cc03f524db9cdd60.dir + size: 648730 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -126,13 +126,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 90b5275b5d9829a42573ade3f5a025d2.dir - size: 648526 + md5: 38707d16ae1e2330cc03f524db9cdd60.dir + size: 648730 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 8f0f5481075094460ab852ace2fa9b7a.dir - size: 43692138 + md5: 3c77fa10cd1cd503eb4d2540394629f6.dir + size: 42626894 nfiles: 2 params: configs/settings.yaml: @@ -142,7 +142,7 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: be48389ba2755e6c18e41243aaa9bb81 + md5: 145e7ac84ab4a4407b23695a632b4d91 size: 226 startup_cleanup: cmd: python 0_startup_cleanup.py