diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 126d696..d838705 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -8,9 +8,9 @@ "active": true }, "sap": { - "version": "v0.6.0", + "version": "v0.9.0", "stage": { - "dev": "v0.6.0" + "dev": "v0.9.0" }, "registered": true, "active": true diff --git a/modules/ml-pipeline/src/pipeline/analysis/feature_importance.parquet b/modules/ml-pipeline/src/pipeline/analysis/feature_importance.parquet index 6960946..ec8b0d3 100644 Binary files a/modules/ml-pipeline/src/pipeline/analysis/feature_importance.parquet and b/modules/ml-pipeline/src/pipeline/analysis/feature_importance.parquet differ diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index fcec7f7..1acea2a 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -16,6 +16,6 @@ default: eval_metric: mean_squared_error #mean_absolute_error time_limit: 4000 presets: medium_quality - excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT'] + excluded_model_types: ['RF', 'NN_TORCH', 'KNN', 'XT', 'CAT', 'FASTAI'] infer_limit: 0.05 infer_limit_batch_size: 10000 diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 19b0a5b..9b24faf 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -22,7 +22,8 @@ default: # data_filepath: s3://retrofit-data-dev/sap_change_model/floor_area_clean_test.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet - data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet + # data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet + data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_rooms.parquet # data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet train_proportion: 1 output_train_filepath: ./data/prepared_data/train.parquet diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 826e654..2f513d4 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -22,7 +22,7 @@ stages: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_type: dataframe - default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet + default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_rooms.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -31,8 +31,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 3c77fa10cd1cd503eb4d2540394629f6.dir - size: 42626894 + md5: 8f0f5481075094460ab852ace2fa9b7a.dir + size: 43692138 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -43,8 +43,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 3c77fa10cd1cd503eb4d2540394629f6.dir - size: 42626894 + md5: 8f0f5481075094460ab852ace2fa9b7a.dir + size: 43692138 nfiles: 2 params: configs/build_model.yaml: @@ -65,28 +65,28 @@ stages: presets: medium_quality excluded_model_types: - RF - - FASTAI - - CAT - NN_TORCH - KNN - XT + - CAT + - FASTAI infer_limit: 0.05 infer_limit_batch_size: 10000 outs: - path: data/fit_predictions/ hash: md5 - md5: e0a11ac6e4adf69d6180c0217c639a0e.dir - size: 3680908 + md5: e2a05a84a14d35516a6cda8e0a1e963c.dir + size: 3681005 nfiles: 1 - path: data/model/ hash: md5 - md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir - size: 805896324 - nfiles: 31 + md5: 7b0382d001ed2bd7aec5c8112f69d129.dir + size: 793365790 + nfiles: 30 - path: metrics/fit_metrics.json hash: md5 - md5: 0ed5b1141bbb8bc3156e7c056b29f3cd - size: 225 + md5: bcfd8d3bd3af858fa3dc26433bc8cd9e + size: 224 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -96,13 +96,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: bdaaf823857f9dc7b6ee2d4b88927cc1.dir - size: 805896324 - nfiles: 31 + md5: 7b0382d001ed2bd7aec5c8112f69d129.dir + size: 793365790 + nfiles: 30 - path: data/prepared_data hash: md5 - md5: 3c77fa10cd1cd503eb4d2540394629f6.dir - size: 42626894 + md5: 8f0f5481075094460ab852ace2fa9b7a.dir + size: 43692138 nfiles: 2 params: configs/settings.yaml: @@ -114,8 +114,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 38707d16ae1e2330cc03f524db9cdd60.dir - size: 648730 + md5: 90b5275b5d9829a42573ade3f5a025d2.dir + size: 648526 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -126,13 +126,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 38707d16ae1e2330cc03f524db9cdd60.dir - size: 648730 + md5: 90b5275b5d9829a42573ade3f5a025d2.dir + size: 648526 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 3c77fa10cd1cd503eb4d2540394629f6.dir - size: 42626894 + md5: 8f0f5481075094460ab852ace2fa9b7a.dir + size: 43692138 nfiles: 2 params: configs/settings.yaml: @@ -142,7 +142,7 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 145e7ac84ab4a4407b23695a632b4d91 + md5: be48389ba2755e6c18e41243aaa9bb81 size: 226 startup_cleanup: cmd: python 0_startup_cleanup.py