From f92c97f6cfb832feff1fee4a77a484251bf3f81f Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 9 Feb 2024 16:19:47 +0000 Subject: [PATCH] drop days_starting and days_ending --- .../src/pipeline/configs/build_model.yaml | 2 +- .../src/pipeline/configs/settings.yaml | 2 +- modules/ml-pipeline/src/pipeline/dvc.lock | 56 ++++++++++--------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index fcec7f7..f4770a7 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -14,7 +14,7 @@ default: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 4000 + time_limit: 400 presets: medium_quality excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT'] infer_limit: 0.05 diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 4327e64..98cf6dc 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -35,7 +35,7 @@ default: subsample_seed: 0 target: sap_ending identifier_columns: ["uprn"] - drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending"] + drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"] retain_features: null diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index f15978f..9bb73b8 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -5,8 +5,8 @@ stages: deps: - path: 1_prepare_data.py hash: md5 - md5: 1793a35e71751d3c84f9affc67ecb9a8 - size: 4296 + md5: 11a3b8bfdfe199ab7ecc39ccc5652649 + size: 4298 params: configs/settings.yaml: default.feature_processor.feature_processor_config.drop_columns: @@ -15,6 +15,8 @@ stages: - rdsap_change - heat_demand_ending - carbon_ending + - days_to_starting + - days_to_ending default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 @@ -29,8 +31,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 84fa631bd02686b052d6a7144eafd38e.dir - size: 43859225 + md5: f85c36a5dfd31a897538b3934d5fb997.dir + size: 41375196 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -41,8 +43,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 84fa631bd02686b052d6a7144eafd38e.dir - size: 43859225 + md5: f85c36a5dfd31a897538b3934d5fb997.dir + size: 41375196 nfiles: 2 params: configs/build_model.yaml: @@ -59,7 +61,7 @@ stages: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error - time_limit: 4000 + time_limit: 400 presets: medium_quality excluded_model_types: - RF @@ -73,18 +75,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: ede187e9d0bffdef054f573f3c2bd222.dir - size: 3578590 + md5: 991e6c55826953aa7c2be573369ec96f.dir + size: 3574047 nfiles: 1 - path: data/model/ hash: md5 - md5: b2ad0b538dc4aef0de3d431fc9c40c4f.dir - size: 814720415 - nfiles: 31 + md5: f8a8b7462831bd46b1e2df47d73bb69d.dir + size: 391430703 + nfiles: 23 - path: metrics/fit_metrics.json hash: md5 - md5: c45b84f12971a0156e4f3d85d3e725f5 - size: 218 + md5: 35a66a845854cc6fee9dd10860e216bb + size: 225 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -94,13 +96,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: b2ad0b538dc4aef0de3d431fc9c40c4f.dir - size: 814720415 - nfiles: 31 + md5: f8a8b7462831bd46b1e2df47d73bb69d.dir + size: 391430703 + nfiles: 23 - path: data/prepared_data hash: md5 - md5: 84fa631bd02686b052d6a7144eafd38e.dir - size: 43859225 + md5: f85c36a5dfd31a897538b3934d5fb997.dir + size: 41375196 nfiles: 2 params: configs/settings.yaml: @@ -112,8 +114,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 5e60ca251af51de6fef3d0c659f8bb27.dir - size: 627416 + md5: 94b7381ac318b1ca18e0bc086778f7ce.dir + size: 626160 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -124,13 +126,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 5e60ca251af51de6fef3d0c659f8bb27.dir - size: 627416 + md5: 94b7381ac318b1ca18e0bc086778f7ce.dir + size: 626160 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 84fa631bd02686b052d6a7144eafd38e.dir - size: 43859225 + md5: f85c36a5dfd31a897538b3934d5fb997.dir + size: 41375196 nfiles: 2 params: configs/settings.yaml: @@ -140,8 +142,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 033efa4d4044b6b6fc92dd37194727fa - size: 225 + md5: 4d8681f7c0f41f97be52d6b1ae039c5b + size: 224 startup_cleanup: cmd: python 0_startup_cleanup.py deps: