From 51b704972058d3a14c12ac7cfd6dbaf04b03de26 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 3 Oct 2023 23:46:37 +0000 Subject: [PATCH] add optimised model --- modules/ml-pipeline/src/pipeline/2_build_model.py | 4 +--- modules/ml-pipeline/src/pipeline/configs/build_model.yaml | 6 +++--- modules/ml-pipeline/src/pipeline/core/MLModels.py | 8 ++++++-- modules/ml-pipeline/src/pipeline/dvc.lock | 5 ++--- modules/ml-pipeline/src/pipeline/metrics/.gitignore | 2 -- 5 files changed, 12 insertions(+), 13 deletions(-) delete mode 100644 modules/ml-pipeline/src/pipeline/metrics/.gitignore diff --git a/modules/ml-pipeline/src/pipeline/2_build_model.py b/modules/ml-pipeline/src/pipeline/2_build_model.py index 44d72cd..cae5cfd 100644 --- a/modules/ml-pipeline/src/pipeline/2_build_model.py +++ b/modules/ml-pipeline/src/pipeline/2_build_model.py @@ -93,10 +93,8 @@ def build_model( logger.info("--- Training model ---") logger.info("----------------------") - model_train_data = train_data.drop(columns=identifier_columns) - model.train_model( - data=model_train_data, + data=train_data.drop(columns=identifier_columns), target=target, model_hyperparameters=model_hyperparameters, ) diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index a5eb62a..d296e6a 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -1,7 +1,7 @@ default: build_model: model_type: AutogluonAutoML - model_save_filepath: ./data/model/autogluonmodel/ + model_save_filepath: ./data/model/optimised/ fit_metrics_filepath: ./metrics/fit_metrics.json SKLearnLinearRegression: null @@ -10,9 +10,9 @@ default: kernel: "linear" AutogluonAutoML: - output_filepath: ./data/model/autogluonmodel/ + output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 1000 + time_limit: 4000 presets: medium_quality excluded_model_types: ['KNN', 'RF'] diff --git a/modules/ml-pipeline/src/pipeline/core/MLModels.py b/modules/ml-pipeline/src/pipeline/core/MLModels.py index 4d9a9e9..4cf8b08 100644 --- a/modules/ml-pipeline/src/pipeline/core/MLModels.py +++ b/modules/ml-pipeline/src/pipeline/core/MLModels.py @@ -165,8 +165,12 @@ class AutogluonAutoML: if self.model is None: raise KeyError("No model trained/ loaded - unable to save") - logger.info("In local development mode - no need for s3 client") - logger.info("Using AutoGluon Model - Model saving already occured") + logger.info( + "Using AutoGluon Model - Model saving is using optimised deployment mode" + ) + + logger.info("Saving optimised model") + self.model.clone_for_deployment(str(path)) return str(path) diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 1cc260c..51fcfbe 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -10,7 +10,6 @@ stages: params: configs/settings.yaml: default.feature_processor.feature_processor_config.drop_columns: - - UPRN - HEAT_DEMAND_CHANGE - CARBON_CHANGE - RDSAP_CHANGE @@ -30,8 +29,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 951ad046d4fca2b977a314f9520e8235.dir - size: 28249626 + md5: 26f4de635a4c10cd4a07df1ffb4bbb2c.dir + size: 33889000 nfiles: 2 build_model: cmd: python 2_build_model.py diff --git a/modules/ml-pipeline/src/pipeline/metrics/.gitignore b/modules/ml-pipeline/src/pipeline/metrics/.gitignore deleted file mode 100644 index e6fbc8d..0000000 --- a/modules/ml-pipeline/src/pipeline/metrics/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/fit_metrics.json -/metrics.json