From 680e879503c5b4dafb4702a1ad9cb2391cb500fa Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Wed, 11 Oct 2023 15:38:55 +0000 Subject: [PATCH] 400 second model --- .../src/pipeline/configs/build_model.yaml | 2 +- .../pipeline/configs/post_prediction_logic.py | 13 +++--- modules/ml-pipeline/src/pipeline/dvc.lock | 46 +++++++++---------- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index ee7394e..7e409bf 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -13,6 +13,6 @@ default: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 60 + time_limit: 400 presets: medium_quality excluded_model_types: ['KNN', 'RF'] diff --git a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py index bb36713..1ffab90 100644 --- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py @@ -5,19 +5,18 @@ import pandas as pd def clip_predictions_to_minimum_value( - data: pd.DataFrame, predictions: pd.Series, minimum_value: int = 1 + data: pd.DataFrame, + predictions: pd.Series, ) -> pd.Series: series_name = predictions.name predictions.name = "predictions" predictions_df = pd.concat([data, predictions], axis=1) # We expect all prediction to be atleast one point improvement - replace_index = ( - predictions_df["predictions"] > predictions_df["CARBON_STARTING"] - 1 - ) - predictions_df.loc[replace_index, "predictions"] = ( - predictions_df.loc[replace_index, "CARBON_STARTING"] - minimum_value - ) + replace_index = predictions_df["predictions"] > predictions_df["CARBON_STARTING"] + predictions_df.loc[replace_index, "predictions"] = predictions_df.loc[ + replace_index, "CARBON_STARTING" + ] predictions_new = predictions_df["predictions"] predictions_new.name = series_name diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index d2291d8..2056834 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -29,8 +29,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: cd9a3d5e6208c1fd5de513b4d5c51b5b.dir - size: 30121189 + md5: 5fd3c01804ee2994ee77fc501d178be4.dir + size: 30137355 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -41,8 +41,8 @@ stages: size: 5359 - path: data/prepared_data hash: md5 - md5: cd9a3d5e6208c1fd5de513b4d5c51b5b.dir - size: 30121189 + md5: 5fd3c01804ee2994ee77fc501d178be4.dir + size: 30137355 nfiles: 2 params: configs/build_model.yaml: @@ -58,7 +58,7 @@ stages: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error - time_limit: 60 + time_limit: 400 presets: medium_quality excluded_model_types: - KNN @@ -66,13 +66,13 @@ stages: outs: - path: data/model/ hash: md5 - md5: 0d43e4ac3985da215dadf5fed8e68200.dir - size: 210841782 - nfiles: 21 + md5: 4b49c12395a645e35e50a9de8840f08d.dir + size: 282024140 + nfiles: 24 - path: metrics/fit_metrics.json hash: md5 - md5: 06f8bb0d004b91c33493dbee9a8763e7 - size: 206 + md5: a6d139fa59f5ddf75023bb7d3364f6d2 + size: 225 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -82,13 +82,13 @@ stages: size: 3028 - path: data/model hash: md5 - md5: 0d43e4ac3985da215dadf5fed8e68200.dir - size: 210841782 - nfiles: 21 + md5: 4b49c12395a645e35e50a9de8840f08d.dir + size: 282024140 + nfiles: 24 - path: data/prepared_data hash: md5 - md5: cd9a3d5e6208c1fd5de513b4d5c51b5b.dir - size: 30121189 + md5: 5fd3c01804ee2994ee77fc501d178be4.dir + size: 30137355 nfiles: 2 params: configs/settings.yaml: @@ -100,8 +100,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: acdcb06ee7574672b1148c10c37a868b.dir - size: 275959 + md5: 8f724261b3d17bf87067e91a1ff99077.dir + size: 441423 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -112,13 +112,13 @@ stages: size: 4487 - path: data/predictions hash: md5 - md5: acdcb06ee7574672b1148c10c37a868b.dir - size: 275959 + md5: 8f724261b3d17bf87067e91a1ff99077.dir + size: 441423 nfiles: 1 - path: data/prepared_data hash: md5 - md5: cd9a3d5e6208c1fd5de513b4d5c51b5b.dir - size: 30121189 + md5: 5fd3c01804ee2994ee77fc501d178be4.dir + size: 30137355 nfiles: 2 params: configs/settings.yaml: @@ -128,8 +128,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: e3bdc173023a7d909704f0313aa1609f - size: 219 + md5: 38787835f838f65c6cc75654843eb311 + size: 223 startup_cleanup: cmd: python 0_startup_cleanup.py deps: