From 78bf0a490dbf9bc281cf773f0d2317ea381ac786 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Wed, 27 Mar 2024 23:43:07 +0000 Subject: [PATCH] use 0.9 training data --- .../src/pipeline/configs/settings.yaml | 2 +- modules/ml-pipeline/src/pipeline/dvc.lock | 50 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index fecdcb0..66f7089 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -19,7 +19,7 @@ default: input_dataclient_type: aws-s3 output_dataclient_type: local data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet - train_proportion: 1 + train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 81224d8..ef40a2d 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -45,12 +45,12 @@ stages: default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet - default.prepare_data.train_proportion: 1 + default.prepare_data.train_proportion: 0.9 outs: - path: data/prepared_data/ hash: md5 - md5: 35d7daa8144434e188ba3b1da4bcf328.dir - size: 33946500 + md5: 824541f44e6538d2ef10e9d754c79743.dir + size: 36691842 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -61,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 35d7daa8144434e188ba3b1da4bcf328.dir - size: 33946500 + md5: 824541f44e6538d2ef10e9d754c79743.dir + size: 36691842 nfiles: 2 params: configs/build_model.yaml: @@ -95,18 +95,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 19d033f5abfa9b064c3e52815e607ced.dir - size: 3927492 + md5: 5a3091120d3497fa00b994d91bc7e5eb.dir + size: 3664806 nfiles: 1 - path: data/model/ hash: md5 - md5: f159d40353b01ffdcf1b1b490c019f1f.dir - size: 787748148 - nfiles: 32 + md5: 074da8dcfa515b9f3d082b21c7d76616.dir + size: 721558897 + nfiles: 31 - path: metrics/fit_metrics.json hash: md5 - md5: e69d56ab9d82f23f2aa66001bd9bebbc - size: 229 + md5: 728a49dcef5a98182325df455f929a33 + size: 225 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -116,13 +116,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: f159d40353b01ffdcf1b1b490c019f1f.dir - size: 787748148 - nfiles: 32 + md5: 074da8dcfa515b9f3d082b21c7d76616.dir + size: 721558897 + nfiles: 31 - path: data/prepared_data hash: md5 - md5: 35d7daa8144434e188ba3b1da4bcf328.dir - size: 33946500 + md5: 824541f44e6538d2ef10e9d754c79743.dir + size: 36691842 nfiles: 2 params: configs/settings.yaml: @@ -134,8 +134,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 50d0c76fc56c6290babeff1c84750316.dir - size: 651956 + md5: 680f51234d214d4cab9e6a064c75fc5d.dir + size: 499546 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -146,13 +146,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 50d0c76fc56c6290babeff1c84750316.dir - size: 651956 + md5: 680f51234d214d4cab9e6a064c75fc5d.dir + size: 499546 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 35d7daa8144434e188ba3b1da4bcf328.dir - size: 33946500 + md5: 824541f44e6538d2ef10e9d754c79743.dir + size: 36691842 nfiles: 2 params: configs/settings.yaml: @@ -162,5 +162,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 542b982d6aa9fe0fdb89611e4299cb1e - size: 228 + md5: 67b7ab30a4b0839d20bc6eb0c84e4dd1 + size: 226