From 74bce78d2e5bf2ddb7f339c9fe1bc0812acb7fb5 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 11 Sep 2023 16:57:44 +0100 Subject: [PATCH] better model --- .../src/pipeline/src/.vscode/settings.json | 1 + .../pipeline/src/configs/prepare_data.yaml | 2 +- modules/ml-pipeline/src/pipeline/src/dvc.lock | 34 +++++++++---------- 3 files changed, 19 insertions(+), 18 deletions(-) create mode 100644 modules/ml-pipeline/src/pipeline/src/.vscode/settings.json diff --git a/modules/ml-pipeline/src/pipeline/src/.vscode/settings.json b/modules/ml-pipeline/src/pipeline/src/.vscode/settings.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/modules/ml-pipeline/src/pipeline/src/.vscode/settings.json @@ -0,0 +1 @@ +{} diff --git a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml index 9d084d1..9a0c3bd 100644 --- a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml @@ -1,6 +1,6 @@ dataclient_type: minio data_location: s3://dev_bucket -train_proportion: 0.4 +train_proportion: 0.8 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/src/dvc.lock b/modules/ml-pipeline/src/pipeline/src/dvc.lock index 9a1826b..ed7c57c 100644 --- a/modules/ml-pipeline/src/pipeline/src/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/src/dvc.lock @@ -11,12 +11,12 @@ stages: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet output_train_filepath: ./data/prepared_data/train.parquet - train_proportion: 0.4 + train_proportion: 0.8 outs: - path: data/prepared_data/ hash: md5 - md5: 2c91bf9e7db44bddf119efd1e56ade44.dir - size: 37095 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 3576 - path: data/prepared_data hash: md5 - md5: 2c91bf9e7db44bddf119efd1e56ade44.dir - size: 37095 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 params: configs/build_model.yaml: @@ -41,7 +41,7 @@ stages: outs: - path: data/model/ hash: md5 - md5: 2d500f958c97a8509fc37e94ab7c01df.dir + md5: fb7ae4137b445dc91e840b794d72e940.dir size: 1096 nfiles: 1 generate_predictions: @@ -49,13 +49,13 @@ stages: deps: - path: data/model hash: md5 - md5: 2d500f958c97a8509fc37e94ab7c01df.dir + md5: fb7ae4137b445dc91e840b794d72e940.dir size: 1096 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 2c91bf9e7db44bddf119efd1e56ade44.dir - size: 37095 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -69,21 +69,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: deb4baede52e3c3d968b7737390a4f03.dir - size: 4184 + md5: 4d5854903b25bdae15d99c934ebcfb99.dir + size: 2531 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: deb4baede52e3c3d968b7737390a4f03.dir - size: 4184 + md5: 4d5854903b25bdae15d99c934ebcfb99.dir + size: 2531 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 2c91bf9e7db44bddf119efd1e56ade44.dir - size: 37095 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -96,5 +96,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 448cfa9c5f954b50fd944b8e322534af - size: 181 + md5: 3c9306e992b07491ff7e642949d6bc47 + size: 182