diff --git a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml index 27bedaa..6a777af 100644 --- a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml @@ -1,6 +1,6 @@ dataclient_type: minio data_location: s3://dev_bucket -train_proportion: 0.85 +train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/src/dvc.lock b/modules/ml-pipeline/src/pipeline/src/dvc.lock index ac079fa..0092858 100644 --- a/modules/ml-pipeline/src/pipeline/src/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/src/dvc.lock @@ -5,18 +5,18 @@ stages: deps: - path: prepare_data.py hash: md5 - md5: 113f292aa8fa1ecec56b21cfc7f657a9 + md5: 38b0836237bfa25ea0d71ca259610f4d size: 3623 params: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet output_train_filepath: ./data/prepared_data/train.parquet - train_proportion: 0.85 + train_proportion: 0.9 outs: - path: data/prepared_data/ hash: md5 - md5: 57c36f8222034dc253f65000274bbd7b.dir - size: 35596 + md5: b33066afcf18879beb7738317b840957.dir + size: 34895 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 3576 - path: data/prepared_data hash: md5 - md5: 57c36f8222034dc253f65000274bbd7b.dir - size: 35596 + md5: b33066afcf18879beb7738317b840957.dir + size: 34895 nfiles: 2 params: configs/build_model.yaml: @@ -41,7 +41,7 @@ stages: outs: - path: data/model/ hash: md5 - md5: b163a05d5a1a5649caf10e23bd5db8d0.dir + md5: 9e7589253302a779852158756c2a7bb9.dir size: 1096 nfiles: 1 generate_predictions: @@ -49,13 +49,13 @@ stages: deps: - path: data/model hash: md5 - md5: b163a05d5a1a5649caf10e23bd5db8d0.dir + md5: 9e7589253302a779852158756c2a7bb9.dir size: 1096 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 57c36f8222034dc253f65000274bbd7b.dir - size: 35596 + md5: b33066afcf18879beb7738317b840957.dir + size: 34895 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -68,21 +68,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 06fd34c447fe9e5b4b9892c57257eb54.dir - size: 2334 + md5: b9c93c8c5fc441bdc0fb96cee19b1871.dir + size: 2121 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: 06fd34c447fe9e5b4b9892c57257eb54.dir - size: 2334 + md5: b9c93c8c5fc441bdc0fb96cee19b1871.dir + size: 2121 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 57c36f8222034dc253f65000274bbd7b.dir - size: 35596 + md5: b33066afcf18879beb7738317b840957.dir + size: 34895 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -95,5 +95,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 2706ac7ddd35940e95c187fd0178e52c + md5: 310e17ea14d1669035ab3c44c0204f57 size: 183 diff --git a/modules/ml-pipeline/src/pipeline/src/prepare_data.py b/modules/ml-pipeline/src/pipeline/src/prepare_data.py index dca9b57..cb7ed80 100644 --- a/modules/ml-pipeline/src/pipeline/src/prepare_data.py +++ b/modules/ml-pipeline/src/pipeline/src/prepare_data.py @@ -109,4 +109,4 @@ if __name__ == "__main__": logger.info("-------------------------------") logger.info(f"--- {__file__} - Complete! ---") - logger.info("-------------------------===---") + logger.info("-------------------------------")