diff --git a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml index 4336185..4aeffb7 100644 --- a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml @@ -1,6 +1,6 @@ dataclient_type: minio data_location: s3://dev_bucket -train_proportion: 0.1 +train_proportion: 0.7 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/src/dvc.lock b/modules/ml-pipeline/src/pipeline/src/dvc.lock index 956ef86..71172d8 100644 --- a/modules/ml-pipeline/src/pipeline/src/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/src/dvc.lock @@ -11,12 +11,12 @@ stages: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet output_train_filepath: ./data/prepared_data/train.parquet - train_proportion: 0.1 + train_proportion: 0.7 outs: - path: data/prepared_data/ hash: md5 - md5: 91a6bd7128a094ad083557b08e2b97e6.dir - size: 34952 + md5: fdb2377affab10953f85fdf280ee897b.dir + size: 36663 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 3576 - path: data/prepared_data hash: md5 - md5: 91a6bd7128a094ad083557b08e2b97e6.dir - size: 34952 + md5: fdb2377affab10953f85fdf280ee897b.dir + size: 36663 nfiles: 2 params: configs/build_model.yaml: @@ -41,7 +41,7 @@ stages: outs: - path: data/model/ hash: md5 - md5: fb041f589f43031bc176c2ca73599950.dir + md5: 7afb942aee47aa6da23f7dec7ce666a8.dir size: 1096 nfiles: 1 generate_predictions: @@ -49,13 +49,13 @@ stages: deps: - path: data/model hash: md5 - md5: fb041f589f43031bc176c2ca73599950.dir + md5: 7afb942aee47aa6da23f7dec7ce666a8.dir size: 1096 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 91a6bd7128a094ad083557b08e2b97e6.dir - size: 34952 + md5: fdb2377affab10953f85fdf280ee897b.dir + size: 36663 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -68,21 +68,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: e03eac67fceabe6c3d504446ae32f344.dir - size: 5384 + md5: 339924cbd0435a59be599c06fd2b25e6.dir + size: 2949 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: e03eac67fceabe6c3d504446ae32f344.dir - size: 5384 + md5: 339924cbd0435a59be599c06fd2b25e6.dir + size: 2949 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 91a6bd7128a094ad083557b08e2b97e6.dir - size: 34952 + md5: fdb2377affab10953f85fdf280ee897b.dir + size: 36663 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -95,5 +95,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 4adf69df1e693cf9ceb7ce68f6311bf6 - size: 181 + md5: 6528bdbe67f968d8eb240533efab48d5 + size: 182