diff --git a/modules/ml-pipeline/src/pipeline/src/.dvc/config b/modules/ml-pipeline/src/pipeline/src/.dvc/config index 03ccfbc..c07cce5 100644 --- a/modules/ml-pipeline/src/pipeline/src/.dvc/config +++ b/modules/ml-pipeline/src/pipeline/src/.dvc/config @@ -1,2 +1,12 @@ -['remote "myremote"'] +[core] + remote = experiments +['remote "localremote"'] url = /tmp/dvcstore +['remote "experiments"'] + url = s3://retrofit-model-directory-dev/pipeline/experiments +['remote "dev"'] + url = s3://retrofit-model-directory-dev/pipeline/model_directory +['remote "staging"'] + url = s3://retrofit-model-directory-staging/pipeline/model_directory +['remote "prod"'] + url = s3://retrofit-model-directory-prod/pipeline/model_directory diff --git a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml index c17b906..27bedaa 100644 --- a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml @@ -1,6 +1,6 @@ dataclient_type: minio data_location: s3://dev_bucket -train_proportion: 0.2 +train_proportion: 0.85 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/src/dvc.lock b/modules/ml-pipeline/src/pipeline/src/dvc.lock index b4026e5..ac079fa 100644 --- a/modules/ml-pipeline/src/pipeline/src/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/src/dvc.lock @@ -11,12 +11,12 @@ stages: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet output_train_filepath: ./data/prepared_data/train.parquet - train_proportion: 0.2 + train_proportion: 0.85 outs: - path: data/prepared_data/ hash: md5 - md5: b9dd6ec988430bd4d4ec08824bf43b70.dir - size: 36061 + md5: 57c36f8222034dc253f65000274bbd7b.dir + size: 35596 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 3576 - path: data/prepared_data hash: md5 - md5: b9dd6ec988430bd4d4ec08824bf43b70.dir - size: 36061 + md5: 57c36f8222034dc253f65000274bbd7b.dir + size: 35596 nfiles: 2 params: configs/build_model.yaml: @@ -41,7 +41,7 @@ stages: outs: - path: data/model/ hash: md5 - md5: 2d4977388d8acf7a99bd6351ddb03d71.dir + md5: b163a05d5a1a5649caf10e23bd5db8d0.dir size: 1096 nfiles: 1 generate_predictions: @@ -49,13 +49,13 @@ stages: deps: - path: data/model hash: md5 - md5: 2d4977388d8acf7a99bd6351ddb03d71.dir + md5: b163a05d5a1a5649caf10e23bd5db8d0.dir size: 1096 nfiles: 1 - path: data/prepared_data hash: md5 - md5: b9dd6ec988430bd4d4ec08824bf43b70.dir - size: 36061 + md5: 57c36f8222034dc253f65000274bbd7b.dir + size: 35596 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -68,21 +68,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 25c8d2bd3ce9d1ab09c68aef722babad.dir - size: 4987 + md5: 06fd34c447fe9e5b4b9892c57257eb54.dir + size: 2334 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: 25c8d2bd3ce9d1ab09c68aef722babad.dir - size: 4987 + md5: 06fd34c447fe9e5b4b9892c57257eb54.dir + size: 2334 nfiles: 1 - path: data/prepared_data hash: md5 - md5: b9dd6ec988430bd4d4ec08824bf43b70.dir - size: 36061 + md5: 57c36f8222034dc253f65000274bbd7b.dir + size: 35596 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -95,5 +95,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: e7c575cadca7fe73d9b7659b71457815 - size: 182 + md5: 2706ac7ddd35940e95c187fd0178e52c + size: 183 diff --git a/modules/ml-pipeline/src/pipeline/src/metrics/.gitignore b/modules/ml-pipeline/src/pipeline/src/metrics/.gitignore index e69de29..fbadd1c 100644 --- a/modules/ml-pipeline/src/pipeline/src/metrics/.gitignore +++ b/modules/ml-pipeline/src/pipeline/src/metrics/.gitignore @@ -0,0 +1 @@ +/metrics.json diff --git a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt index f15e78f..28f6af4 100644 --- a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt @@ -1,7 +1,8 @@ joblib==1.3.2 -boto3==1.28.41 +boto3==1.28.17 pandas==1.5.3 dvc==3.18.0 +dvc-s3==2.23.0 gto==1.0.4 scikit-learn==1.3.0 pyarrow==13.0.0