From b0f12db16d48972870574fbeadf4ddc395e58941 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 10 Sep 2023 20:04:42 +0100 Subject: [PATCH] split out version control system requirements --- .github/workflows/MLPipelinePullRequest.yml | 6 ++-- modules/ml-pipeline/Makefile | 2 +- .../pipeline/src/configs/prepare_data.yaml | 2 +- modules/ml-pipeline/src/pipeline/src/dvc.lock | 34 +++++++++---------- .../predictions/requirements-dev.txt | 3 -- .../training/requirements-dev.txt | 3 -- .../requirements/training/requirements.txt | 2 -- .../version_control/requirements.txt | 3 ++ 8 files changed, 24 insertions(+), 31 deletions(-) create mode 100644 modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt diff --git a/.github/workflows/MLPipelinePullRequest.yml b/.github/workflows/MLPipelinePullRequest.yml index d6af551..186c382 100644 --- a/.github/workflows/MLPipelinePullRequest.yml +++ b/.github/workflows/MLPipelinePullRequest.yml @@ -23,8 +23,7 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} run: | pip install --upgrade pip - pip install dvc dvc_s3 - # pip install -r src/requirements/predictions/requirements-dev.txt + pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt - name: Retrieve artifacts (dvc.lock) env: @@ -52,8 +51,7 @@ jobs: - name: Install packages to retrieve artifacts run: | pip install --upgrade pip - pip install dvc dvc_s3 - # pip install -r src/requirements/predictions/requirements-dev.txt + pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt - name: Retrieve artifacts (dvc.lock) env: diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile index c9b0ddf..45a16f5 100644 --- a/modules/ml-pipeline/Makefile +++ b/modules/ml-pipeline/Makefile @@ -11,7 +11,7 @@ dev-pyenv: pyenv install ${PYTHON_VERSION} || echo "Python version already installed" pyenv global ${PYTHON_VERSION} python3 -m venv .dev_env - . .dev_env/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pre-commit install + . .dev_env/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pip install -r src/pipeline/src/requirements/version_control/requirements.txt && pre-commit install echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND" echo "source .dev_env/bin/activate" diff --git a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml index 27bedaa..9a0c3bd 100644 --- a/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml +++ b/modules/ml-pipeline/src/pipeline/src/configs/prepare_data.yaml @@ -1,6 +1,6 @@ dataclient_type: minio data_location: s3://dev_bucket -train_proportion: 0.85 +train_proportion: 0.8 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/src/dvc.lock b/modules/ml-pipeline/src/pipeline/src/dvc.lock index 220ef2a..ed7c57c 100644 --- a/modules/ml-pipeline/src/pipeline/src/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/src/dvc.lock @@ -11,12 +11,12 @@ stages: configs/prepare_data.yaml: output_test_filepath: ./data/prepared_data/test.parquet output_train_filepath: ./data/prepared_data/train.parquet - train_proportion: 0.85 + train_proportion: 0.8 outs: - path: data/prepared_data/ hash: md5 - md5: e933c262266b270aa53bf63dc8bf8a64.dir - size: 35580 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 build_model: cmd: python build_model.py @@ -27,8 +27,8 @@ stages: size: 3576 - path: data/prepared_data hash: md5 - md5: e933c262266b270aa53bf63dc8bf8a64.dir - size: 35580 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 params: configs/build_model.yaml: @@ -41,7 +41,7 @@ stages: outs: - path: data/model/ hash: md5 - md5: ee01cc8135569d30b42ef4b7d181548f.dir + md5: fb7ae4137b445dc91e840b794d72e940.dir size: 1096 nfiles: 1 generate_predictions: @@ -49,13 +49,13 @@ stages: deps: - path: data/model hash: md5 - md5: ee01cc8135569d30b42ef4b7d181548f.dir + md5: fb7ae4137b445dc91e840b794d72e940.dir size: 1096 nfiles: 1 - path: data/prepared_data hash: md5 - md5: e933c262266b270aa53bf63dc8bf8a64.dir - size: 35580 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 - path: generate_predictions.py hash: md5 @@ -69,21 +69,21 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: b847bb0e13ae3af76c0d0b66d2bc0bfb.dir - size: 2334 + md5: 4d5854903b25bdae15d99c934ebcfb99.dir + size: 2531 nfiles: 1 generate_metrics: cmd: python generate_metrics.py deps: - path: data/predictions hash: md5 - md5: b847bb0e13ae3af76c0d0b66d2bc0bfb.dir - size: 2334 + md5: 4d5854903b25bdae15d99c934ebcfb99.dir + size: 2531 nfiles: 1 - path: data/prepared_data hash: md5 - md5: e933c262266b270aa53bf63dc8bf8a64.dir - size: 35580 + md5: f0d462fe6b1a856a827409a745539285.dir + size: 36169 nfiles: 2 - path: generate_metrics.py hash: md5 @@ -96,5 +96,5 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: b3a6747855e3da77946292edddb0bf9e - size: 181 + md5: 3c9306e992b07491ff7e642949d6bc47 + size: 182 diff --git a/modules/ml-pipeline/src/pipeline/src/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/src/requirements/predictions/requirements-dev.txt index 28f6af4..5aac406 100644 --- a/modules/ml-pipeline/src/pipeline/src/requirements/predictions/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/src/requirements/predictions/requirements-dev.txt @@ -1,9 +1,6 @@ joblib==1.3.2 boto3==1.28.17 pandas==1.5.3 -dvc==3.18.0 -dvc-s3==2.23.0 -gto==1.0.4 scikit-learn==1.3.0 pyarrow==13.0.0 pre-commit==3.3.3 diff --git a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt index 28f6af4..5aac406 100644 --- a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements-dev.txt @@ -1,9 +1,6 @@ joblib==1.3.2 boto3==1.28.17 pandas==1.5.3 -dvc==3.18.0 -dvc-s3==2.23.0 -gto==1.0.4 scikit-learn==1.3.0 pyarrow==13.0.0 pre-commit==3.3.3 diff --git a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements.txt index 8c146e8..196dfe7 100644 --- a/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/src/requirements/training/requirements.txt @@ -1,5 +1,3 @@ boto3==1.28.41 pandas==1.5.3 -dvc==3.18.0 -gto==1.0.4 scikit-learn==1.3.0 diff --git a/modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt b/modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt new file mode 100644 index 0000000..8459d38 --- /dev/null +++ b/modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt @@ -0,0 +1,3 @@ +dvc==3.18.0 +dvc-s3==2.23.0 +gto==1.0.4