From cc2079403af533a066889f044b8af4eaea9becae Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 12 Sep 2023 23:29:32 +0100 Subject: [PATCH] test tag and push --- .github/workflows/MLPipelinePostMerge.yml | 172 +++++++++--------- .../src/pipeline/src/prepare_data.py | 20 +- 2 files changed, 100 insertions(+), 92 deletions(-) diff --git a/.github/workflows/MLPipelinePostMerge.yml b/.github/workflows/MLPipelinePostMerge.yml index b43007d..97a8c2d 100644 --- a/.github/workflows/MLPipelinePostMerge.yml +++ b/.github/workflows/MLPipelinePostMerge.yml @@ -1,58 +1,95 @@ name: Register the model for the given pipeline branch -# on: -# push: -# branches: -# - "model-**" - on: - pull_request: - types: - - closed + push: branches: - - "master" + - "model-**" + +# on: +# pull_request: +# types: +# - closed +# branches: +# - "master" permissions: write-all jobs: - Promote-Model-To-Dev: - if: github.event.pull_request.merged == true - runs-on: ubuntu-latest + # Promote-Model-To-Dev: + # if: github.event.pull_request.merged == true + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # - name: Install packages to retrieve artifacts + # run: | + # pip install --upgrade pip + # pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt + + # - name: Retrieve artifacts (dvc.lock) + # env: + # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + # run: | + # cd modules/ml-pipeline/src/pipeline/src + # dvc pull -r experiments + + # - name: Push artifacts to Dev + # env: + # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + # run: | + # cd modules/ml-pipeline/src/pipeline/src + # dvc push -r dev + + + Register-New-Model-Dev: + # if: github.event.pull_request.merged == true + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Install packages to 
retrieve artifacts + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install packages to register model + env: + AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} run: | pip install --upgrade pip pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt - - name: Retrieve artifacts (dvc.lock) + - name: Register Model env: AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} run: | - cd modules/ml-pipeline/src/pipeline/src - dvc pull -r experiments - - name: Push artifacts to Dev - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - cd modules/ml-pipeline/src/pipeline/src - dvc push -r dev + REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + git config user.name "Github-Bot" + git config user.email "Github-Bot@no-reply.com" - # Register-New-Model-Dev: - # if: github.event.pull_request.merged == true + git tag model@v0.0.1 + git push origin model@v0.0.1 # push the exact tag created on the previous line + + # gto register test --repo https://github.com/Hestia-Homes/ML/ + # echo "chicken" >> test.md + + # gto register ${REGISTER_MODEL_NAME} + # gto assign regression --stage dev + # gto show + + # Register-Prediction-Image-Dev: + # needs: Promote-Model-To-Dev + # # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v2 - # with: - # fetch-depth: 0 - # - name: Install packages to register model + # steps: + # - uses: actions/checkout@v3 + # - name: Install packages to retrieve artifacts # env: # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} # AWS_SECRET_ACCESS_KEY: ${{ 
secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} @@ -60,65 +97,30 @@ jobs: # pip install --upgrade pip # pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt - # - name: Register Model + # - name: Retrieve artifacts (dvc.lock) # env: # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} # AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} # run: | + # cd modules/ml-pipeline/src/pipeline/src + # dvc pull -r dev + # - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry) + # run: | + # cd modules/ml-pipeline/src/pipeline/ # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - # # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + # docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME} - # git config user.name "Github-Bot" - # git config user.email "Github-Bot@no-reply.com" + # - name: ECR Login - Dev + # env: + # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + # run: | + # echo "LOGIN TO ECR" - # # gto register test --repo https://github.com/Hestia-Homes/ML/ - # # echo "chicken" >> test.md - - # git checkout master - # gto register ${REGISTER_MODEL_NAME} - # gto assign regression --stage dev - # gto show - - Register-Prediction-Image-Dev: - needs: Promote-Model-To-Dev - # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Install packages to retrieve artifacts - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt - - - name: Retrieve artifacts (dvc.lock) - env: - 
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - cd modules/ml-pipeline/src/pipeline/src - dvc pull -r dev - - - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry) - run: | - cd modules/ml-pipeline/src/pipeline/ - REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME} - - - name: ECR Login - Dev - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - echo "LOGIN TO ECR" - - - name: Push Prediction image to ECR - Dev - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - echo "PUSH TO ECR" + # - name: Push Prediction image to ECR - Dev + # env: + # AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + # run: | + # echo "PUSH TO ECR" diff --git a/modules/ml-pipeline/src/pipeline/src/prepare_data.py b/modules/ml-pipeline/src/pipeline/src/prepare_data.py index 53ff8fc..6df07fb 100644 --- a/modules/ml-pipeline/src/pipeline/src/prepare_data.py +++ b/modules/ml-pipeline/src/pipeline/src/prepare_data.py @@ -50,7 +50,7 @@ def prepare_data( new_feature_funcs: dict, output_train_filepath: str = "train.parquet", output_test_filepath: str = "test.parquet", -) -> Tuple[pd.DataFrame, pd.DataFrame]: +) -> Tuple[pd.DataFrame, Union[pd.DataFrame, None]]: """ Given a client and location, load data into the pipeline :param dataclient: DataClient, Determines how to get data from the given provider (cloud or local) @@ -78,9 +78,13 @@ def prepare_data( logger.info("--- Splitting data ---") logger.info("----------------------") - train, test = train_test_split( - data, 
train_size=train_proportion, test_size=(1 - train_proportion) - ) + if train_proportion == 1: + train = data + test = None + else: + train, test = train_test_split( + data, train_size=train_proportion, test_size=(1 - train_proportion) + ) logger.info("-----------------------") logger.info("--- Outputting data ---") @@ -89,9 +93,11 @@ def prepare_data( datahandler.save_data( dataclient=output_dataclient, obj=train, location=output_train_filepath ) - datahandler.save_data( - dataclient=output_dataclient, obj=test, location=output_test_filepath - ) + + if test is not None:  # a DataFrame has no unambiguous truth value; only skip when no test split exists + datahandler.save_data( + dataclient=output_dataclient, obj=test, location=output_test_filepath + ) return train, test