test tag and push

2026-07-27 22:45:04 +00:00 · 2023-09-12 23:29:32 +01:00 · 2023-09-12 23:29:32 +01:00 · cc2079403a
commit cc2079403a
parent eb41a8d1a0
2 changed files with 100 additions and 92 deletions
--- a/.github/workflows/MLPipelinePostMerge.yml
+++ b/.github/workflows/MLPipelinePostMerge.yml
@ -1,58 +1,95 @@
 name: Register the model for the given pipeline branch

-# on:
-#   push:
-#     branches:
-#       - "model-**"
-
 on:
-  pull_request:
-    types:
-      - closed
+  push:
    branches:
-      - "master"
+      - "model-**"
+
+# on:
+#   pull_request:
+#     types:
+#       - closed
+#     branches:
+#       - "master"

 permissions: write-all

 jobs:

-  Promote-Model-To-Dev:
-    if: github.event.pull_request.merged == true
-    runs-on: ubuntu-latest
+  # Promote-Model-To-Dev:
+  #   if: github.event.pull_request.merged == true
+  #   runs-on: ubuntu-latest

+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - name: Install packages to retrieve artifacts
+  #     run: |
+  #       pip install --upgrade pip
+  #       pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
+
+  #   - name: Retrieve artifacts (dvc.lock)
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc pull -r experiments
+
+  #   - name: Push artifacts to Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc push -r dev
+
+
+  Register-New-Model-Dev:
+    # if: github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
+    - name: Install packages to register model
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
      run: |
        pip install --upgrade pip
        pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt

-    - name: Retrieve artifacts (dvc.lock)
+    - name: Register Model
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
      run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc pull -r experiments

-    - name: Push artifacts to Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc push -r dev
+        # Push events carry no pull_request payload, so read the branch from GITHUB_REF_NAME; the model name is the text before the first "-".
+        REGISTER_MODEL_NAME=$(echo "${GITHUB_REF_NAME}" | awk -F"-" '{print $1}')

+        git config user.name "Github-Bot"
+        git config user.email "Github-Bot@no-reply.com"

-  # Register-New-Model-Dev:
-  #   if: github.event.pull_request.merged == true
+        git tag model@v0.0.1
+        git push origin model@v0.0.1  # must name exactly the tag created on the previous line
+
+        # gto register test --repo https://github.com/Hestia-Homes/ML/
+        # echo "chicken" >> test.md
+
+        # gto register ${REGISTER_MODEL_NAME}
+        # gto assign regression --stage dev
+        # gto show
+
+  # Register-Prediction-Image-Dev:
+  #   needs: Promote-Model-To-Dev
+  #   # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
  #   runs-on: ubuntu-latest
-  #   steps:
-  #   - uses: actions/checkout@v2
-  #     with:
-  #       fetch-depth: 0

-  #   - name: Install packages to register model
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - name: Install packages to retrieve artifacts
  #     env:
  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
@ -60,65 +97,30 @@ jobs:
  #       pip install --upgrade pip
  #       pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt

-  #   - name: Register Model
+  #   - name: Retrieve artifacts (dvc.lock)
  #     env:
  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc pull -r dev

+  #   - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry)
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/
  #       REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-  #       # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+  #       docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME}

-  #       git config user.name "Github-Bot"
-  #       git config user.email "Github-Bot@no-reply.com"
+  #   - name: ECR Login - Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       echo "LOGIN TO ECR"

-  #       # gto register test --repo https://github.com/Hestia-Homes/ML/
-  #       # echo "chicken" >> test.md
-
-  #       git checkout master
-  #       gto register ${REGISTER_MODEL_NAME}
-  #       gto assign regression --stage dev
-  #       gto show
-
-  Register-Prediction-Image-Dev:
-    needs: Promote-Model-To-Dev
-    # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
-
-    - name: Retrieve artifacts (dvc.lock)
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc pull -r dev
-
-    - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry)
-      run: |
-        cd modules/ml-pipeline/src/pipeline/
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME}
-
-    - name: ECR Login - Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        echo "LOGIN TO ECR"
-
-    - name: Push Prediction image to ECR - Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        echo "PUSH TO ECR"
+  #   - name: Push Prediction image to ECR - Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       echo "PUSH TO ECR"
--- a/modules/ml-pipeline/src/pipeline/src/prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/src/prepare_data.py
@ -50,7 +50,7 @@ def prepare_data(
    new_feature_funcs: dict,
    output_train_filepath: str = "train.parquet",
    output_test_filepath: str = "test.parquet",
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, Union[pd.DataFrame, None]]:
    """
    Given a client and location, load data into the pipeline
    :param dataclient: DataClient, Determines how to get data from the given provider (cloud or local)
@ -78,9 +78,13 @@ def prepare_data(
    logger.info("--- Splitting data ---")
    logger.info("----------------------")

-    train, test = train_test_split(
-        data, train_size=train_proportion, test_size=(1 - train_proportion)
-    )
+    if train_proportion == 1:
+        train = data
+        test = None
+    else:
+        train, test = train_test_split(
+            data, train_size=train_proportion, test_size=(1 - train_proportion)
+        )

    logger.info("-----------------------")
    logger.info("--- Outputting data ---")
@ -89,9 +93,11 @@ def prepare_data(
    datahandler.save_data(
        dataclient=output_dataclient, obj=train, location=output_train_filepath
    )
-    datahandler.save_data(
-        dataclient=output_dataclient, obj=test, location=output_test_filepath
-    )
+
+    if test is not None:  # a DataFrame has no unambiguous truth value; test is None only when no split was made
+        datahandler.save_data(
+            dataclient=output_dataclient, obj=test, location=output_test_filepath
+        )

    return train, test