From cc2079403af533a066889f044b8af4eaea9becae Mon Sep 17 00:00:00 2001
From: Michael Duong <michael123ste@gmail.com>
Date: Tue, 12 Sep 2023 23:29:32 +0100
Subject: [PATCH] test tag and push

---
 .github/workflows/MLPipelinePostMerge.yml     | 172 +++++++++---------
 .../src/pipeline/src/prepare_data.py          |  20 +-
 2 files changed, 100 insertions(+), 92 deletions(-)

diff --git a/.github/workflows/MLPipelinePostMerge.yml b/.github/workflows/MLPipelinePostMerge.yml
index b43007d..97a8c2d 100644
--- a/.github/workflows/MLPipelinePostMerge.yml
+++ b/.github/workflows/MLPipelinePostMerge.yml
@@ -1,58 +1,95 @@
 name: Register the model for the given pipeline branch
 
-# on:
-#   push:
-#     branches:
-#       - "model-**"
-
 on:
-  pull_request:
-    types:
-      - closed
+  push:
     branches:
-      - "master"
+      - "model-**"
+
+# on:
+#   pull_request:
+#     types:
+#       - closed
+#     branches:
+#       - "master"
 
 permissions: write-all
 
 jobs:
 
-  Promote-Model-To-Dev:
-    if: github.event.pull_request.merged == true
-    runs-on: ubuntu-latest
+  # Promote-Model-To-Dev:
+  #   if: github.event.pull_request.merged == true
+  #   runs-on: ubuntu-latest
 
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - name: Install packages to retrieve artifacts
+  #     run: |
+  #       pip install --upgrade pip
+  #       pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
+
+  #   - name: Retrieve artifacts (dvc.lock)
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc pull -r experiments
+
+  #   - name: Push artifacts to Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc push -r dev
+
+
+  Register-New-Model-Dev:
+    # if: github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
+    - name: Install packages to register model
+      env:
+        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
         pip install --upgrade pip
         pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
 
-    - name: Retrieve artifacts (dvc.lock)
+    - name: Register Model
       env:
         AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
       run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc pull -r experiments
 
-    - name: Push artifacts to Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc push -r dev
+        # Push events have no pull_request payload; on PR triggers use github.event.pull_request.head.ref instead.
+        REGISTER_MODEL_NAME=$(echo "${GITHUB_REF_NAME}" | awk -F"-" '{print $1}')
 
+        git config user.name "Github-Bot"
+        git config user.email "Github-Bot@no-reply.com"
 
-  # Register-New-Model-Dev:
-  #   if: github.event.pull_request.merged == true
+        git tag model@v0.0.1
+        git push origin model@v0.0.1  # must match the tag name created on the previous line
+
+        # gto register test --repo https://github.com/Hestia-Homes/ML/
+        # echo "chicken" >> test.md
+
+        # gto register ${REGISTER_MODEL_NAME}
+        # gto assign regression --stage dev
+        # gto show
+
+  # Register-Prediction-Image-Dev:
+  #   needs: Promote-Model-To-Dev
+  #   # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
   #   runs-on: ubuntu-latest
-  #   steps:
-  #   - uses: actions/checkout@v2
-  #     with:
-  #       fetch-depth: 0
 
-  #   - name: Install packages to register model
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - name: Install packages to retrieve artifacts
   #     env:
   #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
   #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
@@ -60,65 +97,30 @@ jobs:
   #       pip install --upgrade pip
   #       pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
 
-  #   - name: Register Model
+  #   - name: Retrieve artifacts (dvc.lock)
   #     env:
   #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
   #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
   #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/src
+  #       dvc pull -r dev
 
+  #   - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry)
+  #     run: |
+  #       cd modules/ml-pipeline/src/pipeline/
   #       REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-  #       # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
+  #       docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME}
 
-  #       git config user.name "Github-Bot"
-  #       git config user.email "Github-Bot@no-reply.com"
+  #   - name: ECR Login - Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       echo "LOGIN TO ECR"
 
-  #       # gto register test --repo https://github.com/Hestia-Homes/ML/
-  #       # echo "chicken" >> test.md
-
-  #       git checkout master
-  #       gto register ${REGISTER_MODEL_NAME}
-  #       gto assign regression --stage dev
-  #       gto show
-
-  Register-Prediction-Image-Dev:
-    needs: Promote-Model-To-Dev
-    # needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Install packages to retrieve artifacts
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        pip install --upgrade pip
-        pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
-
-    - name: Retrieve artifacts (dvc.lock)
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        cd modules/ml-pipeline/src/pipeline/src
-        dvc pull -r dev
-
-    - name: Build Prediction docker image (TODO - NEED LAMBDA IMAGE, need to add version from gto registry)
-      run: |
-        cd modules/ml-pipeline/src/pipeline/
-        REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
-        docker build . --file Prediction.Dockerfile --tag ${REGISTER_MODEL_NAME}
-
-    - name: ECR Login - Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        echo "LOGIN TO ECR"
-
-    - name: Push Prediction image to ECR - Dev
-      env:
-        AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
-      run: |
-        echo "PUSH TO ECR"
+  #   - name: Push Prediction image to ECR - Dev
+  #     env:
+  #       AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
+  #       AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
+  #     run: |
+  #       echo "PUSH TO ECR"
diff --git a/modules/ml-pipeline/src/pipeline/src/prepare_data.py b/modules/ml-pipeline/src/pipeline/src/prepare_data.py
index 53ff8fc..6df07fb 100644
--- a/modules/ml-pipeline/src/pipeline/src/prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/src/prepare_data.py
@@ -50,7 +50,7 @@ def prepare_data(
     new_feature_funcs: dict,
     output_train_filepath: str = "train.parquet",
     output_test_filepath: str = "test.parquet",
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, Union[pd.DataFrame, None]]:
     """
     Given a client and location, load data into the pipeline
     :param dataclient: DataClient, Determines how to get data from the given provider (cloud or local)
@@ -78,9 +78,13 @@ def prepare_data(
     logger.info("--- Splitting data ---")
     logger.info("----------------------")
 
-    train, test = train_test_split(
-        data, train_size=train_proportion, test_size=(1 - train_proportion)
-    )
+    if train_proportion == 1:
+        train = data
+        test = None
+    else:
+        train, test = train_test_split(
+            data, train_size=train_proportion, test_size=(1 - train_proportion)
+        )
 
     logger.info("-----------------------")
     logger.info("--- Outputting data ---")
@@ -89,9 +93,11 @@ def prepare_data(
     datahandler.save_data(
         dataclient=output_dataclient, obj=train, location=output_train_filepath
     )
-    datahandler.save_data(
-        dataclient=output_dataclient, obj=test, location=output_test_filepath
-    )
+
+    if test is not None:  # truth-testing a DataFrame raises ValueError
+        datahandler.save_data(
+            dataclient=output_dataclient, obj=test, location=output_test_filepath
+        )
 
     return train, test