Merge pull request #70 from Hestia-Homes/sap-dev

Sap dev
2026-06-08 11:17:25 +00:00 · 2023-10-11 09:36:44 +01:00 · 2023-10-11 09:36:44 +01:00 · b570829b5a
commit b570829b5a
parent 051f07df77 4597c12795
7 changed files with 67 additions and 10 deletions
--- a/.github/workflows/MLPipelinePostMerge.yml
+++ b/.github/workflows/MLPipelinePostMerge.yml
@@ -42,7 +42,14 @@ jobs:
        if [ -z "${latest_version}" ]; then
          increment_version="1.0.0"
        else
-          increment_version=$(echo ${latest_version} | awk -F'.' '{OFS="."; $1+=1; print}')
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+              FS="\\."   # Set the field separator to a period
+              OFS="."    # Set the output field separator to a period
+          }
+          {
+              major = $1 + 1   # Increment the major version
+              print major, "0", "0"   # Print the new version
+          }')
        fi

        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
@@ -80,7 +87,14 @@ jobs:
        if [ -z "${latest_version}" ]; then
          increment_version="0.1.0"
        else
-          increment_version=$(echo ${latest_version} | awk 'BEGIN{FS=OFS="."} {$2++; print}')
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+              FS="\\."   # Set the field separator to a period
+              OFS="."    # Set the output field separator to a period
+          }
+          {
+              minor = $2 + 1   # Increment the minor version
+              print $1, minor, "0"   # Print the new version
+          }')
        fi

        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
@@ -118,7 +132,14 @@ jobs:
        if [ -z "${latest_version}" ]; then
          increment_version="0.0.1"
        else
-          increment_version=$(echo ${latest_version} | awk 'BEGIN{FS=OFS="."} {$3++; print}')
+          increment_version=$(echo ${latest_version} | awk 'BEGIN {
+              FS="\\."   # Set the field separator to a period
+              OFS="."    # Set the output field separator to a period
+          }
+          {
+              patch = $3 + 1   # Increment the patch version
+              print $1, $2, patch   # Print the new version
+          }')
        fi

        new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
@@ -188,7 +209,7 @@ jobs:
        git config user.name "Github-Bot"
        git config user.email "Github-Bot@no-reply.com"

-        latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/')
+        latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
        if [ -z "${latest_dev_version}" ]; then
          increment_version="1"
        else
@@ -196,7 +217,7 @@ jobs:
        fi

        new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
-        latest_version=$(gto show model@latest --ref | awk -F"@" '{print $2}')
+        latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')

        echo ${new_tag}

--- a/MODEL_REGISTRY.md
+++ b/MODEL_REGISTRY.md
@@ -8,9 +8,17 @@
        "active": true
    },
    "sap": {
-        "version": "v0.0.3",
+        "version": "v0.1.0",
        "stage": {
-            "dev": "v0.0.3"
+            "dev": "v0.1.0"
+        },
+        "registered": true,
+        "active": true
+    },
+    "heat": {
+        "version": "v0.0.1",
+        "stage": {
+            "dev": "v0.0.1"
        },
        "registered": true,
        "active": true
--- a/deployment/handlers/prediction_app.py
+++ b/deployment/handlers/prediction_app.py
@@ -107,6 +107,7 @@ def handler(event, context):
            predictions_column_name=generate_predictions_params[
                "predictions_column_name"
            ],
+            identifier_column=generate_predictions_params["identifier_column"],
        )

        return {
--- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml
+++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml
@@ -43,6 +43,7 @@ default:
    test_data_filepath: ./data/prepared_data/test.parquet
    predictions_output_filepath: ./data/predictions/predictions.parquet
    predictions_column_name: predictions
+    identifier_column: id

  generate_metrics:
    dataclient_type: local
--- a/modules/ml-pipeline/src/pipeline/core/DataClient.py
+++ b/modules/ml-pipeline/src/pipeline/core/DataClient.py
@@ -142,9 +142,15 @@ class AWSS3Client:
        buffer = BytesIO()
        obj.to_parquet(buffer, index=False)

+        # Reset the buffer position to the beginning
+        buffer.seek(0)
+
        bucket, key = location.strip("s3://").split("/", 1)
        self.client.upload_fileobj(buffer, bucket, key)

+        # Close the buffer
+        buffer.close()
+
    def _load_parquet(self, location: str, load_config: dict) -> pd.DataFrame:
        """
        Load a parquet file
--- a/modules/ml-pipeline/src/pipeline/core/MLMetrics.py
+++ b/modules/ml-pipeline/src/pipeline/core/MLMetrics.py
@@ -4,6 +4,7 @@ Implementation of MLMetrics, all of which will have two methods:
 - Generate Plot Suite
 """

+import numpy as np
 import pandas as pd
 from typing import Union
 from sklearn.metrics import (
@@ -14,6 +15,18 @@ from sklearn.metrics import (
 )
 from core.interface.InterfaceMetrics import MLMetrics

+# Define the function to return the SMAPE value
+def symmetric_mape(actual, predicted) -> float:
+
+    # Convert actual and predicted to numpy
+    # array data type if not already
+    if not all([isinstance(actual, np.ndarray), isinstance(predicted, np.ndarray)]):
+        actual, predicted = np.array(actual), np.array(predicted)
+
+    return np.mean(
+        np.abs(predicted - actual) / ((np.abs(predicted) + np.abs(actual)) / 2)
+    )
+

 def metrics_factory(metrics_type: str) -> MLMetrics:
    metrics = {
@@ -34,7 +47,7 @@ class RegressionMetrics:
        median_absolute_error,
        mean_squared_error,
        mean_absolute_percentage_error,
-        # max_error
+        symmetric_mape,
    ]

    def generate_metrics(
--- a/modules/ml-pipeline/src/pipeline/generate_predictions.py
+++ b/modules/ml-pipeline/src/pipeline/generate_predictions.py
@@ -14,6 +14,7 @@ def generate_predictions(
    test_data_filepath: str,
    predictions_output_filepath: str,
    predictions_column_name: str,
+    identifier_column: str = "id",
 ):
    """
    For a given model, we generate prediction and evaluate this against the true target
@@ -52,6 +53,12 @@ def generate_predictions(
    predictions_df = pd.DataFrame(predictions)
    predictions_df.columns = [predictions_column_name]

-    output_dataclient.save_data(
-        obj=predictions_df, location=predictions_output_filepath, save_config=None
+    output_df = (
+        pd.concat([test_data[identifier_column], predictions_df], axis=1)
+        if identifier_column in test_data.columns
+        else predictions_df
+    )
+
+    output_dataclient.save_data(
+        obj=output_df, location=predictions_output_filepath, save_config=None
    )