mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
commit
b570829b5a
7 changed files with 67 additions and 10 deletions
31
.github/workflows/MLPipelinePostMerge.yml
vendored
31
.github/workflows/MLPipelinePostMerge.yml
vendored
|
|
@ -42,7 +42,14 @@ jobs:
|
|||
if [ -z "${latest_version}" ]; then
|
||||
increment_version="1.0.0"
|
||||
else
|
||||
increment_version=$(echo ${latest_version} | awk -F'.' '{OFS="."; $1+=1; print}')
|
||||
increment_version=$(echo ${latest_version} | awk 'BEGIN {
|
||||
FS="\\." # Set the field separator to a period
|
||||
OFS="." # Set the output field separator to a period
|
||||
}
|
||||
{
|
||||
major = $1 + 1 # Increment the major version
|
||||
print major, "0", "0" # Print the new version
|
||||
}')
|
||||
fi
|
||||
|
||||
new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
|
||||
|
|
@ -80,7 +87,14 @@ jobs:
|
|||
if [ -z "${latest_version}" ]; then
|
||||
increment_version="0.1.0"
|
||||
else
|
||||
increment_version=$(echo ${latest_version} | awk 'BEGIN{FS=OFS="."} {$2++; print}')
|
||||
increment_version=$(echo ${latest_version} | awk 'BEGIN {
|
||||
FS="\\." # Set the field separator to a period
|
||||
OFS="." # Set the output field separator to a period
|
||||
}
|
||||
{
|
||||
minor = $2 + 1 # Increment the minor version
|
||||
print $1, minor, "0" # Print the new version
|
||||
}')
|
||||
fi
|
||||
|
||||
new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
|
||||
|
|
@ -118,7 +132,14 @@ jobs:
|
|||
if [ -z "${latest_version}" ]; then
|
||||
increment_version="0.0.1"
|
||||
else
|
||||
increment_version=$(echo ${latest_version} | awk 'BEGIN{FS=OFS="."} {$3++; print}')
|
||||
increment_version=$(echo ${latest_version} | awk 'BEGIN {
|
||||
FS="\\." # Set the field separator to a period
|
||||
OFS="." # Set the output field separator to a period
|
||||
}
|
||||
{
|
||||
patch = $3 + 1 # Increment the patch version
|
||||
print $1, $2, patch # Print the new version
|
||||
}')
|
||||
fi
|
||||
|
||||
new_tag=${REGISTER_MODEL_NAME}@v${increment_version}
|
||||
|
|
@ -188,7 +209,7 @@ jobs:
|
|||
git config user.name "Github-Bot"
|
||||
git config user.email "Github-Bot@no-reply.com"
|
||||
|
||||
latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/')
|
||||
latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}')
|
||||
if [ -z "${latest_dev_version}" ]; then
|
||||
increment_version="1"
|
||||
else
|
||||
|
|
@ -196,7 +217,7 @@ jobs:
|
|||
fi
|
||||
|
||||
new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version}
|
||||
latest_version=$(gto show model@latest --ref | awk -F"@" '{print $2}')
|
||||
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}')
|
||||
|
||||
echo ${new_tag}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,9 +8,17 @@
|
|||
"active": true
|
||||
},
|
||||
"sap": {
|
||||
"version": "v0.0.3",
|
||||
"version": "v0.1.0",
|
||||
"stage": {
|
||||
"dev": "v0.0.3"
|
||||
"dev": "v0.1.0"
|
||||
},
|
||||
"registered": true,
|
||||
"active": true
|
||||
},
|
||||
"heat": {
|
||||
"version": "v0.0.1",
|
||||
"stage": {
|
||||
"dev": "v0.0.1"
|
||||
},
|
||||
"registered": true,
|
||||
"active": true
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ def handler(event, context):
|
|||
predictions_column_name=generate_predictions_params[
|
||||
"predictions_column_name"
|
||||
],
|
||||
identifier_column=generate_predictions_params["identifier_column"],
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ default:
|
|||
test_data_filepath: ./data/prepared_data/test.parquet
|
||||
predictions_output_filepath: ./data/predictions/predictions.parquet
|
||||
predictions_column_name: predictions
|
||||
identifier_column: id
|
||||
|
||||
generate_metrics:
|
||||
dataclient_type: local
|
||||
|
|
|
|||
|
|
@ -142,9 +142,15 @@ class AWSS3Client:
|
|||
buffer = BytesIO()
|
||||
obj.to_parquet(buffer, index=False)
|
||||
|
||||
# Reset the buffer position to the beginning
|
||||
buffer.seek(0)
|
||||
|
||||
bucket, key = location.strip("s3://").split("/", 1)
|
||||
self.client.upload_fileobj(buffer, bucket, key)
|
||||
|
||||
# Close the buffer
|
||||
buffer.close()
|
||||
|
||||
def _load_parquet(self, location: str, load_config: dict) -> pd.DataFrame:
|
||||
"""
|
||||
Load a parquet file
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ Implementation of MLMetrics, all of which will have two methods:
|
|||
- Generate Plot Suite
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Union
|
||||
from sklearn.metrics import (
|
||||
|
|
@ -14,6 +15,18 @@ from sklearn.metrics import (
|
|||
)
|
||||
from core.interface.InterfaceMetrics import MLMetrics
|
||||
|
||||
# Define the function to return the SMAPE value
|
||||
def symmetric_mape(actual, predicted) -> float:
    """
    Compute the symmetric mean absolute percentage error (SMAPE).

    Each element contributes
    ``|predicted - actual| / ((|predicted| + |actual|) / 2)`` and the result
    is the mean of those terms, expressed as a fraction (0 is a perfect fit,
    2 is the worst possible value), not as a percentage.

    Args:
        actual: Array-like of true target values.
        predicted: Array-like of predicted values, broadcastable against
            ``actual``.

    Returns:
        The mean SMAPE over all elements as a float. Elements where both the
        actual and the predicted value are zero count as zero error instead
        of producing NaN from a 0/0 division.
    """
    # asarray is a no-op for inputs that are already float ndarrays and
    # converts lists / pandas objects otherwise.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    abs_error = np.abs(predicted - actual)
    scale = (np.abs(predicted) + np.abs(actual)) / 2

    # scale == 0 only when actual == predicted == 0, i.e. a perfect
    # prediction; leave those terms at 0 rather than dividing 0 by 0.
    ratios = np.divide(
        abs_error, scale, out=np.zeros_like(scale), where=scale != 0
    )

    return float(np.mean(ratios))
|
||||
|
||||
|
||||
def metrics_factory(metrics_type: str) -> MLMetrics:
|
||||
metrics = {
|
||||
|
|
@ -34,7 +47,7 @@ class RegressionMetrics:
|
|||
median_absolute_error,
|
||||
mean_squared_error,
|
||||
mean_absolute_percentage_error,
|
||||
# max_error
|
||||
symmetric_mape,
|
||||
]
|
||||
|
||||
def generate_metrics(
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ def generate_predictions(
|
|||
test_data_filepath: str,
|
||||
predictions_output_filepath: str,
|
||||
predictions_column_name: str,
|
||||
identifier_column: str = "id",
|
||||
):
|
||||
"""
|
||||
For a given model, we generate prediction and evaluate this against the true target
|
||||
|
|
@ -52,6 +53,12 @@ def generate_predictions(
|
|||
predictions_df = pd.DataFrame(predictions)
|
||||
predictions_df.columns = [predictions_column_name]
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=predictions_df, location=predictions_output_filepath, save_config=None
|
||||
output_df = (
|
||||
pd.concat([test_data[identifier_column], predictions_df], axis=1)
|
||||
if identifier_column in test_data.columns
|
||||
else predictions_df
|
||||
)
|
||||
|
||||
output_dataclient.save_data(
|
||||
obj=output_df, location=predictions_output_filepath, save_config=None
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue