From dda9065a88b604b1c2ccf8e163d5289cbe328d5e Mon Sep 17 00:00:00 2001 From: quandanrepo <45804868+quandanrepo@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:45:50 +0100 Subject: [PATCH 1/6] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 5879c41..6a63c61 100644 --- a/README.md +++ b/README.md @@ -81,4 +81,8 @@ To test the Lambda function, use the following curl command: ```json curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"' ``` + +```json +curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}' +``` This will send a POST request to the running Lambda function and pass in the required data as JSON. From d3b1bb4bb9bf03a1d5708e1c4f77642d4e33112a Mon Sep 17 00:00:00 2001 From: quandanrepo <45804868+quandanrepo@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:49:37 +0100 Subject: [PATCH 2/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6a63c61..a37f5fb 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,6 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d ``` ```json -curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}' +curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}' ``` This will send a POST request to the running Lambda function and pass in the required data as JSON. From 391cc6643591c4eeb1dd516d4d0465d0b9ea817a Mon Sep 17 00:00:00 2001 From: quandanrepo <45804868+quandanrepo@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:53:52 +0100 Subject: [PATCH 3/6] Update README.md --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index a37f5fb..7f018ad 100644 --- a/README.md +++ b/README.md @@ -79,10 +79,7 @@ Sets the RUNTIME_ENVIRONMENT variable to dev. To test the Lambda function, use the following curl command: ```json -curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data_with_id.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"' +curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/one_sample_test_dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}' ``` -```json -curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{"body": "{\"file_location\": \"s3://retrofit-data-dev/sap_change_model/dataset.parquet\", \"property_id\": 1, \"portfolio_id\": 4, \"created_at\": \"now\"}"}' -``` This will send a POST request to the running Lambda function and pass in the required data as JSON. From 70b3008dc5f19214cecbbec377c53529cadd428d Mon Sep 17 00:00:00 2001 From: quandanrepo <45804868+quandanrepo@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:56:56 +0100 Subject: [PATCH 4/6] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7f018ad..55cae8e 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ In order for this to be set up, some key environment variables needs to be inser secrets. Each different model and protected branch has its own set of secrets which allows for flexibility between different pipelines. -For example, for the branch sap_change-dev, the prefix=SAP_CHANGE_DEV, and the following secrets are: +For example, for the branch sap-dev, the prefix=SAP_DEV, and the following secrets are: - {prefix}_ECR_URI, which is the URI of the ECR repository to push to. For example, for the sap change model this is the lambda-sap-prediction-dev repository. @@ -58,7 +58,7 @@ First, navigate to the root directory of the repository. Open a terminal and exe 2. command to build the Docker image: ```bash -docker build -t sap_change -f deployment/Dockerfile.prediction.lambda . +docker build -t sap -f deployment/Dockerfile.prediction.lambda . ``` This will build a Docker image tagged as sap_change using the Dockerfile.prediction.lambda located @@ -68,7 +68,7 @@ in the deployment directory. Once the image is built, you can run it using the following command: ```bash -docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev sap_change +docker run -p 9000:8080 -v ~/.aws/credentials:/root/.aws/credentials:ro -e RUNTIME_ENVIRONMENT=dev -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev sap ``` This command does the following: From 9e7d0fa538ab1d01b502d0554d819ba6e3d7b36a Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 10 Oct 2023 12:32:25 +0000 Subject: [PATCH 5/6] add new model --- .../src/pipeline/configs/build_model.yaml | 2 +- modules/ml-pipeline/src/pipeline/dvc.lock | 42 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml index fdeb8c5..d296e6a 100644 --- a/modules/ml-pipeline/src/pipeline/configs/build_model.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/build_model.yaml @@ -13,6 +13,6 @@ default: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error #mean_absolute_error - time_limit: 1000 + time_limit: 4000 presets: medium_quality excluded_model_types: ['KNN', 'RF'] diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 46211fa..13851cf 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -29,8 +29,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 71e63a792f7723e2aea0709efde1a92b.dir - size: 31751660 + md5: e0be70d5025e40dd0d655d9949f72130.dir + size: 31800776 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -41,8 +41,8 @@ stages: size: 5359 - path: data/prepared_data hash: md5 - md5: 71e63a792f7723e2aea0709efde1a92b.dir - size: 31751660 + md5: e0be70d5025e40dd0d655d9949f72130.dir + size: 31800776 nfiles: 2 params: configs/build_model.yaml: @@ -58,7 +58,7 @@ stages: output_filepath: ./data/model/allmodels/ problem_type: regression eval_metric: mean_squared_error - time_limit: 1000 + time_limit: 4000 presets: medium_quality excluded_model_types: - KNN @@ -66,13 +66,13 @@ stages: outs: - path: data/model/ hash: md5 - md5: 0ffc51be7c8381c9e4106309e3e05ca3.dir - size: 345904743 + md5: 14ca33cde5e86770135f768abaf84978.dir + size: 422447808 nfiles: 27 - path: metrics/fit_metrics.json hash: md5 - md5: 3d4ff3a3ca3c327e2c1e9aa1338c18ce - size: 220 + md5: 41bfb8d2da8f06d1864d73ce125cc6aa + size: 221 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -82,13 +82,13 @@ stages: size: 3028 - path: data/model hash: md5 - md5: 0ffc51be7c8381c9e4106309e3e05ca3.dir - size: 345904743 + md5: 14ca33cde5e86770135f768abaf84978.dir + size: 422447808 nfiles: 27 - path: data/prepared_data hash: md5 - md5: 71e63a792f7723e2aea0709efde1a92b.dir - size: 31751660 + md5: e0be70d5025e40dd0d655d9949f72130.dir + size: 31800776 nfiles: 2 params: configs/settings.yaml: @@ -100,8 +100,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 00ff804016290d56e1490e59c098b060.dir - size: 351811 + md5: 40d0c7a7fd4a15add0615e322cf341a0.dir + size: 352151 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -112,13 +112,13 @@ stages: size: 4487 - path: data/predictions hash: md5 - md5: 00ff804016290d56e1490e59c098b060.dir - size: 351811 + md5: 40d0c7a7fd4a15add0615e322cf341a0.dir + size: 352151 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 71e63a792f7723e2aea0709efde1a92b.dir - size: 31751660 + md5: e0be70d5025e40dd0d655d9949f72130.dir + size: 31800776 nfiles: 2 params: configs/settings.yaml: @@ -128,8 +128,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 63ef63e4fabe929b914a0059ceeddabc - size: 221 + md5: 4e023650240e78d6ad761f1db7aac922 + size: 220 startup_cleanup: cmd: python 0_startup_cleanup.py deps: From 4c6c5330d82bd4ffacea213b0c7b1ee4593ee525 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 10 Oct 2023 12:33:44 +0000 Subject: [PATCH 6/6] add new model, new branch --- modules/ml-pipeline/src/pipeline/4_generate_metrics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/4_generate_metrics.py b/modules/ml-pipeline/src/pipeline/4_generate_metrics.py index 7b115a2..1f97d87 100644 --- a/modules/ml-pipeline/src/pipeline/4_generate_metrics.py +++ b/modules/ml-pipeline/src/pipeline/4_generate_metrics.py @@ -4,9 +4,7 @@ After the model is built, we can evaluate its performance """ import os -import yaml import pandas as pd -from pathlib import Path from core.interface.InterfaceModels import MLModel from core.interface.InterfaceMetrics import MLMetrics from core.interface.InterfaceDataClient import DataClient