add single row dataset for testing

This commit is contained in:
Michael Duong 2025-11-02 13:11:33 +00:00
parent ab3b2bb1d0
commit e04f6125e0
6 changed files with 72 additions and 39 deletions

View file

@ -54,13 +54,40 @@ jobs:
cd modules/ml-pipeline/src/pipeline
dvc pull -r experiments
# Compute a date stamp (YYYYMMDD) and export it as `timestamp` so that later
# steps can reference it as ${timestamp}.
- name: Set timestamp
  id: set_timestamp
  run: |
    # Entries appended to $GITHUB_ENV only become visible in *subsequent*
    # steps, so keep a shell-local copy for the log line below; echoing the
    # exported name directly in this step would print an empty value.
    timestamp="$(date +%Y%m%d)"
    echo "timestamp=${timestamp}" >> "$GITHUB_ENV"
    echo "Generated timestamp: ${timestamp}"
# Copy the one-row sample parquet into the dev bucket; the object key is
# prefixed with the `timestamp` environment variable.
- name: Upload sample row dataset to S3
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
  # Folded scalar: the lines below are joined into a single shell command.
  run: >-
    cd modules/ml-pipeline/src/pipeline/data/prepared_data/ &&
    aws s3 cp sample_test.parquet
    s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
# Build the prediction Lambda image, using the current directory as the build
# context, and tag it `lambda_test`.
- name: Build Lambda docker Image
  # Folded scalar: the lines below are joined into a single shell command.
  run: >-
    docker build
    --file ./deployment/Dockerfile.prediction.lambda
    --tag lambda_test
    .
- name: Run lambda docker container
- name: Remove uploaded sample row dataset from S3
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
docker run lambda_test
aws s3 rm s3://retrofit-data-dev/sap_change_model/sample_data_for_cicd/${timestamp}_sample_test.parquet
# - name: Run lambda docker container
# env:
# AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
# run: |
# docker run -p 9000:8080 \
# -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} \
# -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} \
# -e RUNTIME_ENVIRONMENT=dev \
# -e PREDICTIONS_BUCKET=retrofit-sap-predictions-dev lambda_test
Verify-Model:

View file

@ -29,6 +29,7 @@ data_filepath = prepare_data_params["data_filepath"]
train_proportion = prepare_data_params["train_proportion"]
output_train_filepath = prepare_data_params["output_train_filepath"]
output_test_filepath = prepare_data_params["output_test_filepath"]
sample_test_filepath = prepare_data_params["sample_test_filepath"]
feature_processor_config = feature_process_params["feature_processor_config"]
logger.info(f"--- Initiate DataClient ---")
@ -99,6 +100,10 @@ def prepare_data(
logger.info("--- Outputting data ---")
output_dataclient.save_data(
obj=data.sample(1), location=sample_test_filepath, save_config=None
)
output_dataclient.save_data(
obj=train, location=output_train_filepath, save_config=None
)

View file

@ -14,7 +14,7 @@ default:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error
time_limit: 1800
time_limit: 180
presets: medium_quality
excluded_model_types: ['RF', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.0005

View file

@ -26,6 +26,7 @@ default:
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet
sample_test_filepath: ./data/prepared_data/sample_test.parquet
feature_processor:
feature_processor_type: dataframe

View file

@ -16,8 +16,8 @@ stages:
deps:
- path: 1_prepare_data.py
hash: md5
md5: 11a3b8bfdfe199ab7ecc39ccc5652649
size: 4298
md5: a5ce162e1c402c0f811a80ef78cf4dd5
size: 4481
params:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns:
@ -61,9 +61,9 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
md5: 02b2c25e488f75c4a676540c127b8930.dir
size: 45890160
nfiles: 3
build_model:
cmd: python 2_build_model.py
deps:
@ -73,9 +73,9 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
md5: 02b2c25e488f75c4a676540c127b8930.dir
size: 45890160
nfiles: 3
params:
configs/build_model.yaml:
default:
@ -91,7 +91,7 @@ stages:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error
time_limit: 1800
time_limit: 180
presets: medium_quality
excluded_model_types:
- RF
@ -107,18 +107,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: 4fa77e3f129d2e6f9ef7222c44978c26.dir
size: 3474669
md5: 7f9a534daf824434262bee89e2ee2cfd.dir
size: 3475064
nfiles: 1
- path: data/model/
hash: md5
md5: e27b9216bc7455f8245d5b49f27b2707.dir
size: 753575768
nfiles: 30
md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
size: 414148418
nfiles: 24
- path: metrics/fit_metrics.json
hash: md5
md5: 426a162284ca9e29c043eb1d72e547e6
size: 224
md5: 7763f689b46c38ec8f0cc605deac4c2a
size: 221
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -128,14 +128,14 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: e27b9216bc7455f8245d5b49f27b2707.dir
size: 753575768
nfiles: 30
md5: c67bb2e8b24d9c574bc7c522ac3d66b9.dir
size: 414148418
nfiles: 24
- path: data/prepared_data
hash: md5
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
md5: 02b2c25e488f75c4a676540c127b8930.dir
size: 45890160
nfiles: 3
params:
configs/settings.yaml:
default.generate_predictions.input_dataclient_type: local
@ -148,8 +148,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
size: 484524
md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
size: 483856
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -160,14 +160,14 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 6e004c7f4812b5cabbee62fe8fb0d82f.dir
size: 484524
md5: 2d9353f60e16d4f85dd4a08a71dce548.dir
size: 483856
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 5c56787d9e6450e26a78c15700e104c7.dir
size: 45746089
nfiles: 2
md5: 02b2c25e488f75c4a676540c127b8930.dir
size: 45890160
nfiles: 3
params:
configs/settings.yaml:
default.generate_metrics.dataclient_type: local
@ -176,8 +176,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: b9ae6d24424f2d5389697577e9076b91
size: 223
md5: 8a52e3a0047c68b9de5c371a1d406f73
size: 224
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
@ -197,9 +197,9 @@ stages:
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: 32d78c20d91fedf2f5dbb4162f323e25
size: 356
md5: 666f73f6fdb49484737f1a7edd798727
size: 363
- path: metrics/scenario_table.md
hash: md5
md5: 52cbd19566151b0c300f9673252704d2
md5: 71c9fcb9ec304353aba0d7f5c58ca8b2
size: 872

View file

@ -1,4 +1,4 @@
/fit_metrics.json
/metrics.json
/scenario_table.md
/scenario_metrics.md
/metrics.json
/fit_metrics.json