From 620c1d10a1a009d2416ecc927793e4caa9787715 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 19 Apr 2024 16:22:06 +0100 Subject: [PATCH 01/20] correct the dockerignore files and test model with just tabular --- .dockerignore | 9 ++ deployment/.dockerignore | 12 ++- modules/ml-pipeline/src/.dockerignore | 10 +- modules/ml-pipeline/src/Prediction.Dockerfile | 2 +- modules/ml-pipeline/src/pipeline/dvc.lock | 94 ++++++------------- .../predictions/requirements-dev.txt | 2 +- .../requirements/predictions/requirements.txt | 2 +- .../training/requirements-dev.txt | 2 +- .../requirements/training/requirements.txt | 2 +- 9 files changed, 55 insertions(+), 80 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..84abbe6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +modules/ml-pipeline/src/pipeline/data/predictions +modules/ml-pipeline/src/pipeline/data/fit_predictions +modules/ml-pipeline/src/pipeline/data/prepared_data +modules/ml-pipeline/src/pipeline/data/model/allmodels +modules/ml-pipeline/src/pipeline/metrics +modules/ml-pipeline/src/pipeline/__pycache__ +modules/ml-pipeline/src/pipeline/.dvc +modules/ml-pipeline/src/pipeline/analysis +modules/ml-pipeline/src/pipeline/metrics diff --git a/deployment/.dockerignore b/deployment/.dockerignore index e01cbd5..8b8a7fb 100644 --- a/deployment/.dockerignore +++ b/deployment/.dockerignore @@ -1,4 +1,8 @@ -modules/ml-pipeline/src/pipeline/data/predictions* -modules/ml-pipeline/src/pipeline/data/prepared_data* -modules/ml-pipeline/src/pipeline/data/model/allmodels* -modules/ml-pipeline/src/pipeline/metrics* +modules/ml-pipeline/src/pipeline/data/predictions +modules/ml-pipeline/src/pipeline/data/prepared_data +modules/ml-pipeline/src/pipeline/data/model/allmodels +modules/ml-pipeline/src/pipeline/metrics +modules/ml-pipeline/src/__pycache__ +modules/ml-pipeline/src/.dvc +modules/ml-pipeline/src/analysis +modules/ml-pipeline/src/metrics diff --git 
a/modules/ml-pipeline/src/.dockerignore b/modules/ml-pipeline/src/.dockerignore index 14f71d7..5feb57d 100644 --- a/modules/ml-pipeline/src/.dockerignore +++ b/modules/ml-pipeline/src/.dockerignore @@ -1,4 +1,6 @@ -pipeline/data/predictions* -pipeline/data/prepared_data/train.parquet* -pipeline/data/model/allmodels* -pipeline/metrics* +pipeline/data/predictions +pipeline/data/prepared_data/train.parquet +pipeline/data/model/allmodels +pipeline/metrics +pipeline/.dvc +pipeline/analysis diff --git a/modules/ml-pipeline/src/Prediction.Dockerfile b/modules/ml-pipeline/src/Prediction.Dockerfile index a6fc539..e0a292c 100644 --- a/modules/ml-pipeline/src/Prediction.Dockerfile +++ b/modules/ml-pipeline/src/Prediction.Dockerfile @@ -1,7 +1,7 @@ # Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow) FROM python:3.10.12-slim -RUN apt-get update && apt-get install -y libgomp1 +RUN apt-get update && apt-get install -y libgomp1 gcc python3-dev COPY pipeline/requirements/predictions/requirements.txt requirements.txt diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 104dc83..ca4bfdd 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -35,46 +35,6 @@ stages: - number_habitable_rooms - number_heated_rooms default.feature_processor.feature_processor_config.retain_features: - - uprn - - sap_starting - - hot_water_energy_eff_ending - - mainheat_energy_eff_ending - - constituency - - roof_energy_eff_ending - - walls_energy_eff_ending - - secondheat_description_ending - - property_type - - mainheatc_energy_eff_ending - - built_form - - walls_insulation_thickness_ending - - potential_energy_efficiency - - transaction_type_ending - - floor_thermal_transmittance_ending - - low_energy_lighting_ending - - heat_demand_starting - - photo_supply_ending - - carbon_starting - - walls_thermal_transmittance_ending - - 
roof_insulation_thickness_ending - - total_floor_area_ending - - number_open_fireplaces_ending - - windows_energy_eff_ending - - floor_height_ending - - extension_count_ending - - has_air_source_heat_pump_ending - - charging_system_ending - - construction_age_band - - glazed_type_ending - - roof_thermal_transmittance_ending - - floor_insulation_thickness_ending - - has_mains_gas_ending - - estimated_perimeter_starting - - energy_consumption_potential - - environment_impact_potential - - heater_type_ending - - multi_glaze_proportion_ending - - lighting_energy_eff_ending - - fixed_lighting_outlets_count default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending @@ -89,8 +49,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir + size: 44713582 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -101,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir + size: 44713582 nfiles: 2 params: configs/build_model.yaml: @@ -134,18 +94,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: de46250d454c4d713ab580b10ff3fd31.dir - size: 3349318 + md5: 750cd7426e2909ed36bc05601b5e04c4.dir + size: 3349190 nfiles: 1 - path: data/model/ hash: md5 - md5: 18bd7a93ece75a65d3a950b7dfdab4fb.dir - size: 735951861 - nfiles: 35 + md5: aa5eb60be553f2a01e63783cf8f1fad1.dir + size: 765461992 + nfiles: 36 - path: metrics/fit_metrics.json hash: md5 - md5: 8a952a5e884c268e6059357a627b9251 - size: 224 + md5: ca5190b3292210c57a58668fdb48296c + size: 226 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -155,13 +115,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 
18bd7a93ece75a65d3a950b7dfdab4fb.dir - size: 735951861 - nfiles: 35 + md5: aa5eb60be553f2a01e63783cf8f1fad1.dir + size: 765461992 + nfiles: 36 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir + size: 44713582 nfiles: 2 params: configs/settings.yaml: @@ -173,8 +133,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir - size: 463563 + md5: 934c94e5b1a2c70db3dc865ee056f460.dir + size: 463619 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -185,13 +145,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir - size: 463563 + md5: 934c94e5b1a2c70db3dc865ee056f460.dir + size: 463619 nfiles: 1 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir + size: 44713582 nfiles: 2 params: configs/settings.yaml: @@ -201,15 +161,15 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 9f863f47799d42c101eba3b03a179455 + md5: aa2a511ac759225549636ba05d6b667c size: 224 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: - path: 5_generate_scenarios.py hash: md5 - md5: a18f6c6ae2082f038df47386cf3e418e - size: 4896 + md5: 40506749fefd926d47c60ff5b16db307 + size: 5337 params: configs/scenarios.yaml: default.scenarios: @@ -222,9 +182,9 @@ stages: outs: - path: metrics/scenario_metrics.md hash: md5 - md5: 64e7db945ff655ae03c20c9845f19106 + md5: 4085a6ea3d044ad2fe7ac63b0a685fed size: 363 - path: metrics/scenario_table.md hash: md5 - md5: d4f8afe07b774374aeaa48f1b7b8a5fc + md5: a2b3da77921b5dcc10f7068646e0eae3 size: 2133 diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt index 734419a..35875d9 100644 --- 
a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon==1.0.0 +autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 dynaconf==3.2.1 pyarrow==13.0.0 pre-commit==3.3.3 diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt index 937b000..6a96822 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon==1.0.0 +autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 dynaconf==3.2.1 pyarrow==13.0.0 PyYAML==6.0.1 diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt index fe06a4d..6e1ea2f 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon==1.0.0 +autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 ray==2.6.3 dynaconf==3.2.1 alibi==0.9.5 diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt index a5bccd3..e4e319c 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt @@ -1,4 +1,4 @@ boto3==1.28.41 pandas==2.1.4 -autogluon==1.0.0 +autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 dynaconf==3.2.1 From 50a3e2d5be0f0e02af3a16785cecab9e6674d8a2 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 19 Apr 2024 
16:25:26 +0100 Subject: [PATCH 02/20] correct the dockerignore files and test model with just tabular --- deployment/.dockerignore | 1 + 1 file changed, 1 insertion(+) diff --git a/deployment/.dockerignore b/deployment/.dockerignore index 8b8a7fb..c4103de 100644 --- a/deployment/.dockerignore +++ b/deployment/.dockerignore @@ -1,4 +1,5 @@ modules/ml-pipeline/src/pipeline/data/predictions +modules/ml-pipeline/src/pipeline/data/fit_predictions modules/ml-pipeline/src/pipeline/data/prepared_data modules/ml-pipeline/src/pipeline/data/model/allmodels modules/ml-pipeline/src/pipeline/metrics From 380bd6b595566853ec9d183ca02ad137f4cb82d5 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 19 Apr 2024 17:34:10 +0100 Subject: [PATCH 03/20] correct the dockerignore files and test model with just tabular --- modules/ml-pipeline/src/.dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/ml-pipeline/src/.dockerignore b/modules/ml-pipeline/src/.dockerignore index 5feb57d..2e9277d 100644 --- a/modules/ml-pipeline/src/.dockerignore +++ b/modules/ml-pipeline/src/.dockerignore @@ -2,5 +2,4 @@ pipeline/data/predictions pipeline/data/prepared_data/train.parquet pipeline/data/model/allmodels pipeline/metrics -pipeline/.dvc pipeline/analysis From 87e3cc391eff1d3cf3a21bdb7b8631a165b8b5bd Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 19 Apr 2024 17:48:15 +0100 Subject: [PATCH 04/20] push files to s3 --- modules/ml-pipeline/src/.dockerignore | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ml-pipeline/src/.dockerignore b/modules/ml-pipeline/src/.dockerignore index 2e9277d..5feb57d 100644 --- a/modules/ml-pipeline/src/.dockerignore +++ b/modules/ml-pipeline/src/.dockerignore @@ -2,4 +2,5 @@ pipeline/data/predictions pipeline/data/prepared_data/train.parquet pipeline/data/model/allmodels pipeline/metrics +pipeline/.dvc pipeline/analysis From 7a3477c0e1db4ea28c7c714397ebb670ddea70c7 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 22 Apr 2024 
13:30:58 +0100 Subject: [PATCH 05/20] change to all packages --- modules/ml-pipeline/src/.dockerignore | 1 + .../src/pipeline/requirements/predictions/requirements-dev.txt | 2 +- .../src/pipeline/requirements/predictions/requirements.txt | 2 +- .../src/pipeline/requirements/training/requirements-dev.txt | 2 +- .../src/pipeline/requirements/training/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/ml-pipeline/src/.dockerignore b/modules/ml-pipeline/src/.dockerignore index 5feb57d..f99f14d 100644 --- a/modules/ml-pipeline/src/.dockerignore +++ b/modules/ml-pipeline/src/.dockerignore @@ -1,5 +1,6 @@ pipeline/data/predictions pipeline/data/prepared_data/train.parquet +pipeline/data/fit_predictions pipeline/data/model/allmodels pipeline/metrics pipeline/.dvc diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt index 35875d9..4dc4c36 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements-dev.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 +autogluon.tabular[all]==1.0.0 dynaconf==3.2.1 pyarrow==13.0.0 pre-commit==3.3.3 diff --git a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt index 6a96822..35bdb05 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 +autogluon.tabular[all]==1.0.0 dynaconf==3.2.1 pyarrow==13.0.0 PyYAML==6.0.1 diff --git 
a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt index 6e1ea2f..93a042e 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements-dev.txt @@ -1,7 +1,7 @@ joblib==1.3.2 boto3==1.28.17 pandas==2.1.4 -autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 +autogluon.tabular[all]==1.0.0 ray==2.6.3 dynaconf==3.2.1 alibi==0.9.5 diff --git a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt index e4e319c..edeb764 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/training/requirements.txt @@ -1,4 +1,4 @@ boto3==1.28.41 pandas==2.1.4 -autogluon.tabular[lightgbm,xgboost,fastai]==1.0.0 +autogluon.tabular[all]==1.0.0 dynaconf==3.2.1 From 874b1db5f3db74d490fc75a57bcd9d132647e68d Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 22 Apr 2024 19:01:56 +0100 Subject: [PATCH 06/20] add ignored file to dockerignore --- modules/ml-pipeline/src/.dockerignore | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ml-pipeline/src/.dockerignore b/modules/ml-pipeline/src/.dockerignore index 5feb57d..c9a79ac 100644 --- a/modules/ml-pipeline/src/.dockerignore +++ b/modules/ml-pipeline/src/.dockerignore @@ -1,4 +1,5 @@ pipeline/data/predictions +pipeline/data/fit_predictions pipeline/data/prepared_data/train.parquet pipeline/data/model/allmodels pipeline/metrics From f43d0774798c0f0042e36e4f4a36d70720a905ac Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 22 Apr 2024 19:10:40 +0100 Subject: [PATCH 07/20] use previous model with new downstream processes --- modules/ml-pipeline/src/pipeline/dvc.lock | 94 ++++++++++++++++------- 1 file changed, 67 insertions(+), 27 deletions(-) diff --git 
a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index ca4bfdd..104dc83 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -35,6 +35,46 @@ stages: - number_habitable_rooms - number_heated_rooms default.feature_processor.feature_processor_config.retain_features: + - uprn + - sap_starting + - hot_water_energy_eff_ending + - mainheat_energy_eff_ending + - constituency + - roof_energy_eff_ending + - walls_energy_eff_ending + - secondheat_description_ending + - property_type + - mainheatc_energy_eff_ending + - built_form + - walls_insulation_thickness_ending + - potential_energy_efficiency + - transaction_type_ending + - floor_thermal_transmittance_ending + - low_energy_lighting_ending + - heat_demand_starting + - photo_supply_ending + - carbon_starting + - walls_thermal_transmittance_ending + - roof_insulation_thickness_ending + - total_floor_area_ending + - number_open_fireplaces_ending + - windows_energy_eff_ending + - floor_height_ending + - extension_count_ending + - has_air_source_heat_pump_ending + - charging_system_ending + - construction_age_band + - glazed_type_ending + - roof_thermal_transmittance_ending + - floor_insulation_thickness_ending + - has_mains_gas_ending + - estimated_perimeter_starting + - energy_consumption_potential + - environment_impact_potential + - heater_type_ending + - multi_glaze_proportion_ending + - lighting_energy_eff_ending + - fixed_lighting_outlets_count default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending @@ -49,8 +89,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir - size: 44713582 + md5: efa416abea618ae6220a0c3d597603cf.dir + size: 44750997 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -61,8 +101,8 @@ stages: size: 4820 - 
path: data/prepared_data hash: md5 - md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir - size: 44713582 + md5: efa416abea618ae6220a0c3d597603cf.dir + size: 44750997 nfiles: 2 params: configs/build_model.yaml: @@ -94,18 +134,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 750cd7426e2909ed36bc05601b5e04c4.dir - size: 3349190 + md5: de46250d454c4d713ab580b10ff3fd31.dir + size: 3349318 nfiles: 1 - path: data/model/ hash: md5 - md5: aa5eb60be553f2a01e63783cf8f1fad1.dir - size: 765461992 - nfiles: 36 + md5: 18bd7a93ece75a65d3a950b7dfdab4fb.dir + size: 735951861 + nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: ca5190b3292210c57a58668fdb48296c - size: 226 + md5: 8a952a5e884c268e6059357a627b9251 + size: 224 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -115,13 +155,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: aa5eb60be553f2a01e63783cf8f1fad1.dir - size: 765461992 - nfiles: 36 + md5: 18bd7a93ece75a65d3a950b7dfdab4fb.dir + size: 735951861 + nfiles: 35 - path: data/prepared_data hash: md5 - md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir - size: 44713582 + md5: efa416abea618ae6220a0c3d597603cf.dir + size: 44750997 nfiles: 2 params: configs/settings.yaml: @@ -133,8 +173,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 934c94e5b1a2c70db3dc865ee056f460.dir - size: 463619 + md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir + size: 463563 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -145,13 +185,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 934c94e5b1a2c70db3dc865ee056f460.dir - size: 463619 + md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir + size: 463563 nfiles: 1 - path: data/prepared_data hash: md5 - md5: a1ff276b1cbd2db0b8e2982cfd524b40.dir - size: 44713582 + md5: efa416abea618ae6220a0c3d597603cf.dir + size: 44750997 nfiles: 2 params: configs/settings.yaml: @@ -161,15 +201,15 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: aa2a511ac759225549636ba05d6b667c + md5: 
9f863f47799d42c101eba3b03a179455 size: 224 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: - path: 5_generate_scenarios.py hash: md5 - md5: 40506749fefd926d47c60ff5b16db307 - size: 5337 + md5: a18f6c6ae2082f038df47386cf3e418e + size: 4896 params: configs/scenarios.yaml: default.scenarios: @@ -182,9 +222,9 @@ stages: outs: - path: metrics/scenario_metrics.md hash: md5 - md5: 4085a6ea3d044ad2fe7ac63b0a685fed + md5: 64e7db945ff655ae03c20c9845f19106 size: 363 - path: metrics/scenario_table.md hash: md5 - md5: a2b3da77921b5dcc10f7068646e0eae3 + md5: d4f8afe07b774374aeaa48f1b7b8a5fc size: 2133 From b985bbf753e232f0fc321f36f19833751b555746 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 26 May 2024 09:28:00 +0100 Subject: [PATCH 08/20] new model with is_as_built_ending and is assumed columns --- .../src/pipeline/configs/scenarios.yaml | 3 +- .../src/pipeline/configs/settings.yaml | 3 +- modules/ml-pipeline/src/pipeline/dvc.lock | 92 ++++++------------- 3 files changed, 30 insertions(+), 68 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml index 2df0cb6..8190888 100644 --- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml @@ -5,6 +5,7 @@ default: scenario_data_filepaths: # - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet - - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet + # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet + - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet comparison_output_filepath: ./metrics/scenario_table.md metrics_output_filepath: ./metrics/scenario_metrics.md diff --git 
a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index f42b2be..9d466af 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -18,7 +18,8 @@ default: prepare_data: input_dataclient_type: aws-s3 output_dataclient_type: local - data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet + # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet + data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 104dc83..f2fc8be 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -35,52 +35,12 @@ stages: - number_habitable_rooms - number_heated_rooms default.feature_processor.feature_processor_config.retain_features: - - uprn - - sap_starting - - hot_water_energy_eff_ending - - mainheat_energy_eff_ending - - constituency - - roof_energy_eff_ending - - walls_energy_eff_ending - - secondheat_description_ending - - property_type - - mainheatc_energy_eff_ending - - built_form - - walls_insulation_thickness_ending - - potential_energy_efficiency - - transaction_type_ending - - floor_thermal_transmittance_ending - - low_energy_lighting_ending - - heat_demand_starting - - photo_supply_ending - - carbon_starting - - walls_thermal_transmittance_ending - - roof_insulation_thickness_ending - - total_floor_area_ending - - number_open_fireplaces_ending - - windows_energy_eff_ending - - floor_height_ending - - extension_count_ending - - has_air_source_heat_pump_ending - - charging_system_ending - - construction_age_band - - glazed_type_ending - 
- roof_thermal_transmittance_ending - - floor_insulation_thickness_ending - - has_mains_gas_ending - - estimated_perimeter_starting - - energy_consumption_potential - - environment_impact_potential - - heater_type_ending - - multi_glaze_proportion_ending - - lighting_energy_eff_ending - - fixed_lighting_outlets_count default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_type: dataframe default.prepare_data.data_filepath: - s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet + s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -89,8 +49,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: 26bbe6b1dafae18eb50689604b925c87.dir + size: 45002224 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -101,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: 26bbe6b1dafae18eb50689604b925c87.dir + size: 45002224 nfiles: 2 params: configs/build_model.yaml: @@ -134,18 +94,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: de46250d454c4d713ab580b10ff3fd31.dir - size: 3349318 + md5: 7ba44b4af6ecedf3ffebcf7512731d3d.dir + size: 3348905 nfiles: 1 - path: data/model/ hash: md5 - md5: 18bd7a93ece75a65d3a950b7dfdab4fb.dir - size: 735951861 + md5: ef62a6f9b9336fb5b648589d6e0d54d6.dir + size: 737305293 nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: 8a952a5e884c268e6059357a627b9251 - size: 224 + md5: 38600703e1ece1447e5d0fd80b4de8b7 + size: 217 generate_predictions: cmd: 
python 3_generate_predictions.py deps: @@ -155,13 +115,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 18bd7a93ece75a65d3a950b7dfdab4fb.dir - size: 735951861 + md5: ef62a6f9b9336fb5b648589d6e0d54d6.dir + size: 737305293 nfiles: 35 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: 26bbe6b1dafae18eb50689604b925c87.dir + size: 45002224 nfiles: 2 params: configs/settings.yaml: @@ -173,8 +133,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir - size: 463563 + md5: 7d88320b1cd3c690032438fad6cb2200.dir + size: 463523 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -185,13 +145,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 07ef721a0dc94a52e3ba7a70ac45b8ff.dir - size: 463563 + md5: 7d88320b1cd3c690032438fad6cb2200.dir + size: 463523 nfiles: 1 - path: data/prepared_data hash: md5 - md5: efa416abea618ae6220a0c3d597603cf.dir - size: 44750997 + md5: 26bbe6b1dafae18eb50689604b925c87.dir + size: 45002224 nfiles: 2 params: configs/settings.yaml: @@ -201,8 +161,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 9f863f47799d42c101eba3b03a179455 - size: 224 + md5: e17ee59f3a04178a153c5746bf897e74 + size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: @@ -216,15 +176,15 @@ stages: input_dataclient_type: aws-s3 output_dataclient_type: local scenario_data_filepaths: - - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet + - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet comparison_output_filepath: ./metrics/scenario_table.md metrics_output_filepath: ./metrics/scenario_metrics.md outs: - path: metrics/scenario_metrics.md hash: md5 - md5: 64e7db945ff655ae03c20c9845f19106 + md5: 84fcae91af1480ee0c8fc1616af359b3 size: 363 - path: metrics/scenario_table.md hash: md5 - md5: d4f8afe07b774374aeaa48f1b7b8a5fc + md5: 
8571ff812c2f7c71eb0b1534ff6ecff5 size: 2133 From a4dffe527a6fb2e4a53729d32036f5e11fdc9aae Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 26 May 2024 09:47:08 +0100 Subject: [PATCH 09/20] add testing script --- .github/workflows/MLPipelineTESTING.yml | 238 ++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 .github/workflows/MLPipelineTESTING.yml diff --git a/.github/workflows/MLPipelineTESTING.yml b/.github/workflows/MLPipelineTESTING.yml new file mode 100644 index 0000000..92c1792 --- /dev/null +++ b/.github/workflows/MLPipelineTESTING.yml @@ -0,0 +1,238 @@ +name: Register the model for the given pipeline branch (TESTING) + +on: + push: + branches: + - "sap-dev-gto" + +# on: +# pull_request: +# types: +# - closed +# branches: +# - "sap-dev" +# - "heat-dev" +# - "carbon-dev" + +permissions: write-all + +jobs: + Register-Major-Model-Dev: + # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install packages to register model + run: | + pip install --upgrade pip + pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + + - name: Register Model + run: | + # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + + git config user.name "Github-Bot" + git config user.email "Github-Bot@no-reply.com" + + latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false + if [ -z "${latest_version}" ]; then + increment_version="1.0.0" + else + increment_version=$(echo ${latest_version} | awk 'BEGIN { + FS="\\." # Set the field separator to a period + OFS="." 
# Set the output field separator to a period + } + { + major = $1 + 1 # Increment the major version + print major, "0", "0" # Print the new version + }') + fi + + new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + + # git tag -a ${new_tag} -m "Registering new Major Version" + # git push origin ${new_tag} + + # gto show --json > MODEL_REGISTRY.md + # git add . + # git commit -m "Update Registry" + # git push + + Register-Minor-Model-Dev: + # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install packages to register model + run: | + pip install --upgrade pip + pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + + - name: Register Model + run: | + # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + + git config user.name "Github-Bot" + git config user.email "Github-Bot@no-reply.com" + + latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') + if [ -z "${latest_version}" ]; then + increment_version="0.1.0" + else + increment_version=$(echo ${latest_version} | awk 'BEGIN { + FS="\\." # Set the field separator to a period + OFS="." # Set the output field separator to a period + } + { + minor = $2 + 1 # Increment the minor version + print $1, minor, "0" # Print the new version + }') + fi + + new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + + # git tag -a ${new_tag} -m "Registering new Minor Version" + # git push origin ${new_tag} + + # gto show --json > MODEL_REGISTRY.md + # git add . 
+ # git commit -m "Update Registry" + # git push + + Register-Patch-Model-Dev: + # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install packages to register model + run: | + pip install --upgrade pip + pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + + - name: Register Model + run: | + # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + + git config user.name "Github-Bot" + git config user.email "Github-Bot@no-reply.com" + + latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') + if [ -z "${latest_version}" ]; then + increment_version="0.0.1" + else + increment_version=$(echo ${latest_version} | awk 'BEGIN { + FS="\\." # Set the field separator to a period + OFS="." # Set the output field separator to a period + } + { + patch = $3 + 1 # Increment the patch version + print $1, $2, patch # Print the new version + }') + fi + + new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + + # git tag -a ${new_tag} -m "Registering new Patch Version" + # git push origin ${new_tag} + + # gto show --json > MODEL_REGISTRY.md + # git add . 
+ # git commit -m "Update Registry" + # git push + + Promote-Artefacts-To-Dev: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Install packages to retrieve artifacts + run: | + pip install --upgrade pip + pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + + - name: Retrieve artifacts (dvc.lock) + env: + AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + run: | + cd modules/ml-pipeline/src/pipeline + dvc pull -r experiments + + - name: Push artifacts to Dev + env: + AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} + run: | + cd modules/ml-pipeline/src/pipeline + dvc push -r dev + + Register-New-Model-Dev: + needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] + if: | + always() && + (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && + (needs.Register-Minor-Model-Dev.result == 'success' || needs.Register-Minor-Model-Dev.result == 'skipped') && + (needs.Register-Patch-Model-Dev.result == 'success' || needs.Register-Patch-Model-Dev.result == 'skipped') + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Install packages to register model + run: | + pip install --upgrade pip + pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + + - name: Register Model + env: + TARGET_BRANCH: ${{ github.base_ref }} + run: | + + REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + + git config user.name "Github-Bot" + git config user.email "Github-Bot@no-reply.com" + + latest_dev_version=$(gto history 
${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}') + if [ -z "${latest_dev_version}" ]; then + increment_version="1" + else + increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}') + fi + + new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version} + latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}') + + echo ${new_tag} + + commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}') + git checkout ${commit_hash} + + # git pull #Get new model registry md file changes + git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}" + git push origin ${new_tag} + + git checkout ${TARGET_BRANCH} + git fetch --all + git pull + + gto show --json > MODEL_REGISTRY.md + git add . + git commit -m "Update Registry" + git push origin ${TARGET_BRANCH} From 0768ace94787e547c637ac6b6e4ae395db2edf52 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 26 May 2024 09:50:39 +0100 Subject: [PATCH 10/20] add testing script --- .github/workflows/MLPipelineTESTING.yml | 176 ++++++++++++------------ 1 file changed, 89 insertions(+), 87 deletions(-) diff --git a/.github/workflows/MLPipelineTESTING.yml b/.github/workflows/MLPipelineTESTING.yml index 92c1792..f2a200a 100644 --- a/.github/workflows/MLPipelineTESTING.yml +++ b/.github/workflows/MLPipelineTESTING.yml @@ -38,21 +38,23 @@ jobs: git config user.name "Github-Bot" git config user.email "Github-Bot@no-reply.com" - latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false - if [ -z "${latest_version}" ]; then - increment_version="1.0.0" - else - increment_version=$(echo ${latest_version} | awk 'BEGIN { - FS="\\." # Set the field separator to a period - OFS="." 
# Set the output field separator to a period - } - { - major = $1 + 1 # Increment the major version - print major, "0", "0" # Print the new version - }') - fi + gto show - new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false + # if [ -z "${latest_version}" ]; then + # increment_version="1.0.0" + # else + # increment_version=$(echo ${latest_version} | awk 'BEGIN { + # FS="\\." # Set the field separator to a period + # OFS="." # Set the output field separator to a period + # } + # { + # major = $1 + 1 # Increment the major version + # print major, "0", "0" # Print the new version + # }') + # fi + + # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} # git tag -a ${new_tag} -m "Registering new Major Version" # git push origin ${new_tag} @@ -83,21 +85,21 @@ jobs: git config user.name "Github-Bot" git config user.email "Github-Bot@no-reply.com" - latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') - if [ -z "${latest_version}" ]; then - increment_version="0.1.0" - else - increment_version=$(echo ${latest_version} | awk 'BEGIN { - FS="\\." # Set the field separator to a period - OFS="." # Set the output field separator to a period - } - { - minor = $2 + 1 # Increment the minor version - print $1, minor, "0" # Print the new version - }') - fi + # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') + # if [ -z "${latest_version}" ]; then + # increment_version="0.1.0" + # else + # increment_version=$(echo ${latest_version} | awk 'BEGIN { + # FS="\\." # Set the field separator to a period + # OFS="." 
# Set the output field separator to a period + # } + # { + # minor = $2 + 1 # Increment the minor version + # print $1, minor, "0" # Print the new version + # }') + # fi - new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} # git tag -a ${new_tag} -m "Registering new Minor Version" # git push origin ${new_tag} @@ -128,21 +130,21 @@ jobs: git config user.name "Github-Bot" git config user.email "Github-Bot@no-reply.com" - latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') - if [ -z "${latest_version}" ]; then - increment_version="0.0.1" - else - increment_version=$(echo ${latest_version} | awk 'BEGIN { - FS="\\." # Set the field separator to a period - OFS="." # Set the output field separator to a period - } - { - patch = $3 + 1 # Increment the patch version - print $1, $2, patch # Print the new version - }') - fi + # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') + # if [ -z "${latest_version}" ]; then + # increment_version="0.0.1" + # else + # increment_version=$(echo ${latest_version} | awk 'BEGIN { + # FS="\\." # Set the field separator to a period + # OFS="." 
# Set the output field separator to a period + # } + # { + # patch = $3 + 1 # Increment the patch version + # print $1, $2, patch # Print the new version + # }') + # fi - new_tag=${REGISTER_MODEL_NAME}@v${increment_version} + # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} # git tag -a ${new_tag} -m "Registering new Patch Version" # git push origin ${new_tag} @@ -179,60 +181,60 @@ jobs: cd modules/ml-pipeline/src/pipeline dvc push -r dev - Register-New-Model-Dev: - needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] - if: | - always() && - (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && - (needs.Register-Minor-Model-Dev.result == 'success' || needs.Register-Minor-Model-Dev.result == 'skipped') && - (needs.Register-Patch-Model-Dev.result == 'success' || needs.Register-Patch-Model-Dev.result == 'skipped') + # Register-New-Model-Dev: + # needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] + # if: | + # always() && + # (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && + # (needs.Register-Minor-Model-Dev.result == 'success' || needs.Register-Minor-Model-Dev.result == 'skipped') && + # (needs.Register-Patch-Model-Dev.result == 'success' || needs.Register-Patch-Model-Dev.result == 'skipped') - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 + # with: + # fetch-depth: 0 - - name: Install packages to register model - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt + # - name: Install packages to register model + # run: | + # pip install --upgrade pip + # pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - name: Register Model - env: - 
TARGET_BRANCH: ${{ github.base_ref }} - run: | + # - name: Register Model + # env: + # TARGET_BRANCH: ${{ github.base_ref }} + # run: | - REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') + # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') + # # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') - git config user.name "Github-Bot" - git config user.email "Github-Bot@no-reply.com" + # git config user.name "Github-Bot" + # git config user.email "Github-Bot@no-reply.com" - latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}') - if [ -z "${latest_dev_version}" ]; then - increment_version="1" - else - increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}') - fi + # latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}') + # if [ -z "${latest_dev_version}" ]; then + # increment_version="1" + # else + # increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}') + # fi - new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version} - latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}') + # new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version} + # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}') - echo ${new_tag} + # echo ${new_tag} - commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}') - git checkout ${commit_hash} + # commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}') + # git checkout ${commit_hash} - # git pull #Get new model registry md 
file changes - git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}" - git push origin ${new_tag} + # # git pull #Get new model registry md file changes + # git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}" + # git push origin ${new_tag} - git checkout ${TARGET_BRANCH} - git fetch --all - git pull + # git checkout ${TARGET_BRANCH} + # git fetch --all + # git pull - gto show --json > MODEL_REGISTRY.md - git add . - git commit -m "Update Registry" - git push origin ${TARGET_BRANCH} + # gto show --json > MODEL_REGISTRY.md + # git add . + # git commit -m "Update Registry" + # git push origin ${TARGET_BRANCH} From 9e23eae27af4c22a5f52280c718b1551a6db31ca Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 26 May 2024 09:54:22 +0100 Subject: [PATCH 11/20] add testing script --- .../pipeline/requirements/version_control/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt index a2b9531..173550d 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt @@ -1,4 +1,4 @@ -dvc==3.36.0 -dvc-s3==3.0.1 -gto==1.6.1 +dvc==3.51.0 +dvc-s3==3.2.0 +gto==1.7.1 pyOpenSSL==23.3.0 From e0954b52bce8088ec2b1550d2a58fd40de454b87 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sun, 26 May 2024 09:56:05 +0100 Subject: [PATCH 12/20] Upgrade dvc packages to fix pygit2 error --- .github/workflows/MLPipelineTESTING.yml | 240 ------------------------ 1 file changed, 240 deletions(-) delete mode 100644 .github/workflows/MLPipelineTESTING.yml diff --git a/.github/workflows/MLPipelineTESTING.yml b/.github/workflows/MLPipelineTESTING.yml deleted file mode 100644 index 
f2a200a..0000000 --- a/.github/workflows/MLPipelineTESTING.yml +++ /dev/null @@ -1,240 +0,0 @@ -name: Register the model for the given pipeline branch (TESTING) - -on: - push: - branches: - - "sap-dev-gto" - -# on: -# pull_request: -# types: -# - closed -# branches: -# - "sap-dev" -# - "heat-dev" -# - "carbon-dev" - -permissions: write-all - -jobs: - Register-Major-Model-Dev: - # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install packages to register model - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - - name: Register Model - run: | - # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') - - git config user.name "Github-Bot" - git config user.email "Github-Bot@no-reply.com" - - gto show - - # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') || false - # if [ -z "${latest_version}" ]; then - # increment_version="1.0.0" - # else - # increment_version=$(echo ${latest_version} | awk 'BEGIN { - # FS="\\." # Set the field separator to a period - # OFS="." # Set the output field separator to a period - # } - # { - # major = $1 + 1 # Increment the major version - # print major, "0", "0" # Print the new version - # }') - # fi - - # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} - - # git tag -a ${new_tag} -m "Registering new Major Version" - # git push origin ${new_tag} - - # gto show --json > MODEL_REGISTRY.md - # git add . 
- # git commit -m "Update Registry" - # git push - - Register-Minor-Model-Dev: - # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install packages to register model - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - - name: Register Model - run: | - # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') - - git config user.name "Github-Bot" - git config user.email "Github-Bot@no-reply.com" - - # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') - # if [ -z "${latest_version}" ]; then - # increment_version="0.1.0" - # else - # increment_version=$(echo ${latest_version} | awk 'BEGIN { - # FS="\\." # Set the field separator to a period - # OFS="." # Set the output field separator to a period - # } - # { - # minor = $2 + 1 # Increment the minor version - # print $1, minor, "0" # Print the new version - # }') - # fi - - # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} - - # git tag -a ${new_tag} -m "Registering new Minor Version" - # git push origin ${new_tag} - - # gto show --json > MODEL_REGISTRY.md - # git add . 
- # git commit -m "Update Registry" - # git push - - Register-Patch-Model-Dev: - # if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install packages to register model - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - - name: Register Model - run: | - # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') - - git config user.name "Github-Bot" - git config user.email "Github-Bot@no-reply.com" - - # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@v" '{print $2}') - # if [ -z "${latest_version}" ]; then - # increment_version="0.0.1" - # else - # increment_version=$(echo ${latest_version} | awk 'BEGIN { - # FS="\\." # Set the field separator to a period - # OFS="." # Set the output field separator to a period - # } - # { - # patch = $3 + 1 # Increment the patch version - # print $1, $2, patch # Print the new version - # }') - # fi - - # new_tag=${REGISTER_MODEL_NAME}@v${increment_version} - - # git tag -a ${new_tag} -m "Registering new Patch Version" - # git push origin ${new_tag} - - # gto show --json > MODEL_REGISTRY.md - # git add . 
- # git commit -m "Update Registry" - # git push - - Promote-Artefacts-To-Dev: - if: github.event.pull_request.merged == true - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Install packages to retrieve artifacts - run: | - pip install --upgrade pip - pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - - name: Retrieve artifacts (dvc.lock) - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - cd modules/ml-pipeline/src/pipeline - dvc pull -r experiments - - - name: Push artifacts to Dev - env: - AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }} - run: | - cd modules/ml-pipeline/src/pipeline - dvc push -r dev - - # Register-New-Model-Dev: - # needs: [Register-Major-Model-Dev, Register-Minor-Model-Dev, Register-Patch-Model-Dev] - # if: | - # always() && - # (needs.Register-Major-Model-Dev.result == 'success' || needs.Register-Major-Model-Dev.result == 'skipped') && - # (needs.Register-Minor-Model-Dev.result == 'success' || needs.Register-Minor-Model-Dev.result == 'skipped') && - # (needs.Register-Patch-Model-Dev.result == 'success' || needs.Register-Patch-Model-Dev.result == 'skipped') - - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # with: - # fetch-depth: 0 - - # - name: Install packages to register model - # run: | - # pip install --upgrade pip - # pip install -r modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt - - # - name: Register Model - # env: - # TARGET_BRANCH: ${{ github.base_ref }} - # run: | - - # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}') - # # REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}') - - # git config user.name "Github-Bot" - # git config user.email "Github-Bot@no-reply.com" - - 
# latest_dev_version=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk '{print $NF}' | awk '/dev/' | awk 'END {print}') - # if [ -z "${latest_dev_version}" ]; then - # increment_version="1" - # else - # increment_version=$(echo ${latest_dev_version} | awk '{print $NF}' | awk -F"#" '{print $3}' | awk '{$1++; print}') - # fi - - # new_tag=${REGISTER_MODEL_NAME}#dev#${increment_version} - # latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref | awk -F"@" '{print $2}') - - # echo ${new_tag} - - # commit_hash=$(gto history ${REGISTER_MODEL_NAME} --asc --plain | awk "/${latest_version}/" | awk '{print $(NF-1)}') - # git checkout ${commit_hash} - - # # git pull #Get new model registry md file changes - # git tag -a ${new_tag} -m "Assigning stage dev to artifact ${REGISTER_MODEL_NAME} version ${latest_version}" - # git push origin ${new_tag} - - # git checkout ${TARGET_BRANCH} - # git fetch --all - # git pull - - # gto show --json > MODEL_REGISTRY.md - # git add . - # git commit -m "Update Registry" - # git push origin ${TARGET_BRANCH} From a78c5a50b0976541239a696f68b9422ff63fca83 Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Sun, 26 May 2024 09:07:46 +0000 Subject: [PATCH 13/20] Update Registry --- MODEL_REGISTRY.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 97c4388..606521a 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -8,7 +8,7 @@ "active": true }, "sap": { - "version": "v0.11.0", + "version": "v0.12.0", "stage": { "dev": "v0.11.0" }, @@ -16,17 +16,17 @@ "active": true }, "heat": { - "version": "v0.4.0", + "version": "v0.5.0", "stage": { - "dev": "v0.4.0" + "dev": "v0.5.0" }, "registered": true, "active": true }, "carbon": { - "version": "v0.4.0", + "version": "v0.5.0", "stage": { - "dev": "v0.4.0" + "dev": "v0.5.0" }, "registered": true, "active": true From 396a5ffb087acf0186f3762cc117fad18a0668d2 Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Sun, 26 May 2024 
09:08:23 +0000 Subject: [PATCH 14/20] Update Registry --- MODEL_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 606521a..9b31dab 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -10,7 +10,7 @@ "sap": { "version": "v0.12.0", "stage": { - "dev": "v0.11.0" + "dev": "v0.12.0" }, "registered": true, "active": true From 5e0118ca0b1b03d32540050d8b2880b08e7273a8 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 28 May 2024 16:55:47 +0100 Subject: [PATCH 15/20] change deployment - pinned serverless package --- .github/workflows/Deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Deploy.yml b/.github/workflows/Deploy.yml index 6e34d36..265a324 100644 --- a/.github/workflows/Deploy.yml +++ b/.github/workflows/Deploy.yml @@ -19,8 +19,8 @@ jobs: - name: Install Serverless and plugins run: | - npm install -g serverless - npm install -g serverless-domain-manager + npm install -g serverless@^3.38.0 + npm install -g serverless-domain-manager@^7.3.8 - name: Install DVC run: | From 6f00d6afb80d1aa572181402430832674f86e9dd Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Tue, 28 May 2024 15:57:55 +0000 Subject: [PATCH 16/20] Update Registry --- MODEL_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 9b31dab..5d47b16 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -8,7 +8,7 @@ "active": true }, "sap": { - "version": "v0.12.0", + "version": "v0.13.0", "stage": { "dev": "v0.12.0" }, From dc260fddd0efdc4a90e80fbd0438a121ca0234f1 Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Tue, 28 May 2024 15:58:31 +0000 Subject: [PATCH 17/20] Update Registry --- MODEL_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 5d47b16..ff4b5a3 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -10,7 +10,7 @@ "sap": { 
"version": "v0.13.0", "stage": { - "dev": "v0.12.0" + "dev": "v0.13.0" }, "registered": true, "active": true From 8399092e20805f614e738924e1e3c69c8d9d5fec Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 28 May 2024 19:58:46 +0100 Subject: [PATCH 18/20] formatting --- .../src/pipeline/configs/scenarios.yaml | 4 +- .../src/pipeline/configs/settings.yaml | 4 +- modules/ml-pipeline/src/pipeline/dvc.lock | 58 +++++++++---------- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml index 8190888..0d4ee07 100644 --- a/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/scenarios.yaml @@ -6,6 +6,8 @@ default: # - s3://retrofit-data-dev/scenario_data/22-03-2024-19-20-09/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/24-03-2024-20-23-25/recommendations_scoring_data.parquet # - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet - - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet + # - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet + # - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet + - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet comparison_output_filepath: ./metrics/scenario_table.md metrics_output_filepath: ./metrics/scenario_metrics.md diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 9d466af..838e9a9 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -19,7 +19,9 @@ default: input_dataclient_type: aws-s3 output_dataclient_type: local # data_filepath: 
s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet - data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet + # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet + # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet + data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index f2fc8be..31315db 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -40,7 +40,7 @@ stages: default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_type: dataframe default.prepare_data.data_filepath: - s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet + s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -49,8 +49,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 26bbe6b1dafae18eb50689604b925c87.dir - size: 45002224 + md5: 80c9e138146a1d96b9d16091c207e2e8.dir + size: 45056059 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -61,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 26bbe6b1dafae18eb50689604b925c87.dir - size: 45002224 + md5: 80c9e138146a1d96b9d16091c207e2e8.dir + size: 45056059 nfiles: 2 params: configs/build_model.yaml: @@ -94,18 +94,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 7ba44b4af6ecedf3ffebcf7512731d3d.dir - size: 3348905 
+ md5: d9c9afc05e8780db47c0548b19bf7d19.dir + size: 3349989 nfiles: 1 - path: data/model/ hash: md5 - md5: ef62a6f9b9336fb5b648589d6e0d54d6.dir - size: 737305293 - nfiles: 35 + md5: 13c3100e1486c27a83a8a47491077842.dir + size: 773523079 + nfiles: 36 - path: metrics/fit_metrics.json hash: md5 - md5: 38600703e1ece1447e5d0fd80b4de8b7 - size: 217 + md5: 2ff70a2a45813e1bcdf2ea3aa8e07d4a + size: 224 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -115,13 +115,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: ef62a6f9b9336fb5b648589d6e0d54d6.dir - size: 737305293 - nfiles: 35 + md5: 13c3100e1486c27a83a8a47491077842.dir + size: 773523079 + nfiles: 36 - path: data/prepared_data hash: md5 - md5: 26bbe6b1dafae18eb50689604b925c87.dir - size: 45002224 + md5: 80c9e138146a1d96b9d16091c207e2e8.dir + size: 45056059 nfiles: 2 params: configs/settings.yaml: @@ -133,8 +133,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 7d88320b1cd3c690032438fad6cb2200.dir - size: 463523 + md5: 5d07bcebf3160a72bb18dfd79106e85c.dir + size: 463197 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -145,13 +145,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 7d88320b1cd3c690032438fad6cb2200.dir - size: 463523 + md5: 5d07bcebf3160a72bb18dfd79106e85c.dir + size: 463197 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 26bbe6b1dafae18eb50689604b925c87.dir - size: 45002224 + md5: 80c9e138146a1d96b9d16091c207e2e8.dir + size: 45056059 nfiles: 2 params: configs/settings.yaml: @@ -161,30 +161,30 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: e17ee59f3a04178a153c5746bf897e74 + md5: 3e08df02fd5c5d094bcf936e1338d596 size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: - path: 5_generate_scenarios.py hash: md5 - md5: a18f6c6ae2082f038df47386cf3e418e - size: 4896 + md5: 40506749fefd926d47c60ff5b16db307 + size: 5337 params: configs/scenarios.yaml: default.scenarios: input_dataclient_type: aws-s3 
output_dataclient_type: local scenario_data_filepaths: - - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet + - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet comparison_output_filepath: ./metrics/scenario_table.md metrics_output_filepath: ./metrics/scenario_metrics.md outs: - path: metrics/scenario_metrics.md hash: md5 - md5: 84fcae91af1480ee0c8fc1616af359b3 + md5: fa4d6d7bbd7818613800da5f8f37ea96 size: 363 - path: metrics/scenario_table.md hash: md5 - md5: 8571ff812c2f7c71eb0b1534ff6ecff5 + md5: d6baf100a1623cc2467c2f8221d314c9 size: 2133 From 9925b54af23d7f0a840de3ff1758a9955bb46f5f Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Thu, 30 May 2024 11:47:04 +0000 Subject: [PATCH 19/20] Update Registry --- MODEL_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index ff4b5a3..971208e 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -8,7 +8,7 @@ "active": true }, "sap": { - "version": "v0.13.0", + "version": "v0.14.0", "stage": { "dev": "v0.13.0" }, From d09c534e0d99876ea282ff6dd7cb4600196cc419 Mon Sep 17 00:00:00 2001 From: Github-Bot Date: Thu, 30 May 2024 11:47:46 +0000 Subject: [PATCH 20/20] Update Registry --- MODEL_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODEL_REGISTRY.md b/MODEL_REGISTRY.md index 971208e..2fea343 100644 --- a/MODEL_REGISTRY.md +++ b/MODEL_REGISTRY.md @@ -10,7 +10,7 @@ "sap": { "version": "v0.14.0", "stage": { - "dev": "v0.13.0" + "dev": "v0.14.0" }, "registered": true, "active": true