add new model for new data

This commit is contained in:
Michael Duong 2024-05-30 21:44:21 +01:00
parent c89ae0f38a
commit c7aedcde04
2 changed files with 190 additions and 1 deletions

View file

@ -8,6 +8,6 @@ default:
# - s3://retrofit-data-dev/scenario_data/27-03-2024-11-38-15/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-08-47-45/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/26-05-2024-10-44-53/recommendations_scoring_data.parquet
- s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
# - s3://retrofit-data-dev/scenario_data/28-05-2024-19-22-41/recommendations_scoring_data.parquet
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md

View file

@ -0,0 +1,189 @@
schema: '2.0'
stages:
startup_cleanup:
cmd: python 0_startup_cleanup.py
deps:
- path: 0_startup_cleanup.py
hash: md5
md5: b1b12f6b6393fbf8b83d23684df0a3d4
size: 1220
params:
configs/settings.yaml:
default.startup_cleanup.artefacts: ./data
default.startup_cleanup.metrics: ./metrics
prepare_data:
cmd: python 1_prepare_data.py
deps:
- path: 1_prepare_data.py
hash: md5
md5: 11a3b8bfdfe199ab7ecc39ccc5652649
size: 4298
params:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns:
- heat_demand_change
- carbon_change
- rdsap_change
- heat_demand_ending
- sap_ending
- days_to_starting
- days_to_ending
- number_habitable_rooms_starting
- number_habitable_rooms_ending
- number_heated_rooms_starting
- number_heated_rooms_ending
- number_habitable_rooms
- number_heated_rooms
default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: carbon_ending
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 0.9
outs:
- path: data/prepared_data/
hash: md5
md5: e2efac20634b919381adfb962a42d40a.dir
size: 36961727
nfiles: 2
build_model:
cmd: python 2_build_model.py
deps:
- path: 2_build_model.py
hash: md5
md5: 7231450b78920b0c5e7c6bada496b24a
size: 4820
- path: data/prepared_data
hash: md5
md5: e2efac20634b919381adfb962a42d40a.dir
size: 36961727
nfiles: 2
params:
configs/build_model.yaml:
default:
build_model:
model_type: AutogluonAutoML
model_save_filepath: ./data/model/optimised/
fit_metrics_filepath: ./metrics/fit_metrics.json
fit_predictions_filepath: ./data/fit_predictions/predictions.parquet
SKLearnLinearRegression:
SKLearnSVMRegression:
kernel: linear
AutogluonAutoML:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error
time_limit: 1800
presets: medium_quality
excluded_model_types:
- RF
- CAT
- NN_TORCH
- KNN
- XT
infer_limit: 0.05
infer_limit_batch_size: 10000
ag_args_ensemble:
num_folds_parallel: 2
outs:
- path: data/fit_predictions/
hash: md5
md5: d2568a3244df4d3444b6190599f74b96.dir
size: 3661106
nfiles: 1
- path: data/model/
hash: md5
md5: 756100e033e0bd4445a437e43f4c53af.dir
size: 730442848
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: 3bcb3b9728521cd341eb71af109ca778
size: 227
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
- path: 3_generate_predictions.py
hash: md5
md5: 0a70ad4dfe99414a75d1261c75a177b9
size: 2464
- path: data/model
hash: md5
md5: 756100e033e0bd4445a437e43f4c53af.dir
size: 730442848
nfiles: 36
- path: data/prepared_data
hash: md5
md5: e2efac20634b919381adfb962a42d40a.dir
size: 36961727
nfiles: 2
params:
configs/settings.yaml:
default.generate_predictions.input_dataclient_type: local
default.generate_predictions.output_dataclient_type: local
default.generate_predictions.predictions_column_name: predictions
default.generate_predictions.predictions_output_filepath: ./data/predictions/predictions.parquet
default.generate_predictions.test_data_filepath: ./data/prepared_data/test.parquet
outs:
- path: data/predictions/
hash: md5
md5: 09f3584d6fbd447dd2714eb2774139d5.dir
size: 499683
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
deps:
- path: 4_generate_metrics.py
hash: md5
md5: 4fedb86d89d528f0a6597934ba3890a0
size: 3484
- path: data/predictions
hash: md5
md5: 09f3584d6fbd447dd2714eb2774139d5.dir
size: 499683
nfiles: 1
- path: data/prepared_data
hash: md5
md5: e2efac20634b919381adfb962a42d40a.dir
size: 36961727
nfiles: 2
params:
configs/settings.yaml:
default.generate_metrics.dataclient_type: local
default.generate_metrics.metrics_output_filepath: ./metrics/metrics.json
default.generate_metrics.metrics_type: Regression
outs:
- path: metrics/metrics.json
hash: md5
md5: abf8720d06f073f47501aa1172527e9e
size: 225
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
- path: 5_generate_scenarios.py
hash: md5
md5: 40506749fefd926d47c60ff5b16db307
size: 5337
params:
configs/scenarios.yaml:
default.scenarios:
input_dataclient_type: aws-s3
output_dataclient_type: local
scenario_data_filepaths:
comparison_output_filepath: ./metrics/scenario_table.md
metrics_output_filepath: ./metrics/scenario_metrics.md
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: d41d8cd98f00b204e9800998ecf8427e
size: 0
- path: metrics/scenario_table.md
hash: md5
md5: d41d8cd98f00b204e9800998ecf8427e
size: 0