diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 483fdd5..34e03fb 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -20,7 +20,8 @@ default: output_dataclient_type: local # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet - data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet + # data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet + data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet train_proportion: 0.9 output_train_filepath: ./data/prepared_data/train.parquet output_test_filepath: ./data/prepared_data/test.parquet @@ -38,7 +39,7 @@ default: 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', 'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending', - 'floor_thermal_transmittance', 'floor_thermal_transmittance_ending'] + 'floor_thermal_transmittance', 'floor_thermal_transmittance_ending', 'lodgement_date_starting', 'lodgement_date_ending',] retain_features: null # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index e37c23b..16b7b07 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -42,13 +42,15 @@ stages: - hot_water_cost_ending - floor_thermal_transmittance - floor_thermal_transmittance_ending + - lodgement_date_starting + - lodgement_date_ending default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: sap_ending default.feature_processor.feature_processor_type: dataframe - default.prepare_data.data_filepath: - s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet + default.prepare_data.data_filepath: + s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet @@ -57,8 +59,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 9159a400187e6d65687b5e411a4cb0de.dir - size: 48034631 + md5: 7cc090d55cb296ce5d360d655c06e861.dir + size: 46183314 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -69,8 +71,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 9159a400187e6d65687b5e411a4cb0de.dir - size: 48034631 + md5: 7cc090d55cb296ce5d360d655c06e861.dir + size: 46183314 nfiles: 2 params: configs/build_model.yaml: @@ -102,17 +104,17 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 6ac50c46e6fd740ccf76da4c2bf6735d.dir - size: 3615441 + md5: a6196bf08607c43ba6bc637611bb32b0.dir + size: 3491001 nfiles: 1 - path: data/model/ hash: md5 - md5: 2212643103819177f58da1d3063c8c94.dir - size: 761489901 - nfiles: 35 + md5: b225d7b01356cecefb3794a9a3cd19b5.dir + size: 790430916 + nfiles: 36 - path: metrics/fit_metrics.json hash: md5 - md5: d379cf95e07eb7c8797b4b766f8292cf + md5: 33421d5e3a2d569dbe6d4486c568a2b7 size: 225 generate_predictions: cmd: python 3_generate_predictions.py @@ -123,13 +125,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 2212643103819177f58da1d3063c8c94.dir - size: 761489901 - nfiles: 35 + md5: b225d7b01356cecefb3794a9a3cd19b5.dir + size: 790430916 + nfiles: 36 - path: data/prepared_data hash: md5 - md5: 9159a400187e6d65687b5e411a4cb0de.dir - size: 48034631 + md5: 7cc090d55cb296ce5d360d655c06e861.dir + size: 46183314 nfiles: 2 params: configs/settings.yaml: @@ -141,8 +143,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: e8bd8e8ba88a667ccea645890d348e62.dir - size: 507948 + md5: bd6821db9abc95af8c74aa20effd7f37.dir + size: 487194 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -153,13 +155,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: e8bd8e8ba88a667ccea645890d348e62.dir - size: 507948 + md5: bd6821db9abc95af8c74aa20effd7f37.dir + size: 487194 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 9159a400187e6d65687b5e411a4cb0de.dir - size: 48034631 + md5: 7cc090d55cb296ce5d360d655c06e861.dir + size: 46183314 nfiles: 2 params: configs/settings.yaml: @@ -169,8 +171,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: a8cf405272776730f5818d50b20c6f43 - size: 222 + md5: 9c2a7802554f5c2f750b2242c6003026 + size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: @@ -190,9 +192,9 @@ stages: outs: - path: metrics/scenario_metrics.md hash: md5 - md5: a5d9c42d38ef50e4fdf99a3e6043af2a + md5: c01524a0cc2e61151c106d7049af3bf9 size: 356 - path: metrics/scenario_table.md hash: md5 - md5: 3e48c953451af8852572299b66988910 + md5: a995c8ef7ffbe2ca254441150817ae21 size: 872