Add new model with the latest data

This commit is contained in:
Michael Duong 2025-09-15 14:56:53 +01:00
parent ce1ebb6174
commit 88c5b6c93a
2 changed files with 32 additions and 29 deletions

View file

@ -20,7 +20,8 @@ default:
output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-03-22-57-23/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet
@ -38,7 +39,7 @@ default:
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting',
'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',
'floor_thermal_transmittance', 'floor_thermal_transmittance_ending']
'floor_thermal_transmittance', 'floor_thermal_transmittance_ending', 'lodgement_date_starting', 'lodgement_date_ending',]
retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',

View file

@ -42,13 +42,15 @@ stages:
- hot_water_cost_ending
- floor_thermal_transmittance
- floor_thermal_transmittance_ending
- lodgement_date_starting
- lodgement_date_ending
default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2024-10-08-21-58-03/dataset_rooms.parquet
default.prepare_data.data_filepath:
s3://retrofit-data-dev/sap_change_model/2025-09-05-14-05-32/dataset_rooms.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@ -57,8 +59,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 9159a400187e6d65687b5e411a4cb0de.dir
size: 48034631
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -69,8 +71,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 9159a400187e6d65687b5e411a4cb0de.dir
size: 48034631
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
nfiles: 2
params:
configs/build_model.yaml:
@ -102,17 +104,17 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: 6ac50c46e6fd740ccf76da4c2bf6735d.dir
size: 3615441
md5: a6196bf08607c43ba6bc637611bb32b0.dir
size: 3491001
nfiles: 1
- path: data/model/
hash: md5
md5: 2212643103819177f58da1d3063c8c94.dir
size: 761489901
nfiles: 35
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
size: 790430916
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: d379cf95e07eb7c8797b4b766f8292cf
md5: 33421d5e3a2d569dbe6d4486c568a2b7
size: 225
generate_predictions:
cmd: python 3_generate_predictions.py
@ -123,13 +125,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 2212643103819177f58da1d3063c8c94.dir
size: 761489901
nfiles: 35
md5: b225d7b01356cecefb3794a9a3cd19b5.dir
size: 790430916
nfiles: 36
- path: data/prepared_data
hash: md5
md5: 9159a400187e6d65687b5e411a4cb0de.dir
size: 48034631
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
nfiles: 2
params:
configs/settings.yaml:
@ -141,8 +143,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: e8bd8e8ba88a667ccea645890d348e62.dir
size: 507948
md5: bd6821db9abc95af8c74aa20effd7f37.dir
size: 487194
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -153,13 +155,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: e8bd8e8ba88a667ccea645890d348e62.dir
size: 507948
md5: bd6821db9abc95af8c74aa20effd7f37.dir
size: 487194
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 9159a400187e6d65687b5e411a4cb0de.dir
size: 48034631
md5: 7cc090d55cb296ce5d360d655c06e861.dir
size: 46183314
nfiles: 2
params:
configs/settings.yaml:
@ -169,8 +171,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: a8cf405272776730f5818d50b20c6f43
size: 222
md5: 9c2a7802554f5c2f750b2242c6003026
size: 223
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
@ -190,9 +192,9 @@ stages:
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: a5d9c42d38ef50e4fdf99a3e6043af2a
md5: c01524a0cc2e61151c106d7049af3bf9
size: 356
- path: metrics/scenario_table.md
hash: md5
md5: 3e48c953451af8852572299b66988910
md5: a995c8ef7ffbe2ca254441150817ae21
size: 872