try categorical type for extensions

This commit is contained in:
Michael Duong 2024-10-17 19:30:47 +01:00
parent 18797f4324
commit e84193053f
4 changed files with 42 additions and 31 deletions

View file

@ -8,25 +8,25 @@
"active": true
},
"sap": {
"version": "v0.15.0",
"version": "v0.16.0",
"stage": {
"dev": "v0.15.0"
"dev": "v0.16.0"
},
"registered": true,
"active": true
},
"heat": {
"version": "v0.6.0",
"version": "v0.7.0",
"stage": {
"dev": "v0.6.0"
"dev": "v0.7.0"
},
"registered": true,
"active": true
},
"carbon": {
"version": "v0.6.0",
"version": "v0.7.0",
"stage": {
"dev": "v0.6.0"
"dev": "v0.7.0"
},
"registered": true,
"active": true

View file

@ -45,6 +45,12 @@ def keep_non_zero_rdsap(df):
return df
def convert_extension_columns_to_categorical(df):
    """Cast the extension-count columns of *df* to the ``category`` dtype.

    The ``extension_count_starting`` and ``extension_count_ending`` columns
    are replaced with their categorical equivalents. The conversion is done
    in place: the same object that was passed in is modified and returned.

    Args:
        df: Table holding both extension-count columns.
            NOTE(review): assumed to be a pandas DataFrame -- confirm
            against the caller.

    Returns:
        The same ``df`` object, with the two columns converted.
    """
    extension_columns = (
        "extension_count_starting",
        "extension_count_ending",
    )
    # Assign column by column so every other column keeps its dtype.
    for column in extension_columns:
        df[column] = df[column].astype("category")
    return df
# def keep_ending_columns(df):
# ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
# keep_columns = df.columns[ending_column_index].to_list()
@ -54,6 +60,7 @@ def keep_non_zero_rdsap(df):
# return df
business_logic = {
"convert_extension_columns_to_categorical": convert_extension_columns_to_categorical,
# "keep_non_zero_rdsap": keep_non_zero_rdsap,
# "keep_flats": keep_flats,
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,

View file

@ -36,7 +36,9 @@ default:
drop_columns: [
"heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending",
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',]
'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting',
'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending',
'floor_thermal_transmittance', 'floor_thermal_transmittance_ending']
retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',

View file

@ -40,6 +40,8 @@ stages:
- heating_cost_ending
- hot_water_cost_starting
- hot_water_cost_ending
- floor_thermal_transmittance
- floor_thermal_transmittance_ending
default.feature_processor.feature_processor_config.retain_features:
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
@ -55,8 +57,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
size: 49655735
md5: b4e5adebb1acfd9a84b785c6ae62ebec.dir
size: 47975005
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -67,8 +69,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
size: 49655735
md5: b4e5adebb1acfd9a84b785c6ae62ebec.dir
size: 47975005
nfiles: 2
params:
configs/build_model.yaml:
@ -100,18 +102,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: a1c97fa17adae60ee97696c95cfebe7e.dir
size: 3618488
md5: 37d014de108113e6bf181d59c82cac9d.dir
size: 3617984
nfiles: 1
- path: data/model/
hash: md5
md5: 7e6f38163a2cfe208ace26702ae28793.dir
size: 762167138
md5: 1978ae8a3960b419ab764fd47a3693ca.dir
size: 761166981
nfiles: 35
- path: metrics/fit_metrics.json
hash: md5
md5: 8a6db7d72f9b4b8a5e411beaa3a2a413
size: 224
md5: f7ad23a19a3f53281e3fecd8b18568fe
size: 223
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -121,13 +123,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 7e6f38163a2cfe208ace26702ae28793.dir
size: 762167138
md5: 1978ae8a3960b419ab764fd47a3693ca.dir
size: 761166981
nfiles: 35
- path: data/prepared_data
hash: md5
md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
size: 49655735
md5: b4e5adebb1acfd9a84b785c6ae62ebec.dir
size: 47975005
nfiles: 2
params:
configs/settings.yaml:
@ -139,8 +141,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 14729e943275748a19e86cf47d6561ee.dir
size: 508468
md5: e0472eb66c1876b95c9df7f71aff004d.dir
size: 508648
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -151,13 +153,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 14729e943275748a19e86cf47d6561ee.dir
size: 508468
md5: e0472eb66c1876b95c9df7f71aff004d.dir
size: 508648
nfiles: 1
- path: data/prepared_data
hash: md5
md5: cd82e70e3cfcd0421cddf9a2c4b3e476.dir
size: 49655735
md5: b4e5adebb1acfd9a84b785c6ae62ebec.dir
size: 47975005
nfiles: 2
params:
configs/settings.yaml:
@ -167,8 +169,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: e076cba5f5737fd6d95931931411fdd0
size: 225
md5: fc7b825ce93c38e3fa1b4124de195743
size: 224
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps:
@ -188,9 +190,9 @@ stages:
outs:
- path: metrics/scenario_metrics.md
hash: md5
md5: e210b90c92765061a7653842153f8054
md5: c6300902b312e95d42f5d6cf5b94efaf
size: 356
- path: metrics/scenario_table.md
hash: md5
md5: 63cba3e3c1107ce8b91d33f3e4a82d05
md5: 61cc58bf790be5da91efe3aee9a78122
size: 872