test fewer features

This commit is contained in:
Michael Duong 2024-02-17 16:26:49 +00:00
parent 81e7c2a4bd
commit cec3cc60e7
3 changed files with 116 additions and 30 deletions

View file

@@ -14,7 +14,7 @@ default:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error #mean_absolute_error
time_limit: 60
time_limit: 4000
presets: medium_quality
excluded_model_types: ['RF', 'FASTAI', 'CAT', 'NN_TORCH', 'KNN', 'XT']
infer_limit: 0.05

View file

@@ -24,7 +24,7 @@ default:
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
# data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
train_proportion: 1
train_proportion: 0.95
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet
@@ -36,8 +36,35 @@ default:
target: sap_ending
identifier_columns: ["uprn"]
drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
# retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
retain_features: null
retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
'walls_energy_eff_ending', 'secondheat_description_ending',
'property_type', 'mainheatc_energy_eff_ending', 'built_form',
'walls_insulation_thickness_ending', 'potential_energy_efficiency',
'transaction_type_ending', 'mainheat_energy_eff_starting',
'floor_thermal_transmittance_ending', 'hot_water_energy_eff_starting',
'low_energy_lighting_ending', 'heat_demand_starting',
'photo_supply_ending', 'carbon_starting',
'walls_thermal_transmittance_ending', 'fuel_type_ending',
'roof_insulation_thickness_ending', 'transaction_type_starting',
'total_floor_area_ending', 'number_open_fireplaces_ending',
'roof_insulation_thickness', 'windows_energy_eff_ending',
'walls_insulation_thickness', 'floor_height_ending',
'secondheat_description_starting', 'floor_thermal_transmittance',
'mainheatc_energy_eff_starting', 'extension_count_ending',
'has_air_source_heat_pump_ending', 'walls_energy_eff_starting',
'charging_system_ending', 'construction_age_band', 'glazed_type_ending',
'roof_thermal_transmittance_ending',
'floor_insulation_thickness_ending', 'has_mains_gas_ending',
'estimated_perimeter_starting', 'energy_consumption_potential',
'environment_impact_potential', 'roof_energy_eff_starting',
'another_property_below', 'heater_type_ending',
'walls_thermal_transmittance', 'total_floor_area_starting',
'multi_glaze_proportion_ending', 'is_suspended',
'floor_height_starting', 'lighting_energy_eff_ending',
'energy_tariff_ending', 'fixed_lighting_outlets_count',
'low_energy_lighting_starting', 'mechanical_ventilation_ending']
# retain_features: null
generate_predictions:
input_dataclient_type: local

View file

@@ -18,6 +18,65 @@ stages:
- days_to_starting
- days_to_ending
default.feature_processor.feature_processor_config.retain_features:
- uprn
- sap_starting
- hot_water_energy_eff_ending
- mainheat_energy_eff_ending
- constituency
- roof_energy_eff_ending
- walls_energy_eff_ending
- secondheat_description_ending
- property_type
- mainheatc_energy_eff_ending
- built_form
- walls_insulation_thickness_ending
- potential_energy_efficiency
- transaction_type_ending
- mainheat_energy_eff_starting
- floor_thermal_transmittance_ending
- hot_water_energy_eff_starting
- low_energy_lighting_ending
- heat_demand_starting
- photo_supply_ending
- carbon_starting
- walls_thermal_transmittance_ending
- fuel_type_ending
- roof_insulation_thickness_ending
- transaction_type_starting
- total_floor_area_ending
- number_open_fireplaces_ending
- roof_insulation_thickness
- windows_energy_eff_ending
- walls_insulation_thickness
- floor_height_ending
- secondheat_description_starting
- floor_thermal_transmittance
- mainheatc_energy_eff_starting
- extension_count_ending
- has_air_source_heat_pump_ending
- walls_energy_eff_starting
- charging_system_ending
- construction_age_band
- glazed_type_ending
- roof_thermal_transmittance_ending
- floor_insulation_thickness_ending
- has_mains_gas_ending
- estimated_perimeter_starting
- energy_consumption_potential
- environment_impact_potential
- roof_energy_eff_starting
- another_property_below
- heater_type_ending
- walls_thermal_transmittance
- total_floor_area_starting
- multi_glaze_proportion_ending
- is_suspended
- floor_height_starting
- lighting_energy_eff_ending
- energy_tariff_ending
- fixed_lighting_outlets_count
- low_energy_lighting_starting
- mechanical_ventilation_ending
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending
@@ -27,12 +86,12 @@ stages:
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
default.prepare_data.output_train_filepath: ./data/prepared_data/train.parquet
default.prepare_data.train_proportion: 1
default.prepare_data.train_proportion: 0.95
outs:
- path: data/prepared_data/
hash: md5
md5: 174752a2b228f7af687fe91de77ca0b8.dir
size: 42622503
md5: 59f8ea78ec225f5a05de451c6145e2d5.dir
size: 34059502
nfiles: 2
build_model:
cmd: python 2_build_model.py
@@ -43,8 +102,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 174752a2b228f7af687fe91de77ca0b8.dir
size: 42622503
md5: 59f8ea78ec225f5a05de451c6145e2d5.dir
size: 34059502
nfiles: 2
params:
configs/build_model.yaml:
@@ -61,7 +120,7 @@ stages:
output_filepath: ./data/model/allmodels/
problem_type: regression
eval_metric: mean_squared_error
time_limit: 60
time_limit: 4000
presets: medium_quality
excluded_model_types:
- RF
@@ -75,18 +134,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: a7e32ced2c7ca88a1e80ed0c2135388d.dir
size: 3675177
md5: bb74626ff3d33581efe750955cdff860.dir
size: 3539589
nfiles: 1
- path: data/model/
hash: md5
md5: 6d81c99ee00e03bba69db468161dfe19.dir
size: 335451645
nfiles: 21
md5: e100d4dcccc1c7d30367b0ca0672e3af.dir
size: 654714285
nfiles: 31
- path: metrics/fit_metrics.json
hash: md5
md5: 296fd7785e867da96eec96683384c444
size: 224
md5: d074f5aa588d3405be65a9684f192465
size: 226
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@@ -96,13 +155,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 6d81c99ee00e03bba69db468161dfe19.dir
size: 335451645
nfiles: 21
md5: e100d4dcccc1c7d30367b0ca0672e3af.dir
size: 654714285
nfiles: 31
- path: data/prepared_data
hash: md5
md5: 174752a2b228f7af687fe91de77ca0b8.dir
size: 42622503
md5: 59f8ea78ec225f5a05de451c6145e2d5.dir
size: 34059502
nfiles: 2
params:
configs/settings.yaml:
@@ -114,8 +173,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 3fd770fe0f8064cfc30c2b68575f9e7f.dir
size: 647505
md5: 36e26c509176caae6290f75ad486810d.dir
size: 232044
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@@ -126,13 +185,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 3fd770fe0f8064cfc30c2b68575f9e7f.dir
size: 647505
md5: 36e26c509176caae6290f75ad486810d.dir
size: 232044
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 174752a2b228f7af687fe91de77ca0b8.dir
size: 42622503
md5: 59f8ea78ec225f5a05de451c6145e2d5.dir
size: 34059502
nfiles: 2
params:
configs/settings.yaml:
@@ -142,7 +201,7 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: fa4972e309c6e278d986f305543b3084
md5: 7b71931c5857358ca2603889de6abb3a
size: 223
startup_cleanup:
cmd: python 0_startup_cleanup.py
@@ -173,5 +232,5 @@ stages:
outs:
- path: metrics/scenario_table.md
hash: md5
md5: 634d39623623a82ce8554a38d3fb82b0
md5: 72db7530c9ca42470ee8bd1a1e7b52b4
size: 1648