Remove hot_water_kwh feature, lowering mean squared error

This commit is contained in:
Michael Duong 2024-07-12 22:46:32 +01:00
parent 99d28e8b61
commit 9785181e80
2 changed files with 156 additions and 25 deletions

View file

@ -36,14 +36,78 @@ default:
target: heating_kwh
identifier_columns: ["uprn"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
drop_columns: []
drop_columns: ["hot_water_kwh"]
# [
# "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
# "heating_cost_ending", "hot_water_cost_ending",
# # "days_to_starting", "days_to_ending",
# 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending',
# 'number_habitable_rooms', 'number_heated_rooms']
retain_features: null
retain_features: ['uprn', 'heating-cost-current',
'co2-emissions-current',
'hot-water-cost-current',
'total-floor-area',
'secondheat-description',
'environment-impact-current',
'floor-description',
'mainheat-energy-eff',
'current-energy-efficiency',
'mainheat-env-eff',
'walls-energy-eff',
'roof-energy-eff',
'property-type',
'mainheat-description',
'hot-water-env-eff',
'mechanical-ventilation',
'floor-level',
'built-form',
'walls-description',
'mainheatcont-description',
'roof-description',
'energy-consumption-current',
'construction-age-band',
'hotwater-description',
'lodgement-datetime',
'main-fuel',
'hot-water-energy-eff',
'co2-emiss-curr-per-floor-area',
'windows-energy-eff',
'current-energy-rating',
'lodgement-year',
'extension-count',
'number-open-fireplaces',
'number-heated-rooms',
'lodgement-date',
'number-habitable-rooms',
'windows-description',
'local-authority',
'photo-supply',
'heat-loss-corridor',
'posttown',
'address',
'flat-top-storey',
'unheated-corridor-length',
'fixed-lighting-outlets-count',
'inspection-date',
'tenure',
'county',
'constituency-label',
'multi-glaze-proportion',
'solar-water-heating-flag',
'address2',
'energy-tariff',
'floor-height',
'constituency',
'uprn-source',
'transaction-type',
'floor-energy-eff',
'postcode',
'lodgement-month',
'lighting-cost-current',
'glazed-area',
'address1',
'floor-env-eff',
'main-heating-controls']
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
# 'walls_energy_eff_ending', 'secondheat_description_ending',

View file

@ -20,8 +20,75 @@ stages:
size: 4298
params:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns: []
default.feature_processor.feature_processor_config.drop_columns:
- hot_water_kwh
default.feature_processor.feature_processor_config.retain_features:
- uprn
- heating-cost-current
- co2-emissions-current
- hot-water-cost-current
- total-floor-area
- secondheat-description
- environment-impact-current
- floor-description
- mainheat-energy-eff
- current-energy-efficiency
- mainheat-env-eff
- walls-energy-eff
- roof-energy-eff
- property-type
- mainheat-description
- hot-water-env-eff
- mechanical-ventilation
- floor-level
- built-form
- walls-description
- mainheatcont-description
- roof-description
- energy-consumption-current
- construction-age-band
- hotwater-description
- lodgement-datetime
- main-fuel
- hot-water-energy-eff
- co2-emiss-curr-per-floor-area
- windows-energy-eff
- current-energy-rating
- lodgement-year
- extension-count
- number-open-fireplaces
- number-heated-rooms
- lodgement-date
- number-habitable-rooms
- windows-description
- local-authority
- photo-supply
- heat-loss-corridor
- posttown
- address
- flat-top-storey
- unheated-corridor-length
- fixed-lighting-outlets-count
- inspection-date
- tenure
- county
- constituency-label
- multi-glaze-proportion
- solar-water-heating-flag
- address2
- energy-tariff
- floor-height
- constituency
- uprn-source
- transaction-type
- floor-energy-eff
- postcode
- lodgement-month
- lighting-cost-current
- glazed-area
- address1
- floor-env-eff
- main-heating-controls
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: heating_kwh
@ -36,8 +103,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: c162c4b80826f42b33cce656446460f3.dir
size: 23784411
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -48,8 +115,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: c162c4b80826f42b33cce656446460f3.dir
size: 23784411
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
nfiles: 2
params:
configs/build_model.yaml:
@ -81,18 +148,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: c6925db5c6b2ff0f95e97aed727462a1.dir
size: 726994
md5: e08a232adc7f805d5d97ed7e93d667b3.dir
size: 726970
nfiles: 1
- path: data/model/
hash: md5
md5: f8cd16b81139a2ed1f40009204b5bb67.dir
size: 557447134
nfiles: 36
md5: 3daab783532ba88d40eb905ff65b0f1c.dir
size: 400927883
nfiles: 37
- path: metrics/fit_metrics.json
hash: md5
md5: 35cc792ba9bda3755928561c512aed3c
size: 214
md5: 9d6a478739e42b2f5f8500de585e9cf9
size: 212
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -102,13 +169,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: f8cd16b81139a2ed1f40009204b5bb67.dir
size: 557447134
nfiles: 36
md5: 3daab783532ba88d40eb905ff65b0f1c.dir
size: 400927883
nfiles: 37
- path: data/prepared_data
hash: md5
md5: c162c4b80826f42b33cce656446460f3.dir
size: 23784411
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
nfiles: 2
params:
configs/settings.yaml:
@ -120,7 +187,7 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 4edc571b115ec0b8be1d2689e97ff36f.dir
md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir
size: 77479
nfiles: 1
generate_metrics:
@ -132,13 +199,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 4edc571b115ec0b8be1d2689e97ff36f.dir
md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir
size: 77479
nfiles: 1
- path: data/prepared_data
hash: md5
md5: c162c4b80826f42b33cce656446460f3.dir
size: 23784411
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
nfiles: 2
params:
configs/settings.yaml:
@ -148,8 +215,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 32811206191a4b2f24234d1f94b80b70
size: 222
md5: 41ccaa41fd34009602d0df571e6453e9
size: 219
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: