Use an explicit retain_features list; remove env features

This commit is contained in:
Michael Duong 2024-08-06 22:37:17 +01:00
parent 73d8c31613
commit 294d9017f1
2 changed files with 125 additions and 42 deletions

View file

@ -30,9 +30,59 @@ default:
subsample_seed: 0
target: hot_water_kwh
identifier_columns: ["uprn"]
drop_columns: ["heating_kwh", 'lodgement-datetime', 'lodgement-date', 'number-habitable-rooms', 'local-authority', 'posttown', 'address', 'inspection-date',
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1', 'mainheat-env-eff', 'environment-impact-current', 'hot-water-env-eff', 'floor-env-eff']
retain_features: null
drop_columns: ["heating_kwh"]
retain_features: [
'uprn',
'heating-cost-current',
'co2-emissions-current',
'hot-water-cost-current',
'total-floor-area',
'secondheat-description',
'floor-description',
'mainheat-energy-eff',
'current-energy-efficiency',
'walls-energy-eff',
'roof-energy-eff',
'property-type',
'mainheat-description',
'mechanical-ventilation',
'floor-level',
'built-form',
'walls-description',
'mainheatcont-description',
'roof-description',
'energy-consumption-current',
'construction-age-band',
'hotwater-description',
'main-fuel',
'hot-water-energy-eff',
'co2-emiss-curr-per-floor-area',
'windows-energy-eff',
'current-energy-rating',
'lodgement-year',
'extension-count',
'number-open-fireplaces',
'number-heated-rooms',
'windows-description',
'photo-supply',
'heat-loss-corridor',
'flat-top-storey',
'unheated-corridor-length',
'fixed-lighting-outlets-count',
'tenure',
'multi-glaze-proportion',
'solar-water-heating-flag',
'energy-tariff',
'floor-height',
'constituency',
'transaction-type',
'floor-energy-eff',
'lodgement-month',
'lighting-cost-current',
'glazed-area',
'main-heating-controls',
'estimate_annual_kwh',
]
generate_predictions:
input_dataclient_type: local

View file

@ -22,24 +22,57 @@ stages:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns:
- heating_kwh
- lodgement-datetime
- lodgement-date
- number-habitable-rooms
- local-authority
- posttown
- address
- inspection-date
- county
- constituency-label
- address2
- uprn-source
- postcode
- address1
- mainheat-env-eff
- environment-impact-current
- hot-water-env-eff
- floor-env-eff
default.feature_processor.feature_processor_config.retain_features:
- uprn
- heating-cost-current
- co2-emissions-current
- hot-water-cost-current
- total-floor-area
- secondheat-description
- floor-description
- mainheat-energy-eff
- current-energy-efficiency
- walls-energy-eff
- roof-energy-eff
- property-type
- mainheat-description
- mechanical-ventilation
- floor-level
- built-form
- walls-description
- mainheatcont-description
- roof-description
- energy-consumption-current
- construction-age-band
- hotwater-description
- main-fuel
- hot-water-energy-eff
- co2-emiss-curr-per-floor-area
- windows-energy-eff
- current-energy-rating
- lodgement-year
- extension-count
- number-open-fireplaces
- number-heated-rooms
- windows-description
- photo-supply
- heat-loss-corridor
- flat-top-storey
- unheated-corridor-length
- fixed-lighting-outlets-count
- tenure
- multi-glaze-proportion
- solar-water-heating-flag
- energy-tariff
- floor-height
- constituency
- transaction-type
- floor-energy-eff
- lodgement-month
- lighting-cost-current
- glazed-area
- main-heating-controls
- estimate_annual_kwh
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: hot_water_kwh
@ -54,8 +87,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
size: 35173792
md5: 19abfc15e24cd8a869a0f3f087e09584.dir
size: 13396685
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -66,8 +99,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
size: 35173792
md5: 19abfc15e24cd8a869a0f3f087e09584.dir
size: 13396685
nfiles: 2
params:
configs/build_model.yaml:
@ -99,18 +132,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: 56598af2325ec699349cdb166b1e807b.dir
size: 1789771
md5: 0e2f1131b24eafa09ccbab91a36cac6c.dir
size: 1787805
nfiles: 1
- path: data/model/
hash: md5
md5: ce995d18c2f40aefe1f5757d621bb4d4.dir
size: 592064916
md5: 77c0900cda64c1beaef6782d04fd712e.dir
size: 518798187
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: 4c169dc1d437e5fea43e47616f46dafc
size: 219
md5: 0e03bb46cd03e9521095bf141ee92ed7
size: 220
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -120,13 +153,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: ce995d18c2f40aefe1f5757d621bb4d4.dir
size: 592064916
md5: 77c0900cda64c1beaef6782d04fd712e.dir
size: 518798187
nfiles: 36
- path: data/prepared_data
hash: md5
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
size: 35173792
md5: 19abfc15e24cd8a869a0f3f087e09584.dir
size: 13396685
nfiles: 2
params:
configs/settings.yaml:
@ -138,8 +171,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 590da9bdeb1e1b442e52bce52f1da0dc.dir
size: 192586
md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir
size: 192502
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -150,13 +183,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 590da9bdeb1e1b442e52bce52f1da0dc.dir
size: 192586
md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir
size: 192502
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
size: 35173792
md5: 19abfc15e24cd8a869a0f3f087e09584.dir
size: 13396685
nfiles: 2
params:
configs/settings.yaml:
@ -166,8 +199,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: b80014eacb59a824aff78667352e7c95
size: 221
md5: a8b7699dd2b171da5fead1fb04d954aa
size: 220
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: