mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add estimate_annual_kwh feature, remove env features
This commit is contained in:
parent
ec7149e56e
commit
a10be25fd2
3 changed files with 45 additions and 144 deletions
|
|
@ -160,7 +160,9 @@ def add_features_from_code(df):
|
|||
|
||||
return df
|
||||
|
||||
|
||||
def add_estimate_annual_kwh(df):
    """Add an ``estimate_annual_kwh`` column to *df* and return it.

    The new column is the element-wise product of the
    ``energy-consumption-current`` and ``total-floor-area`` columns.
    The frame is modified in place; the same object is returned so the
    function can be chained with other feature steps.

    NOTE(review): presumably ``energy-consumption-current`` is a per-unit-area
    annual figure, so the product is a whole-property annual total -- confirm
    against the data dictionary.
    """
    consumption_per_area = df['energy-consumption-current']
    floor_area = df['total-floor-area']
    df['estimate_annual_kwh'] = consumption_per_area * floor_area
    return df
|
||||
|
||||
# def keep_ending_columns(df):
|
||||
# ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
|
||||
|
|
@ -172,7 +174,8 @@ def add_features_from_code(df):
|
|||
|
||||
business_logic = {
|
||||
"add_features_from_code": add_features_from_code,
|
||||
"remove_hotwaterkwh_bottom_percentile": remove_hotwaterkwh_bottom_percentile
|
||||
"remove_hotwaterkwh_bottom_percentile": remove_hotwaterkwh_bottom_percentile,
|
||||
"add_estimate_annual_kwh": add_estimate_annual_kwh
|
||||
# "keep_non_zero_rdsap": keep_non_zero_rdsap,
|
||||
# "keep_flats": keep_flats,
|
||||
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,
|
||||
|
|
|
|||
|
|
@ -30,75 +30,9 @@ default:
|
|||
subsample_seed: 0
|
||||
target: hot_water_kwh
|
||||
identifier_columns: ["uprn"]
|
||||
drop_columns: ["heating_kwh"]
|
||||
retain_features: [
|
||||
'uprn',
|
||||
'heating-cost-current',
|
||||
'co2-emissions-current',
|
||||
'hot-water-cost-current',
|
||||
'total-floor-area',
|
||||
'secondheat-description',
|
||||
'environment-impact-current',
|
||||
'floor-description',
|
||||
'mainheat-energy-eff',
|
||||
'current-energy-efficiency',
|
||||
'mainheat-env-eff',
|
||||
'walls-energy-eff',
|
||||
'roof-energy-eff',
|
||||
'property-type',
|
||||
'mainheat-description',
|
||||
'hot-water-env-eff',
|
||||
'mechanical-ventilation',
|
||||
'floor-level',
|
||||
'built-form',
|
||||
'walls-description',
|
||||
'mainheatcont-description',
|
||||
'roof-description',
|
||||
'energy-consumption-current',
|
||||
'construction-age-band',
|
||||
'hotwater-description',
|
||||
# 'lodgement-datetime',
|
||||
'main-fuel',
|
||||
'hot-water-energy-eff',
|
||||
'co2-emiss-curr-per-floor-area',
|
||||
'windows-energy-eff',
|
||||
'current-energy-rating',
|
||||
'lodgement-year',
|
||||
'extension-count',
|
||||
'number-open-fireplaces',
|
||||
'number-heated-rooms',
|
||||
# 'lodgement-date',
|
||||
# 'number-habitable-rooms',
|
||||
'windows-description',
|
||||
# 'local-authority',
|
||||
'photo-supply',
|
||||
'heat-loss-corridor',
|
||||
# 'posttown',
|
||||
# 'address',
|
||||
'flat-top-storey',
|
||||
'unheated-corridor-length',
|
||||
'fixed-lighting-outlets-count',
|
||||
# 'inspection-date',
|
||||
'tenure',
|
||||
# 'county',
|
||||
# 'constituency-label',
|
||||
'multi-glaze-proportion',
|
||||
'solar-water-heating-flag',
|
||||
# 'address2',
|
||||
'energy-tariff',
|
||||
'floor-height',
|
||||
'constituency',
|
||||
# 'uprn-source',
|
||||
'transaction-type',
|
||||
'floor-energy-eff',
|
||||
# 'postcode',
|
||||
'lodgement-month',
|
||||
'lighting-cost-current',
|
||||
'glazed-area',
|
||||
# 'address1',
|
||||
'floor-env-eff',
|
||||
'main-heating-controls'
|
||||
]
|
||||
drop_columns: ["heating_kwh", 'lodgement-datetime', 'lodgement-date', 'number-habitable-rooms', 'local-authority', 'posttown', 'address', 'inspection-date',
|
||||
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1', 'mainheat-env-eff', 'environment-impact-current', 'hot-water-env-eff', 'floor-env-eff']
|
||||
retain_features: null
|
||||
|
||||
generate_predictions:
|
||||
input_dataclient_type: local
|
||||
|
|
|
|||
|
|
@ -22,60 +22,24 @@ stages:
|
|||
configs/settings.yaml:
|
||||
default.feature_processor.feature_processor_config.drop_columns:
|
||||
- heating_kwh
|
||||
default.feature_processor.feature_processor_config.retain_features:
|
||||
- uprn
|
||||
- heating-cost-current
|
||||
- co2-emissions-current
|
||||
- hot-water-cost-current
|
||||
- total-floor-area
|
||||
- secondheat-description
|
||||
- environment-impact-current
|
||||
- floor-description
|
||||
- mainheat-energy-eff
|
||||
- current-energy-efficiency
|
||||
- lodgement-datetime
|
||||
- lodgement-date
|
||||
- number-habitable-rooms
|
||||
- local-authority
|
||||
- posttown
|
||||
- address
|
||||
- inspection-date
|
||||
- county
|
||||
- constituency-label
|
||||
- address2
|
||||
- uprn-source
|
||||
- postcode
|
||||
- address1
|
||||
- mainheat-env-eff
|
||||
- walls-energy-eff
|
||||
- roof-energy-eff
|
||||
- property-type
|
||||
- mainheat-description
|
||||
- environment-impact-current
|
||||
- hot-water-env-eff
|
||||
- mechanical-ventilation
|
||||
- floor-level
|
||||
- built-form
|
||||
- walls-description
|
||||
- mainheatcont-description
|
||||
- roof-description
|
||||
- energy-consumption-current
|
||||
- construction-age-band
|
||||
- hotwater-description
|
||||
- main-fuel
|
||||
- hot-water-energy-eff
|
||||
- co2-emiss-curr-per-floor-area
|
||||
- windows-energy-eff
|
||||
- current-energy-rating
|
||||
- lodgement-year
|
||||
- extension-count
|
||||
- number-open-fireplaces
|
||||
- number-heated-rooms
|
||||
- windows-description
|
||||
- photo-supply
|
||||
- heat-loss-corridor
|
||||
- flat-top-storey
|
||||
- unheated-corridor-length
|
||||
- fixed-lighting-outlets-count
|
||||
- tenure
|
||||
- multi-glaze-proportion
|
||||
- solar-water-heating-flag
|
||||
- energy-tariff
|
||||
- floor-height
|
||||
- constituency
|
||||
- transaction-type
|
||||
- floor-energy-eff
|
||||
- lodgement-month
|
||||
- lighting-cost-current
|
||||
- glazed-area
|
||||
- floor-env-eff
|
||||
- main-heating-controls
|
||||
default.feature_processor.feature_processor_config.retain_features:
|
||||
default.feature_processor.feature_processor_config.subsample_amount:
|
||||
default.feature_processor.feature_processor_config.subsample_seed: 0
|
||||
default.feature_processor.feature_processor_config.target: hot_water_kwh
|
||||
|
|
@ -90,8 +54,8 @@ stages:
|
|||
outs:
|
||||
- path: data/prepared_data/
|
||||
hash: md5
|
||||
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
|
||||
size: 13131853
|
||||
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
|
||||
size: 35173792
|
||||
nfiles: 2
|
||||
build_model:
|
||||
cmd: python 2_build_model.py
|
||||
|
|
@ -102,8 +66,8 @@ stages:
|
|||
size: 4820
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
|
||||
size: 13131853
|
||||
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
|
||||
size: 35173792
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/build_model.yaml:
|
||||
|
|
@ -135,17 +99,17 @@ stages:
|
|||
outs:
|
||||
- path: data/fit_predictions/
|
||||
hash: md5
|
||||
md5: 3e48cec68f702bc822eed8fcb2c5c603.dir
|
||||
size: 1787931
|
||||
md5: 56598af2325ec699349cdb166b1e807b.dir
|
||||
size: 1789771
|
||||
nfiles: 1
|
||||
- path: data/model/
|
||||
hash: md5
|
||||
md5: 37f7480141e920c68faacd39478a1a68.dir
|
||||
size: 451364406
|
||||
nfiles: 35
|
||||
md5: ce995d18c2f40aefe1f5757d621bb4d4.dir
|
||||
size: 592064916
|
||||
nfiles: 36
|
||||
- path: metrics/fit_metrics.json
|
||||
hash: md5
|
||||
md5: e7a2a5efea57b1ddd1431b713d78bb11
|
||||
md5: 4c169dc1d437e5fea43e47616f46dafc
|
||||
size: 219
|
||||
generate_predictions:
|
||||
cmd: python 3_generate_predictions.py
|
||||
|
|
@ -156,13 +120,13 @@ stages:
|
|||
size: 2464
|
||||
- path: data/model
|
||||
hash: md5
|
||||
md5: 37f7480141e920c68faacd39478a1a68.dir
|
||||
size: 451364406
|
||||
nfiles: 35
|
||||
md5: ce995d18c2f40aefe1f5757d621bb4d4.dir
|
||||
size: 592064916
|
||||
nfiles: 36
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
|
||||
size: 13131853
|
||||
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
|
||||
size: 35173792
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -174,8 +138,8 @@ stages:
|
|||
outs:
|
||||
- path: data/predictions/
|
||||
hash: md5
|
||||
md5: 9267a66c6fae4da5a589faab76fac14c.dir
|
||||
size: 192482
|
||||
md5: 590da9bdeb1e1b442e52bce52f1da0dc.dir
|
||||
size: 192586
|
||||
nfiles: 1
|
||||
generate_metrics:
|
||||
cmd: python 4_generate_metrics.py
|
||||
|
|
@ -186,13 +150,13 @@ stages:
|
|||
size: 3484
|
||||
- path: data/predictions
|
||||
hash: md5
|
||||
md5: 9267a66c6fae4da5a589faab76fac14c.dir
|
||||
size: 192482
|
||||
md5: 590da9bdeb1e1b442e52bce52f1da0dc.dir
|
||||
size: 192586
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
|
||||
size: 13131853
|
||||
md5: 0364b2ef5dd7674f97473fdecf3f3a02.dir
|
||||
size: 35173792
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -202,7 +166,7 @@ stages:
|
|||
outs:
|
||||
- path: metrics/metrics.json
|
||||
hash: md5
|
||||
md5: 3a08c29f028f5e3cb50fb8cd3608e2f4
|
||||
md5: b80014eacb59a824aff78667352e7c95
|
||||
size: 221
|
||||
generate_scenerio_metrics:
|
||||
cmd: python 5_generate_scenarios.py
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue