Merge pull request #140 from Hestia-Homes/heatingkwh-dev-model

Remove the rounding to 100 kWh
This commit is contained in:
KhalimCK 2024-08-06 20:35:05 +01:00 committed by GitHub
commit 8a053fc775
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 47 additions and 148 deletions

View file

@ -194,12 +194,17 @@ def remove_high_ratio_of_area_to_rooms(df):
df = df[df['area-to-heated-rooms'] < df['area-to-heated-rooms'].quantile(0.9995)].reset_index(drop=True)
return df
def add_estimate_annual_kwh(df):
    """Add an ``estimate_annual_kwh`` column to ``df`` and return the frame.

    The new column is the element-wise product of the
    ``energy-consumption-current`` and ``total-floor-area`` columns.

    Args:
        df: DataFrame containing the ``energy-consumption-current`` and
            ``total-floor-area`` columns.

    Returns:
        The same DataFrame object, modified in place with the extra column.

    Raises:
        KeyError: if either input column is missing from ``df``.
    """
    # NOTE(review): presumably 'energy-consumption-current' is a per-unit-area
    # figure, so scaling by floor area yields a whole-property total - confirm
    # the units against the data source.
    df['estimate_annual_kwh'] = df['energy-consumption-current'] * df['total-floor-area']
    return df
business_logic = {
"add_features_from_code": add_features_from_code,
"remove_heatingkwh_bottom_percentile": remove_heatingkwh_bottom_percentile,
"round_to_100s": round_to_100s,
# "round_to_100s": round_to_100s,
"enforce_minimum_habitable_room_size": enforce_minimum_habitable_room_size,
"remove_high_ratio_of_area_to_rooms": remove_high_ratio_of_area_to_rooms
"remove_high_ratio_of_area_to_rooms": remove_high_ratio_of_area_to_rooms,
"add_estimate_annual_kwh": add_estimate_annual_kwh,
# "keep_non_zero_rdsap": keep_non_zero_rdsap,
# "keep_flats": keep_flats,
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,

View file

@ -36,75 +36,9 @@ default:
subsample_seed: 0
target: heating_kwh
identifier_columns: ["uprn"]
drop_columns: ["hot_water_kwh"]
retain_features: [
'uprn',
'heating-cost-current',
'co2-emissions-current',
'hot-water-cost-current',
'total-floor-area',
'secondheat-description',
'environment-impact-current',
'floor-description',
'mainheat-energy-eff',
'current-energy-efficiency',
'mainheat-env-eff',
'walls-energy-eff',
'roof-energy-eff',
'property-type',
'mainheat-description',
'hot-water-env-eff',
'mechanical-ventilation',
'floor-level',
'built-form',
'walls-description',
'mainheatcont-description',
'roof-description',
'energy-consumption-current',
'construction-age-band',
'hotwater-description',
# 'lodgement-datetime',
'main-fuel',
'hot-water-energy-eff',
'co2-emiss-curr-per-floor-area',
'windows-energy-eff',
'current-energy-rating',
'lodgement-year',
'extension-count',
'number-open-fireplaces',
'number-heated-rooms',
# 'lodgement-date',
# 'number-habitable-rooms',
'windows-description',
# 'local-authority',
'photo-supply',
'heat-loss-corridor',
# 'posttown',
# 'address',
'flat-top-storey',
'unheated-corridor-length',
'fixed-lighting-outlets-count',
# 'inspection-date',
'tenure',
# 'county',
# 'constituency-label',
'multi-glaze-proportion',
'solar-water-heating-flag',
# 'address2',
'energy-tariff',
'floor-height',
'constituency',
# 'uprn-source',
'transaction-type',
'floor-energy-eff',
# 'postcode',
'lodgement-month',
'lighting-cost-current',
'glazed-area',
# 'address1',
'floor-env-eff',
'main-heating-controls'
]
drop_columns: ["hot_water_kwh", 'lodgement-datetime', 'lodgement-date', 'number-habitable-rooms', 'local-authority', 'posttown', 'address', 'inspection-date',
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1',]
retain_features: null
generate_predictions:
input_dataclient_type: local

View file

@ -22,60 +22,20 @@ stages:
configs/settings.yaml:
default.feature_processor.feature_processor_config.drop_columns:
- hot_water_kwh
- lodgement-datetime
- lodgement-date
- number-habitable-rooms
- local-authority
- posttown
- address
- inspection-date
- county
- constituency-label
- address2
- uprn-source
- postcode
- address1
default.feature_processor.feature_processor_config.retain_features:
- uprn
- heating-cost-current
- co2-emissions-current
- hot-water-cost-current
- total-floor-area
- secondheat-description
- environment-impact-current
- floor-description
- mainheat-energy-eff
- current-energy-efficiency
- mainheat-env-eff
- walls-energy-eff
- roof-energy-eff
- property-type
- mainheat-description
- hot-water-env-eff
- mechanical-ventilation
- floor-level
- built-form
- walls-description
- mainheatcont-description
- roof-description
- energy-consumption-current
- construction-age-band
- hotwater-description
- main-fuel
- hot-water-energy-eff
- co2-emiss-curr-per-floor-area
- windows-energy-eff
- current-energy-rating
- lodgement-year
- extension-count
- number-open-fireplaces
- number-heated-rooms
- windows-description
- photo-supply
- heat-loss-corridor
- flat-top-storey
- unheated-corridor-length
- fixed-lighting-outlets-count
- tenure
- multi-glaze-proportion
- solar-water-heating-flag
- energy-tariff
- floor-height
- constituency
- transaction-type
- floor-energy-eff
- lodgement-month
- lighting-cost-current
- glazed-area
- floor-env-eff
- main-heating-controls
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: heating_kwh
@ -90,8 +50,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir
size: 10405713
md5: f506f1f059945c0f014c3f505a63726c.dir
size: 30388447
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -102,8 +62,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir
size: 10405713
md5: f506f1f059945c0f014c3f505a63726c.dir
size: 30388447
nfiles: 2
params:
configs/build_model.yaml:
@ -135,18 +95,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: dee9c58e45081cf5734895a18f31906f.dir
size: 1545644
md5: 9a2abeada227b8bb4c13d6c745bef581.dir
size: 1547064
nfiles: 1
- path: data/model/
hash: md5
md5: 2da6dc420a308a31e5450ab24b7d4c40.dir
size: 297721035
nfiles: 35
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
size: 506201607
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: 23032c58977677c6790415aa79e48aa8
size: 216
md5: 4a496483bffad3efe671f29110729e48
size: 221
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -156,13 +116,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 2da6dc420a308a31e5450ab24b7d4c40.dir
size: 297721035
nfiles: 35
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
size: 506201607
nfiles: 36
- path: data/prepared_data
hash: md5
md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir
size: 10405713
md5: f506f1f059945c0f014c3f505a63726c.dir
size: 30388447
nfiles: 2
params:
configs/settings.yaml:
@ -174,8 +134,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: d93b71cd8f21df7928a423db8a2c4e2b.dir
size: 163544
md5: 88832d623c3e437eaec221307ac33aae.dir
size: 163584
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -186,13 +146,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: d93b71cd8f21df7928a423db8a2c4e2b.dir
size: 163544
md5: 88832d623c3e437eaec221307ac33aae.dir
size: 163584
nfiles: 1
- path: data/prepared_data
hash: md5
md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir
size: 10405713
md5: f506f1f059945c0f014c3f505a63726c.dir
size: 30388447
nfiles: 2
params:
configs/settings.yaml:
@ -202,8 +162,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: f611572ff9273930f0c386903ee2ba63
size: 217
md5: f2783bdec0f0974b6d799609c6189467
size: 222
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: