mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-30 13:10:43 +00:00
Merge pull request #141 from Hestia-Homes/heatingkwh-dev-model
Remove the area-to-heated-rooms feature and the env features
This commit is contained in:
commit
44fe3fa00a
3 changed files with 128 additions and 40 deletions
|
|
@ -192,6 +192,7 @@ def remove_high_ratio_of_area_to_rooms(df):
|
||||||
|
|
||||||
# Remove top 0.05% of area-to-heated-rooms
|
# Remove top 0.05% of area-to-heated-rooms
|
||||||
df = df[df['area-to-heated-rooms'] < df['area-to-heated-rooms'].quantile(0.9995)].reset_index(drop=True)
|
df = df[df['area-to-heated-rooms'] < df['area-to-heated-rooms'].quantile(0.9995)].reset_index(drop=True)
|
||||||
|
df = df.drop(columns=['area-to-heated-rooms'])
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def add_estimate_annual_kwh(df):
|
def add_estimate_annual_kwh(df):
|
||||||
|
|
|
||||||
|
|
@ -36,9 +36,59 @@ default:
|
||||||
subsample_seed: 0
|
subsample_seed: 0
|
||||||
target: heating_kwh
|
target: heating_kwh
|
||||||
identifier_columns: ["uprn"]
|
identifier_columns: ["uprn"]
|
||||||
drop_columns: ["hot_water_kwh", 'lodgement-datetime', 'lodgement-date', 'number-habitable-rooms', 'local-authority', 'posttown', 'address', 'inspection-date',
|
drop_columns: ["hot_water_kwh"]
|
||||||
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1',]
|
retain_features: [
|
||||||
retain_features: null
|
'uprn',
|
||||||
|
'heating-cost-current',
|
||||||
|
'co2-emissions-current',
|
||||||
|
'hot-water-cost-current',
|
||||||
|
'total-floor-area',
|
||||||
|
'secondheat-description',
|
||||||
|
'floor-description',
|
||||||
|
'mainheat-energy-eff',
|
||||||
|
'current-energy-efficiency',
|
||||||
|
'walls-energy-eff',
|
||||||
|
'roof-energy-eff',
|
||||||
|
'property-type',
|
||||||
|
'mainheat-description',
|
||||||
|
'mechanical-ventilation',
|
||||||
|
'floor-level',
|
||||||
|
'built-form',
|
||||||
|
'walls-description',
|
||||||
|
'mainheatcont-description',
|
||||||
|
'roof-description',
|
||||||
|
'energy-consumption-current',
|
||||||
|
'construction-age-band',
|
||||||
|
'hotwater-description',
|
||||||
|
'main-fuel',
|
||||||
|
'hot-water-energy-eff',
|
||||||
|
'co2-emiss-curr-per-floor-area',
|
||||||
|
'windows-energy-eff',
|
||||||
|
'current-energy-rating',
|
||||||
|
'lodgement-year',
|
||||||
|
'extension-count',
|
||||||
|
'number-open-fireplaces',
|
||||||
|
'number-heated-rooms',
|
||||||
|
'windows-description',
|
||||||
|
'photo-supply',
|
||||||
|
'heat-loss-corridor',
|
||||||
|
'flat-top-storey',
|
||||||
|
'unheated-corridor-length',
|
||||||
|
'fixed-lighting-outlets-count',
|
||||||
|
'tenure',
|
||||||
|
'multi-glaze-proportion',
|
||||||
|
'solar-water-heating-flag',
|
||||||
|
'energy-tariff',
|
||||||
|
'floor-height',
|
||||||
|
'constituency',
|
||||||
|
'transaction-type',
|
||||||
|
'floor-energy-eff',
|
||||||
|
'lodgement-month',
|
||||||
|
'lighting-cost-current',
|
||||||
|
'glazed-area',
|
||||||
|
'main-heating-controls',
|
||||||
|
'estimate_annual_kwh',
|
||||||
|
]
|
||||||
|
|
||||||
generate_predictions:
|
generate_predictions:
|
||||||
input_dataclient_type: local
|
input_dataclient_type: local
|
||||||
|
|
|
||||||
|
|
@ -22,20 +22,57 @@ stages:
|
||||||
configs/settings.yaml:
|
configs/settings.yaml:
|
||||||
default.feature_processor.feature_processor_config.drop_columns:
|
default.feature_processor.feature_processor_config.drop_columns:
|
||||||
- hot_water_kwh
|
- hot_water_kwh
|
||||||
- lodgement-datetime
|
|
||||||
- lodgement-date
|
|
||||||
- number-habitable-rooms
|
|
||||||
- local-authority
|
|
||||||
- posttown
|
|
||||||
- address
|
|
||||||
- inspection-date
|
|
||||||
- county
|
|
||||||
- constituency-label
|
|
||||||
- address2
|
|
||||||
- uprn-source
|
|
||||||
- postcode
|
|
||||||
- address1
|
|
||||||
default.feature_processor.feature_processor_config.retain_features:
|
default.feature_processor.feature_processor_config.retain_features:
|
||||||
|
- uprn
|
||||||
|
- heating-cost-current
|
||||||
|
- co2-emissions-current
|
||||||
|
- hot-water-cost-current
|
||||||
|
- total-floor-area
|
||||||
|
- secondheat-description
|
||||||
|
- floor-description
|
||||||
|
- mainheat-energy-eff
|
||||||
|
- current-energy-efficiency
|
||||||
|
- walls-energy-eff
|
||||||
|
- roof-energy-eff
|
||||||
|
- property-type
|
||||||
|
- mainheat-description
|
||||||
|
- mechanical-ventilation
|
||||||
|
- floor-level
|
||||||
|
- built-form
|
||||||
|
- walls-description
|
||||||
|
- mainheatcont-description
|
||||||
|
- roof-description
|
||||||
|
- energy-consumption-current
|
||||||
|
- construction-age-band
|
||||||
|
- hotwater-description
|
||||||
|
- main-fuel
|
||||||
|
- hot-water-energy-eff
|
||||||
|
- co2-emiss-curr-per-floor-area
|
||||||
|
- windows-energy-eff
|
||||||
|
- current-energy-rating
|
||||||
|
- lodgement-year
|
||||||
|
- extension-count
|
||||||
|
- number-open-fireplaces
|
||||||
|
- number-heated-rooms
|
||||||
|
- windows-description
|
||||||
|
- photo-supply
|
||||||
|
- heat-loss-corridor
|
||||||
|
- flat-top-storey
|
||||||
|
- unheated-corridor-length
|
||||||
|
- fixed-lighting-outlets-count
|
||||||
|
- tenure
|
||||||
|
- multi-glaze-proportion
|
||||||
|
- solar-water-heating-flag
|
||||||
|
- energy-tariff
|
||||||
|
- floor-height
|
||||||
|
- constituency
|
||||||
|
- transaction-type
|
||||||
|
- floor-energy-eff
|
||||||
|
- lodgement-month
|
||||||
|
- lighting-cost-current
|
||||||
|
- glazed-area
|
||||||
|
- main-heating-controls
|
||||||
|
- estimate_annual_kwh
|
||||||
default.feature_processor.feature_processor_config.subsample_amount:
|
default.feature_processor.feature_processor_config.subsample_amount:
|
||||||
default.feature_processor.feature_processor_config.subsample_seed: 0
|
default.feature_processor.feature_processor_config.subsample_seed: 0
|
||||||
default.feature_processor.feature_processor_config.target: heating_kwh
|
default.feature_processor.feature_processor_config.target: heating_kwh
|
||||||
|
|
@ -50,8 +87,8 @@ stages:
|
||||||
outs:
|
outs:
|
||||||
- path: data/prepared_data/
|
- path: data/prepared_data/
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir
|
||||||
size: 30388447
|
size: 11062844
|
||||||
nfiles: 2
|
nfiles: 2
|
||||||
build_model:
|
build_model:
|
||||||
cmd: python 2_build_model.py
|
cmd: python 2_build_model.py
|
||||||
|
|
@ -62,8 +99,8 @@ stages:
|
||||||
size: 4820
|
size: 4820
|
||||||
- path: data/prepared_data
|
- path: data/prepared_data
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir
|
||||||
size: 30388447
|
size: 11062844
|
||||||
nfiles: 2
|
nfiles: 2
|
||||||
params:
|
params:
|
||||||
configs/build_model.yaml:
|
configs/build_model.yaml:
|
||||||
|
|
@ -95,18 +132,18 @@ stages:
|
||||||
outs:
|
outs:
|
||||||
- path: data/fit_predictions/
|
- path: data/fit_predictions/
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 9a2abeada227b8bb4c13d6c745bef581.dir
|
md5: 5c694b2ec23baca2e7242d3802ba09fe.dir
|
||||||
size: 1547064
|
size: 1546012
|
||||||
nfiles: 1
|
nfiles: 1
|
||||||
- path: data/model/
|
- path: data/model/
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
|
md5: 314c5043b1a421e847af8d3126fba788.dir
|
||||||
size: 506201607
|
size: 278676507
|
||||||
nfiles: 36
|
nfiles: 37
|
||||||
- path: metrics/fit_metrics.json
|
- path: metrics/fit_metrics.json
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 4a496483bffad3efe671f29110729e48
|
md5: 2e5ab3102b145f5c909232e66210a261
|
||||||
size: 221
|
size: 222
|
||||||
generate_predictions:
|
generate_predictions:
|
||||||
cmd: python 3_generate_predictions.py
|
cmd: python 3_generate_predictions.py
|
||||||
deps:
|
deps:
|
||||||
|
|
@ -116,13 +153,13 @@ stages:
|
||||||
size: 2464
|
size: 2464
|
||||||
- path: data/model
|
- path: data/model
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
|
md5: 314c5043b1a421e847af8d3126fba788.dir
|
||||||
size: 506201607
|
size: 278676507
|
||||||
nfiles: 36
|
nfiles: 37
|
||||||
- path: data/prepared_data
|
- path: data/prepared_data
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir
|
||||||
size: 30388447
|
size: 11062844
|
||||||
nfiles: 2
|
nfiles: 2
|
||||||
params:
|
params:
|
||||||
configs/settings.yaml:
|
configs/settings.yaml:
|
||||||
|
|
@ -134,8 +171,8 @@ stages:
|
||||||
outs:
|
outs:
|
||||||
- path: data/predictions/
|
- path: data/predictions/
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 88832d623c3e437eaec221307ac33aae.dir
|
md5: 10b0612c10dfaa78e08ccc673c6f984e.dir
|
||||||
size: 163584
|
size: 163560
|
||||||
nfiles: 1
|
nfiles: 1
|
||||||
generate_metrics:
|
generate_metrics:
|
||||||
cmd: python 4_generate_metrics.py
|
cmd: python 4_generate_metrics.py
|
||||||
|
|
@ -146,13 +183,13 @@ stages:
|
||||||
size: 3484
|
size: 3484
|
||||||
- path: data/predictions
|
- path: data/predictions
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: 88832d623c3e437eaec221307ac33aae.dir
|
md5: 10b0612c10dfaa78e08ccc673c6f984e.dir
|
||||||
size: 163584
|
size: 163560
|
||||||
nfiles: 1
|
nfiles: 1
|
||||||
- path: data/prepared_data
|
- path: data/prepared_data
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir
|
||||||
size: 30388447
|
size: 11062844
|
||||||
nfiles: 2
|
nfiles: 2
|
||||||
params:
|
params:
|
||||||
configs/settings.yaml:
|
configs/settings.yaml:
|
||||||
|
|
@ -162,8 +199,8 @@ stages:
|
||||||
outs:
|
outs:
|
||||||
- path: metrics/metrics.json
|
- path: metrics/metrics.json
|
||||||
hash: md5
|
hash: md5
|
||||||
md5: f2783bdec0f0974b6d799609c6189467
|
md5: 22794cfc31f6ffd98eb1db4852677b4f
|
||||||
size: 222
|
size: 220
|
||||||
generate_scenerio_metrics:
|
generate_scenerio_metrics:
|
||||||
cmd: python 5_generate_scenarios.py
|
cmd: python 5_generate_scenarios.py
|
||||||
deps:
|
deps:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue