mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
remove the area-to-heated rooms feature, and env features
This commit is contained in:
parent
9a49caa0cd
commit
318a51589d
3 changed files with 28 additions and 23 deletions
|
|
@ -192,6 +192,7 @@ def remove_high_ratio_of_area_to_rooms(df):
|
|||
|
||||
# Remove top 0.05% of area-to-heated-rooms
|
||||
df = df[df['area-to-heated-rooms'] < df['area-to-heated-rooms'].quantile(0.9995)].reset_index(drop=True)
|
||||
df = df.drop(columns=['area-to-heated-rooms'])
|
||||
return df
|
||||
|
||||
def add_estimate_annual_kwh(df):
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ default:
|
|||
target: heating_kwh
|
||||
identifier_columns: ["uprn"]
|
||||
drop_columns: ["hot_water_kwh", 'lodgement-datetime', 'lodgement-date', 'number-habitable-rooms', 'local-authority', 'posttown', 'address', 'inspection-date',
|
||||
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1',]
|
||||
"county", "constituency-label", 'address2', 'uprn-source', 'postcode', 'address1', 'mainheat-env-eff', 'environment-impact-current', 'hot-water-env-eff', 'floor-env-eff']
|
||||
retain_features: null
|
||||
|
||||
generate_predictions:
|
||||
|
|
|
|||
|
|
@ -35,6 +35,10 @@ stages:
|
|||
- uprn-source
|
||||
- postcode
|
||||
- address1
|
||||
- mainheat-env-eff
|
||||
- environment-impact-current
|
||||
- hot-water-env-eff
|
||||
- floor-env-eff
|
||||
default.feature_processor.feature_processor_config.retain_features:
|
||||
default.feature_processor.feature_processor_config.subsample_amount:
|
||||
default.feature_processor.feature_processor_config.subsample_seed: 0
|
||||
|
|
@ -50,8 +54,8 @@ stages:
|
|||
outs:
|
||||
- path: data/prepared_data/
|
||||
hash: md5
|
||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
||||
size: 30388447
|
||||
md5: 382d5d02772d4ead3a31fa9420c03417.dir
|
||||
size: 29570807
|
||||
nfiles: 2
|
||||
build_model:
|
||||
cmd: python 2_build_model.py
|
||||
|
|
@ -62,8 +66,8 @@ stages:
|
|||
size: 4820
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
||||
size: 30388447
|
||||
md5: 382d5d02772d4ead3a31fa9420c03417.dir
|
||||
size: 29570807
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/build_model.yaml:
|
||||
|
|
@ -95,18 +99,18 @@ stages:
|
|||
outs:
|
||||
- path: data/fit_predictions/
|
||||
hash: md5
|
||||
md5: 9a2abeada227b8bb4c13d6c745bef581.dir
|
||||
size: 1547064
|
||||
md5: 3ea5e827470bb96408fa70bc45ed6b58.dir
|
||||
size: 1545844
|
||||
nfiles: 1
|
||||
- path: data/model/
|
||||
hash: md5
|
||||
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
|
||||
size: 506201607
|
||||
nfiles: 36
|
||||
md5: cff0bc3b08dfb48fff929bb4991ea2f5.dir
|
||||
size: 291336375
|
||||
nfiles: 35
|
||||
- path: metrics/fit_metrics.json
|
||||
hash: md5
|
||||
md5: 4a496483bffad3efe671f29110729e48
|
||||
size: 221
|
||||
md5: 63b660c30b855ee0d86b0c1be4ad537e
|
||||
size: 220
|
||||
generate_predictions:
|
||||
cmd: python 3_generate_predictions.py
|
||||
deps:
|
||||
|
|
@ -116,13 +120,13 @@ stages:
|
|||
size: 2464
|
||||
- path: data/model
|
||||
hash: md5
|
||||
md5: 43b72f9284e92842cbc82bc7cc0950e2.dir
|
||||
size: 506201607
|
||||
nfiles: 36
|
||||
md5: cff0bc3b08dfb48fff929bb4991ea2f5.dir
|
||||
size: 291336375
|
||||
nfiles: 35
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
||||
size: 30388447
|
||||
md5: 382d5d02772d4ead3a31fa9420c03417.dir
|
||||
size: 29570807
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -134,7 +138,7 @@ stages:
|
|||
outs:
|
||||
- path: data/predictions/
|
||||
hash: md5
|
||||
md5: 88832d623c3e437eaec221307ac33aae.dir
|
||||
md5: 88c5ad48fd6035600135a4541f2811a8.dir
|
||||
size: 163584
|
||||
nfiles: 1
|
||||
generate_metrics:
|
||||
|
|
@ -146,13 +150,13 @@ stages:
|
|||
size: 3484
|
||||
- path: data/predictions
|
||||
hash: md5
|
||||
md5: 88832d623c3e437eaec221307ac33aae.dir
|
||||
md5: 88c5ad48fd6035600135a4541f2811a8.dir
|
||||
size: 163584
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: f506f1f059945c0f014c3f505a63726c.dir
|
||||
size: 30388447
|
||||
md5: 382d5d02772d4ead3a31fa9420c03417.dir
|
||||
size: 29570807
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -162,8 +166,8 @@ stages:
|
|||
outs:
|
||||
- path: metrics/metrics.json
|
||||
hash: md5
|
||||
md5: f2783bdec0f0974b6d799609c6189467
|
||||
size: 222
|
||||
md5: f3fd84bd242e9f806aaedbb560f2ac7e
|
||||
size: 219
|
||||
generate_scenerio_metrics:
|
||||
cmd: python 5_generate_scenarios.py
|
||||
deps:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue