clipped extremely small heating values

This commit is contained in:
Michael Duong 2024-07-12 23:03:31 +01:00
parent 9785181e80
commit 5cb8a8a6aa
2 changed files with 30 additions and 25 deletions

View file

@ -56,6 +56,10 @@ def keep_non_zero_rdsap(df):
df = df[df["rdsap_change"] != 0]
return df
def remove_heatingkwh_bottom_percentile(df, percentile=0.0001):
    """Drop the rows with the smallest ``heating_kwh`` values.

    The cutoff is the ``percentile`` quantile of the ``heating_kwh`` column
    (``percentile`` is a fraction in [0, 1], passed straight to
    ``Series.quantile``). Only rows whose ``heating_kwh`` is strictly greater
    than that cutoff are kept, so rows exactly equal to the cutoff are
    removed as well.

    Args:
        df: DataFrame containing a ``heating_kwh`` column.
        percentile: Quantile used as the lower cutoff. Defaults to 0.0001.

    Returns:
        The filtered DataFrame; the original index labels are preserved.
    """
    heating_kwh = df["heating_kwh"]
    # Compute the threshold once, on the unfiltered column.
    cutoff = heating_kwh.quantile(percentile)
    # Strict comparison: values at the cutoff itself are treated as outliers.
    keep_mask = heating_kwh > cutoff
    return df[keep_mask]
def add_features_from_code(df):
FEATURES = {
@ -167,7 +171,8 @@ def add_features_from_code(df):
# return df
business_logic = {
"add_features_from_code": add_features_from_code
"add_features_from_code": add_features_from_code,
"remove_heatingkwh_bottom_percentile": remove_heatingkwh_bottom_percentile
# "keep_non_zero_rdsap": keep_non_zero_rdsap,
# "keep_flats": keep_flats,
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,

View file

@ -103,8 +103,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
md5: 660630d5c4f0f9a371f5c43221a56e39.dir
size: 14486809
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -115,8 +115,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
md5: 660630d5c4f0f9a371f5c43221a56e39.dir
size: 14486809
nfiles: 2
params:
configs/build_model.yaml:
@ -148,18 +148,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: e08a232adc7f805d5d97ed7e93d667b3.dir
size: 726970
md5: 07b5623892769f33837d89bf6fc6702d.dir
size: 726940
nfiles: 1
- path: data/model/
hash: md5
md5: 3daab783532ba88d40eb905ff65b0f1c.dir
size: 400927883
nfiles: 37
md5: 6f281b6a422453ec853b1d13cb1920de.dir
size: 345477655
nfiles: 36
- path: metrics/fit_metrics.json
hash: md5
md5: 9d6a478739e42b2f5f8500de585e9cf9
size: 212
md5: e6fc8ae0f36b52ce3173515ef75ce526
size: 223
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -169,13 +169,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 3daab783532ba88d40eb905ff65b0f1c.dir
size: 400927883
nfiles: 37
md5: 6f281b6a422453ec853b1d13cb1920de.dir
size: 345477655
nfiles: 36
- path: data/prepared_data
hash: md5
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
md5: 660630d5c4f0f9a371f5c43221a56e39.dir
size: 14486809
nfiles: 2
params:
configs/settings.yaml:
@ -187,8 +187,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir
size: 77479
md5: 19d3ead23af278c2ccdf4836180d4c15.dir
size: 77471
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -199,13 +199,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir
size: 77479
md5: 19d3ead23af278c2ccdf4836180d4c15.dir
size: 77471
nfiles: 1
- path: data/prepared_data
hash: md5
md5: d1ca07d66c3e28c133d0561423e6d2c8.dir
size: 14503223
md5: 660630d5c4f0f9a371f5c43221a56e39.dir
size: 14486809
nfiles: 2
params:
configs/settings.yaml:
@ -215,8 +215,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 41ccaa41fd34009602d0df571e6453e9
size: 219
md5: 7b62ecaff5b429ef6c31aba95bce9f39
size: 218
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: