Add clipping at the 1st and 99th percentiles (bottom and top 1 percent) for both starting and ending heat demand

This commit is contained in:
Michael Duong 2024-06-06 23:15:05 +01:00
parent e695a10c14
commit ddf24b3cbc
2 changed files with 49 additions and 25 deletions

View file

@ -40,13 +40,34 @@ def remove_unreasonable_habitable_rooms(df):
return df
def remove_top_1_percent_heat_demand(df):
def remove_top_1_percent_heat_demand_starting(df, threshold_value=860):
    """Drop rows whose starting heat demand is at or above an upper cut-off.

    Args:
        df: DataFrame with a ``heat_demand_starting`` column.
        threshold_value: Exclusive upper bound. Rows whose value is greater
            than or equal to it are removed. Defaults to 860, the cut-off
            that was previously hard-coded here (presumably the 99th
            percentile of the reference data -- TODO confirm).

    Returns:
        The rows of ``df`` whose ``heat_demand_starting`` is strictly below
        ``threshold_value``.
    """
    # The cut-off is a fixed value rather than a percentile recomputed from
    # ``df``, so the same bound is applied to whatever frame is passed in.
    return df[df["heat_demand_starting"] < threshold_value]
def remove_bottom_1_percent_heat_demand_starting(df, threshold_value=137):
    """Drop rows whose starting heat demand is at or below a lower cut-off.

    Args:
        df: DataFrame with a ``heat_demand_starting`` column.
        threshold_value: Exclusive lower bound. Rows whose value is less
            than or equal to it are removed. Defaults to 137, the cut-off
            that was previously hard-coded here (presumably the 1st
            percentile of the reference data -- TODO confirm).

    Returns:
        The rows of ``df`` whose ``heat_demand_starting`` is strictly above
        ``threshold_value``.
    """
    # The cut-off is a fixed value rather than a percentile recomputed from
    # ``df``, so the same bound is applied to whatever frame is passed in.
    return df[df["heat_demand_starting"] > threshold_value]
def remove_top_1_percent_heat_demand_ending(df, threshold_value=593):
    """Drop rows whose ending heat demand is at or above an upper cut-off.

    Args:
        df: DataFrame with a ``heat_demand_ending`` column.
        threshold_value: Exclusive upper bound. Rows whose value is greater
            than or equal to it are removed. Defaults to 593, the cut-off
            that was previously hard-coded here (presumably the 99th
            percentile of the reference data -- TODO confirm).

    Returns:
        The rows of ``df`` whose ``heat_demand_ending`` is strictly below
        ``threshold_value``.
    """
    # The cut-off is a fixed value rather than a percentile recomputed from
    # ``df``, so the same bound is applied to whatever frame is passed in.
    return df[df["heat_demand_ending"] < threshold_value]
def remove_bottom_1_percent_heat_demand_ending(df, threshold_value=70):
    """Drop rows whose ending heat demand is at or below a lower cut-off.

    Args:
        df: DataFrame with a ``heat_demand_ending`` column.
        threshold_value: Exclusive lower bound. Rows whose value is less
            than or equal to it are removed. Defaults to 70, the cut-off
            that was previously hard-coded here (presumably the 1st
            percentile of the reference data -- TODO confirm).

    Returns:
        The rows of ``df`` whose ``heat_demand_ending`` is strictly above
        ``threshold_value``.
    """
    # The cut-off is a fixed value rather than a percentile recomputed from
    # ``df``, so the same bound is applied to whatever frame is passed in.
    return df[df["heat_demand_ending"] > threshold_value]
def remove_top_1_percent_carbon(df):
# threshold_value = df.describe(percentiles=[0.99])['CARBON_STARTING']['99%']
threshold_value = 18
@ -66,7 +87,10 @@ business_logic = {
"remove_unreasonable_habitable_rooms": remove_unreasonable_habitable_rooms,
"keep_negative_heat_change": keep_negative_heat_change,
"keep_negative_carbon_change": keep_negative_carbon_change,
"remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand,
"remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand_starting,
"remove_bottom_1_percent_heat_demand": remove_bottom_1_percent_heat_demand_starting,
"remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending,
"remove_bottom_1_percent_heat_demand_ending": remove_bottom_1_percent_heat_demand_ending,
"remove_top_1_percent_carbon": remove_top_1_percent_carbon,
# "remove_starting_columns": remove_starting_columns
# "keep_ENDING_COLUMNS": keep_ending_columns

View file

@ -49,8 +49,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 13cd955d579de20efe743f82bc434c7e.dir
size: 37294025
md5: 083fceee99ad873f2e248d8b9839d385.dir
size: 35914893
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -61,8 +61,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 13cd955d579de20efe743f82bc434c7e.dir
size: 37294025
md5: 083fceee99ad873f2e248d8b9839d385.dir
size: 35914893
nfiles: 2
params:
configs/build_model.yaml:
@ -94,17 +94,17 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: b9c9ca64ea6973c409c3a7b8f8ed0c3e.dir
size: 2902493
md5: 00c1b7516f310d392aeef71fc4921ce3.dir
size: 2838428
nfiles: 1
- path: data/model/
hash: md5
md5: a9215bba342ed7ec3f97815dfef94e48.dir
size: 727501601
nfiles: 36
md5: 24520b79d5624e76ac760fa35b02eaf3.dir
size: 681087101
nfiles: 35
- path: metrics/fit_metrics.json
hash: md5
md5: 548a431d58cd4f5a3118235dec734372
md5: fd9aefe2f6fcdada0fc22cb8ccdb3f2f
size: 219
generate_predictions:
cmd: python 3_generate_predictions.py
@ -115,13 +115,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: a9215bba342ed7ec3f97815dfef94e48.dir
size: 727501601
nfiles: 36
md5: 24520b79d5624e76ac760fa35b02eaf3.dir
size: 681087101
nfiles: 35
- path: data/prepared_data
hash: md5
md5: 13cd955d579de20efe743f82bc434c7e.dir
size: 37294025
md5: 083fceee99ad873f2e248d8b9839d385.dir
size: 35914893
nfiles: 2
params:
configs/settings.yaml:
@ -133,8 +133,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 484781d6b359e458a25e9ab728d6514d.dir
size: 380517
md5: 06e63ec07a27906d17c7797bd700efaf.dir
size: 370313
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -145,13 +145,13 @@ stages:
size: 3447
- path: data/predictions
hash: md5
md5: 484781d6b359e458a25e9ab728d6514d.dir
size: 380517
md5: 06e63ec07a27906d17c7797bd700efaf.dir
size: 370313
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 13cd955d579de20efe743f82bc434c7e.dir
size: 37294025
md5: 083fceee99ad873f2e248d8b9839d385.dir
size: 35914893
nfiles: 2
params:
configs/settings.yaml:
@ -161,8 +161,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 4d246765aff7c45079d02b4d8f7527f7
size: 220
md5: fc514bac3fa5140eb5568cd96b68b56a
size: 223
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: