From 8cfe43b22d0a405b196bb01c78bed5550f6c20d3 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Sat, 8 Jun 2024 10:05:38 +0100 Subject: [PATCH] reduce strictness of the lower threshold to below 0 --- .../configs/feature_processor_logic.py | 24 +++++----- modules/ml-pipeline/src/pipeline/dvc.lock | 48 +++++++++---------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py index 89ba2be..6487fed 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py @@ -47,23 +47,23 @@ def remove_top_1_percent_heat_demand_starting(df): return df -def remove_bottom_1_percent_heat_demand_starting(df): +def remove_negative_heat_demand_starting(df): # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%'] - threshold_value = 137 + threshold_value = 0 df = df[df["heat_demand_starting"] > threshold_value] return df -def remove_top_1_percent_heat_demand_ending(df): - # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%'] - threshold_value = 593 - df = df[df["heat_demand_ending"] < threshold_value] - return df +# def remove_top_1_percent_heat_demand_ending(df): +# # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%'] +# threshold_value = 593 +# df = df[df["heat_demand_ending"] < threshold_value] +# return df -def remove_bottom_1_percent_heat_demand_ending(df): +def remove_negative_heat_demand_ending(df): # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%'] - threshold_value = 70 + threshold_value = 0 df = df[df["heat_demand_ending"] > threshold_value] return df @@ -88,9 +88,9 @@ business_logic = { "keep_negative_heat_change": keep_negative_heat_change, "keep_negative_carbon_change": keep_negative_carbon_change, "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand_starting, - "remove_bottom_1_percent_heat_demand": remove_bottom_1_percent_heat_demand_starting, - "remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending, - "remove_bottom_1_percent_heat_demand_ending": remove_bottom_1_percent_heat_demand_ending, + "remove_negative_heat_demand_starting": remove_negative_heat_demand_starting, + # "remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending, + "remove_negative_heat_demand_ending": remove_negative_heat_demand_ending, "remove_top_1_percent_carbon": remove_top_1_percent_carbon, # "remove_starting_columns": remove_starting_columns # "keep_ENDING_COLUMNS": keep_ending_columns diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 9267aa7..d7c0ce5 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -49,8 +49,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 083fceee99ad873f2e248d8b9839d385.dir - size: 35914893 + md5: 63af74a6d808f37d7ab0010300e3f6bf.dir + size: 37110480 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -61,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 083fceee99ad873f2e248d8b9839d385.dir - size: 35914893 + md5: 63af74a6d808f37d7ab0010300e3f6bf.dir + size: 37110480 nfiles: 2 params: configs/build_model.yaml: @@ -94,18 +94,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 00c1b7516f310d392aeef71fc4921ce3.dir - size: 2838428 + md5: adc6e6cd0b396d9327123cabc0f3409e.dir + size: 2901081 nfiles: 1 - path: data/model/ hash: md5 - md5: 24520b79d5624e76ac760fa35b02eaf3.dir - size: 681087101 - nfiles: 35 + md5: 191092d9ed4218368408af7907aff787.dir + size: 719293313 + nfiles: 36 - path: metrics/fit_metrics.json hash: md5 - md5: fd9aefe2f6fcdada0fc22cb8ccdb3f2f - size: 219 + md5: feb839b11d75217c02ee8b3d13362324 + size: 223 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -115,13 +115,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 24520b79d5624e76ac760fa35b02eaf3.dir - size: 681087101 - nfiles: 35 + md5: 191092d9ed4218368408af7907aff787.dir + size: 719293313 + nfiles: 36 - path: data/prepared_data hash: md5 - md5: 083fceee99ad873f2e248d8b9839d385.dir - size: 35914893 + md5: 63af74a6d808f37d7ab0010300e3f6bf.dir + size: 37110480 nfiles: 2 params: configs/settings.yaml: @@ -133,8 +133,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 06e63ec07a27906d17c7797bd700efaf.dir - size: 370313 + md5: 794b1fb87d0693694782bef4d7443dfb.dir + size: 380445 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -145,13 +145,13 @@ stages: size: 3447 - path: data/predictions hash: md5 - md5: 06e63ec07a27906d17c7797bd700efaf.dir - size: 370313 + md5: 794b1fb87d0693694782bef4d7443dfb.dir + size: 380445 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 083fceee99ad873f2e248d8b9839d385.dir - size: 35914893 + md5: 63af74a6d808f37d7ab0010300e3f6bf.dir + size: 37110480 nfiles: 2 params: configs/settings.yaml: @@ -161,8 +161,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: fc514bac3fa5140eb5568cd96b68b56a - size: 223 + md5: c0db9fdd237e4cae5692884d70bb4d4c + size: 221 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: