diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py index 1094862..89ba2be 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py @@ -40,13 +40,34 @@ def remove_unreasonable_habitable_rooms(df): return df -def remove_top_1_percent_heat_demand(df): +def remove_top_1_percent_heat_demand_starting(df): # threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_STARTING']['99%'] threshold_value = 860 df = df[df["heat_demand_starting"] < threshold_value] return df +def remove_bottom_1_percent_heat_demand_starting(df): + # NOTE(review): presumably threshold_value = df.describe(percentiles=[0.01])['HEAT_DEMAND_STARTING']['1%'] (TODO confirm) + threshold_value = 137 + df = df[df["heat_demand_starting"] > threshold_value] + return df + + +def remove_top_1_percent_heat_demand_ending(df): + # NOTE(review): presumably threshold_value = df.describe(percentiles=[0.99])['HEAT_DEMAND_ENDING']['99%'] (TODO confirm) + threshold_value = 593 + df = df[df["heat_demand_ending"] < threshold_value] + return df + + +def remove_bottom_1_percent_heat_demand_ending(df): + # NOTE(review): presumably threshold_value = df.describe(percentiles=[0.01])['HEAT_DEMAND_ENDING']['1%'] (TODO confirm) + threshold_value = 70 + df = df[df["heat_demand_ending"] > threshold_value] + return df + + def remove_top_1_percent_carbon(df): # threshold_value = df.describe(percentiles=[0.99])['CARBON_STARTING']['99%'] threshold_value = 18 @@ -66,7 +87,10 @@ business_logic = { "remove_unreasonable_habitable_rooms": remove_unreasonable_habitable_rooms, "keep_negative_heat_change": keep_negative_heat_change, "keep_negative_carbon_change": keep_negative_carbon_change, - "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand, + "remove_top_1_percent_heat_demand": remove_top_1_percent_heat_demand_starting, + "remove_bottom_1_percent_heat_demand": remove_bottom_1_percent_heat_demand_starting, + 
"remove_top_1_percent_heat_demand_ending": remove_top_1_percent_heat_demand_ending, + "remove_bottom_1_percent_heat_demand_ending": remove_bottom_1_percent_heat_demand_ending, "remove_top_1_percent_carbon": remove_top_1_percent_carbon, # "remove_starting_columns": remove_starting_columns # "keep_ENDING_COLUMNS": keep_ending_columns diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index fa9148c..9267aa7 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -49,8 +49,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 13cd955d579de20efe743f82bc434c7e.dir - size: 37294025 + md5: 083fceee99ad873f2e248d8b9839d385.dir + size: 35914893 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -61,8 +61,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 13cd955d579de20efe743f82bc434c7e.dir - size: 37294025 + md5: 083fceee99ad873f2e248d8b9839d385.dir + size: 35914893 nfiles: 2 params: configs/build_model.yaml: @@ -94,17 +94,17 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: b9c9ca64ea6973c409c3a7b8f8ed0c3e.dir - size: 2902493 + md5: 00c1b7516f310d392aeef71fc4921ce3.dir + size: 2838428 nfiles: 1 - path: data/model/ hash: md5 - md5: a9215bba342ed7ec3f97815dfef94e48.dir - size: 727501601 - nfiles: 36 + md5: 24520b79d5624e76ac760fa35b02eaf3.dir + size: 681087101 + nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: 548a431d58cd4f5a3118235dec734372 + md5: fd9aefe2f6fcdada0fc22cb8ccdb3f2f size: 219 generate_predictions: cmd: python 3_generate_predictions.py @@ -115,13 +115,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: a9215bba342ed7ec3f97815dfef94e48.dir - size: 727501601 - nfiles: 36 + md5: 24520b79d5624e76ac760fa35b02eaf3.dir + size: 681087101 + nfiles: 35 - path: data/prepared_data hash: md5 - md5: 13cd955d579de20efe743f82bc434c7e.dir - size: 37294025 + md5: 083fceee99ad873f2e248d8b9839d385.dir + size: 35914893 
nfiles: 2 params: configs/settings.yaml: @@ -133,8 +133,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 484781d6b359e458a25e9ab728d6514d.dir - size: 380517 + md5: 06e63ec07a27906d17c7797bd700efaf.dir + size: 370313 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -145,13 +145,13 @@ stages: size: 3447 - path: data/predictions hash: md5 - md5: 484781d6b359e458a25e9ab728d6514d.dir - size: 380517 + md5: 06e63ec07a27906d17c7797bd700efaf.dir + size: 370313 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 13cd955d579de20efe743f82bc434c7e.dir - size: 37294025 + md5: 083fceee99ad873f2e248d8b9839d385.dir + size: 35914893 nfiles: 2 params: configs/settings.yaml: @@ -161,8 +161,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 4d246765aff7c45079d02b4d8f7527f7 - size: 220 + md5: fc514bac3fa5140eb5568cd96b68b56a + size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: