From 1ad96ed3145e2165c8a663697b18dbec4cd1bddb Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 9 Aug 2024 10:54:22 +0100 Subject: [PATCH] remove the same feature: cost, photo and mainheat --- .../src/pipeline/configs/settings.yaml | 10 ++-- modules/ml-pipeline/src/pipeline/dvc.lock | 51 +++++++++---------- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index e24abf5..f623210 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -33,9 +33,9 @@ default: drop_columns: ["heating_kwh"] retain_features: [ 'uprn', - 'heating-cost-current', + # 'heating-cost-current', 'co2-emissions-current', - 'hot-water-cost-current', + # 'hot-water-cost-current', 'total-floor-area', 'secondheat-description', 'floor-description', @@ -64,7 +64,7 @@ default: 'number-open-fireplaces', 'number-heated-rooms', 'windows-description', - 'photo-supply', + # 'photo-supply', 'heat-loss-corridor', 'flat-top-storey', 'unheated-corridor-length', @@ -78,9 +78,9 @@ default: 'transaction-type', 'floor-energy-eff', 'lodgement-month', - 'lighting-cost-current', + # 'lighting-cost-current', 'glazed-area', - 'main-heating-controls', + # 'main-heating-controls', 'estimate_annual_kwh', ] diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index ff64aca..1061d02 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -24,9 +24,7 @@ stages: - heating_kwh default.feature_processor.feature_processor_config.retain_features: - uprn - - heating-cost-current - co2-emissions-current - - hot-water-cost-current - total-floor-area - secondheat-description - floor-description @@ -55,7 +53,6 @@ stages: - number-open-fireplaces - number-heated-rooms - windows-description - - photo-supply - heat-loss-corridor - flat-top-storey - unheated-corridor-length @@ -69,9 +66,7 @@ stages: - transaction-type - floor-energy-eff - lodgement-month - - lighting-cost-current - glazed-area - - main-heating-controls - estimate_annual_kwh default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 @@ -87,8 +82,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 19abfc15e24cd8a869a0f3f087e09584.dir - size: 13396685 + md5: c45c73e2e25a5c9697a788cfa04f232d.dir + size: 11682246 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -99,8 +94,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 19abfc15e24cd8a869a0f3f087e09584.dir - size: 13396685 + md5: c45c73e2e25a5c9697a788cfa04f232d.dir + size: 11682246 nfiles: 2 params: configs/build_model.yaml: @@ -132,17 +127,17 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 0e2f1131b24eafa09ccbab91a36cac6c.dir - size: 1787805 + md5: 6abffc8f19e3bb14345f0504a96fd214.dir + size: 1788386 nfiles: 1 - path: data/model/ hash: md5 - md5: 77c0900cda64c1beaef6782d04fd712e.dir - size: 518798187 - nfiles: 36 + md5: aee2886545c62efbf26d49f32bd1f328.dir + size: 79940408 + nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: 0e03bb46cd03e9521095bf141ee92ed7 + md5: 14e5b4019f6e5cf49edf7945b71e6a66 size: 220 generate_predictions: cmd: python 3_generate_predictions.py @@ -153,13 +148,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 77c0900cda64c1beaef6782d04fd712e.dir - size: 518798187 - nfiles: 36 + md5: aee2886545c62efbf26d49f32bd1f328.dir + size: 79940408 + nfiles: 35 - path: data/prepared_data hash: md5 - md5: 19abfc15e24cd8a869a0f3f087e09584.dir - size: 13396685 + md5: c45c73e2e25a5c9697a788cfa04f232d.dir + size: 11682246 nfiles: 2 params: configs/settings.yaml: @@ -171,8 +166,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir - size: 192502 + md5: efe40990a6092494363daa3284a22878.dir + size: 192442 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -183,13 +178,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir - size: 192502 + md5: efe40990a6092494363daa3284a22878.dir + size: 192442 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 19abfc15e24cd8a869a0f3f087e09584.dir - size: 13396685 + md5: c45c73e2e25a5c9697a788cfa04f232d.dir + size: 11682246 nfiles: 2 params: configs/settings.yaml: @@ -199,8 +194,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: a8b7699dd2b171da5fead1fb04d954aa - size: 220 + md5: c6f913d497eb2f98e801c9e030bd96e9 + size: 222 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: