From c2ad73743af0b6d70a0aa931c408eec139c4309a Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Thu, 8 Aug 2024 23:15:09 +0100 Subject: [PATCH] remove costing columns, photo supply and main-heating-control --- .../src/pipeline/configs/settings.yaml | 10 ++-- modules/ml-pipeline/src/pipeline/dvc.lock | 53 +++++++++---------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index aeb5907..f7b74be 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -39,9 +39,9 @@ default: drop_columns: ["hot_water_kwh"] retain_features: [ 'uprn', - 'heating-cost-current', + # 'heating-cost-current', 'co2-emissions-current', - 'hot-water-cost-current', + # 'hot-water-cost-current', 'total-floor-area', 'secondheat-description', 'floor-description', @@ -70,7 +70,7 @@ default: 'number-open-fireplaces', 'number-heated-rooms', 'windows-description', - 'photo-supply', + # 'photo-supply', 'heat-loss-corridor', 'flat-top-storey', 'unheated-corridor-length', @@ -84,9 +84,9 @@ default: 'transaction-type', 'floor-energy-eff', 'lodgement-month', - 'lighting-cost-current', + # 'lighting-cost-current', 'glazed-area', - 'main-heating-controls', + # 'main-heating-controls', 'estimate_annual_kwh', ] diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 8fd1507..1e29fb4 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -24,9 +24,7 @@ stages: - hot_water_kwh default.feature_processor.feature_processor_config.retain_features: - uprn - - heating-cost-current - co2-emissions-current - - hot-water-cost-current - total-floor-area - secondheat-description - floor-description @@ -55,7 +53,6 @@ stages: - number-open-fireplaces - number-heated-rooms - windows-description - - photo-supply - heat-loss-corridor - flat-top-storey - unheated-corridor-length @@ -69,9 +66,7 @@ stages: - transaction-type - floor-energy-eff - lodgement-month - - lighting-cost-current - glazed-area - - main-heating-controls - estimate_annual_kwh default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 @@ -87,8 +82,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir - size: 11062844 + md5: a6241dcbb3fe1d3b39d1a300ea64dfc9.dir + size: 9606500 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -99,8 +94,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir - size: 11062844 + md5: a6241dcbb3fe1d3b39d1a300ea64dfc9.dir + size: 9606500 nfiles: 2 params: configs/build_model.yaml: @@ -132,18 +127,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 5c694b2ec23baca2e7242d3802ba09fe.dir - size: 1546012 + md5: 5e07647b4dd0145a6d52d6ef729a3bde.dir + size: 1545562 nfiles: 1 - path: data/model/ hash: md5 - md5: 314c5043b1a421e847af8d3126fba788.dir - size: 278676507 - nfiles: 37 + md5: ce14e6f1e69c5513a04403eb00e0db0a.dir + size: 99464470 + nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: 2e5ab3102b145f5c909232e66210a261 - size: 222 + md5: 425c0c6c13742d2d21051bf7ceb90127 + size: 218 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -153,13 +148,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 314c5043b1a421e847af8d3126fba788.dir - size: 278676507 - nfiles: 37 + md5: ce14e6f1e69c5513a04403eb00e0db0a.dir + size: 99464470 + nfiles: 35 - path: data/prepared_data hash: md5 - md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir - size: 11062844 + md5: a6241dcbb3fe1d3b39d1a300ea64dfc9.dir + size: 9606500 nfiles: 2 params: configs/settings.yaml: @@ -171,8 +166,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 10b0612c10dfaa78e08ccc673c6f984e.dir - size: 163560 + md5: ddaa04115c5dd4299974048080d762f5.dir + size: 163540 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -183,13 +178,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 10b0612c10dfaa78e08ccc673c6f984e.dir - size: 163560 + md5: ddaa04115c5dd4299974048080d762f5.dir + size: 163540 nfiles: 1 - path: data/prepared_data hash: md5 - md5: c0cb87bd677d46f4c1a608c6d6ee6110.dir - size: 11062844 + md5: a6241dcbb3fe1d3b39d1a300ea64dfc9.dir + size: 9606500 nfiles: 2 params: configs/settings.yaml: @@ -199,8 +194,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 22794cfc31f6ffd98eb1db4852677b4f - size: 220 + md5: 0beb72a28af4af37a619181b14c2e311 + size: 218 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: