diff --git a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py index 48a28bf..e93b44b 100644 --- a/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/feature_processor_logic.py @@ -160,7 +160,9 @@ def add_features_from_code(df): return df - +def add_estimate_annual_kwh(df): + df['estimate_annual_kwh'] = df['energy-consumption-current'] * df['total-floor-area'] + return df # def keep_ending_columns(df): # ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)] @@ -172,7 +174,8 @@ def add_features_from_code(df): business_logic = { "add_features_from_code": add_features_from_code, - "remove_hotwaterkwh_bottom_percentile": remove_hotwaterkwh_bottom_percentile + "remove_hotwaterkwh_bottom_percentile": remove_hotwaterkwh_bottom_percentile, + "add_estimate_annual_kwh": add_estimate_annual_kwh # "keep_non_zero_rdsap": keep_non_zero_rdsap, # "keep_flats": keep_flats, # "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size, diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 5fb1d1d..e24abf5 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -38,16 +38,13 @@ default: 'hot-water-cost-current', 'total-floor-area', 'secondheat-description', - 'environment-impact-current', 'floor-description', 'mainheat-energy-eff', 'current-energy-efficiency', - 'mainheat-env-eff', 'walls-energy-eff', 'roof-energy-eff', 'property-type', 'mainheat-description', - 'hot-water-env-eff', 'mechanical-ventilation', 'floor-level', 'built-form', @@ -57,7 +54,6 @@ default: 'energy-consumption-current', 'construction-age-band', 'hotwater-description', - # 'lodgement-datetime', 'main-fuel', 'hot-water-energy-eff', 'co2-emiss-curr-per-floor-area', @@ -67,37 +63,25 @@ default: 'extension-count', 'number-open-fireplaces', 'number-heated-rooms', - # 'lodgement-date', - # 'number-habitable-rooms', 'windows-description', - # 'local-authority', 'photo-supply', 'heat-loss-corridor', - # 'posttown', - # 'address', 'flat-top-storey', 'unheated-corridor-length', 'fixed-lighting-outlets-count', - # 'inspection-date', 'tenure', - # 'county', - # 'constituency-label', 'multi-glaze-proportion', 'solar-water-heating-flag', - # 'address2', 'energy-tariff', 'floor-height', 'constituency', - # 'uprn-source', 'transaction-type', 'floor-energy-eff', - # 'postcode', 'lodgement-month', 'lighting-cost-current', 'glazed-area', - # 'address1', - 'floor-env-eff', - 'main-heating-controls' + 'main-heating-controls', + 'estimate_annual_kwh', ] generate_predictions: diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 06f4eb4..ff64aca 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -29,16 +29,13 @@ stages: - hot-water-cost-current - total-floor-area - secondheat-description - - environment-impact-current - floor-description - mainheat-energy-eff - current-energy-efficiency - - mainheat-env-eff - walls-energy-eff - roof-energy-eff - property-type - mainheat-description - - hot-water-env-eff - mechanical-ventilation - floor-level - built-form @@ -74,8 +71,8 @@ stages: - lodgement-month - lighting-cost-current - glazed-area - - floor-env-eff - main-heating-controls + - estimate_annual_kwh default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: hot_water_kwh @@ -90,8 +87,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 295ac4fd05a1a3373401a7318b0b5186.dir - size: 13131853 + md5: 19abfc15e24cd8a869a0f3f087e09584.dir + size: 13396685 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -102,8 +99,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 295ac4fd05a1a3373401a7318b0b5186.dir - size: 13131853 + md5: 19abfc15e24cd8a869a0f3f087e09584.dir + size: 13396685 nfiles: 2 params: configs/build_model.yaml: @@ -135,18 +132,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 3e48cec68f702bc822eed8fcb2c5c603.dir - size: 1787931 + md5: 0e2f1131b24eafa09ccbab91a36cac6c.dir + size: 1787805 nfiles: 1 - path: data/model/ hash: md5 - md5: 37f7480141e920c68faacd39478a1a68.dir - size: 451364406 - nfiles: 35 + md5: 77c0900cda64c1beaef6782d04fd712e.dir + size: 518798187 + nfiles: 36 - path: metrics/fit_metrics.json hash: md5 - md5: e7a2a5efea57b1ddd1431b713d78bb11 - size: 219 + md5: 0e03bb46cd03e9521095bf141ee92ed7 + size: 220 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -156,13 +153,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 37f7480141e920c68faacd39478a1a68.dir - size: 451364406 - nfiles: 35 + md5: 77c0900cda64c1beaef6782d04fd712e.dir + size: 518798187 + nfiles: 36 - path: data/prepared_data hash: md5 - md5: 295ac4fd05a1a3373401a7318b0b5186.dir - size: 13131853 + md5: 19abfc15e24cd8a869a0f3f087e09584.dir + size: 13396685 nfiles: 2 params: configs/settings.yaml: @@ -174,8 +171,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 9267a66c6fae4da5a589faab76fac14c.dir - size: 192482 + md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir + size: 192502 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -186,13 +183,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 9267a66c6fae4da5a589faab76fac14c.dir - size: 192482 + md5: efc7b9dd9d40e42ad93e3e5acbeac92d.dir + size: 192502 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 295ac4fd05a1a3373401a7318b0b5186.dir - size: 13131853 + md5: 19abfc15e24cd8a869a0f3f087e09584.dir + size: 13396685 nfiles: 2 params: configs/settings.yaml: @@ -202,8 +199,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 3a08c29f028f5e3cb50fb8cd3608e2f4 - size: 221 + md5: a8b7699dd2b171da5fead1fb04d954aa + size: 220 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: