diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index e0c99a6..7b0f8e8 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -36,14 +36,78 @@ default: target: heating_kwh identifier_columns: ["uprn"] # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] - drop_columns: [] + drop_columns: ["hot_water_kwh"] # [ # "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", # "heating_cost_ending", "hot_water_cost_ending", # # "days_to_starting", "days_to_ending", # 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', # 'number_habitable_rooms', 'number_heated_rooms'] - retain_features: null + retain_features: ['uprn', 'heating-cost-current', + 'co2-emissions-current', + 'hot-water-cost-current', + 'total-floor-area', + 'secondheat-description', + 'environment-impact-current', + 'floor-description', + 'mainheat-energy-eff', + 'current-energy-efficiency', + 'mainheat-env-eff', + 'walls-energy-eff', + 'roof-energy-eff', + 'property-type', + 'mainheat-description', + 'hot-water-env-eff', + 'mechanical-ventilation', + 'floor-level', + 'built-form', + 'walls-description', + 'mainheatcont-description', + 'roof-description', + 'energy-consumption-current', + 'construction-age-band', + 'hotwater-description', + 'lodgement-datetime', + 'main-fuel', + 'hot-water-energy-eff', + 'co2-emiss-curr-per-floor-area', + 'windows-energy-eff', + 'current-energy-rating', + 'lodgement-year', + 'extension-count', + 'number-open-fireplaces', + 'number-heated-rooms', + 'lodgement-date', + 'number-habitable-rooms', + 'windows-description', + 'local-authority', + 'photo-supply', + 'heat-loss-corridor', + 'posttown', + 'address', + 'flat-top-storey', + 'unheated-corridor-length', + 'fixed-lighting-outlets-count', + 'inspection-date', + 'tenure', + 'county', + 'constituency-label', + 'multi-glaze-proportion', + 'solar-water-heating-flag', + 'address2', + 'energy-tariff', + 'floor-height', + 'constituency', + 'uprn-source', + 'transaction-type', + 'floor-energy-eff', + 'postcode', + 'lodgement-month', + 'lighting-cost-current', + 'glazed-area', + 'address1', + 'floor-env-eff', + 'main-heating-controls'] # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', # 'walls_energy_eff_ending', 'secondheat_description_ending', diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 70615b1..14cd48c 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -20,8 +20,75 @@ stages: size: 4298 params: configs/settings.yaml: - default.feature_processor.feature_processor_config.drop_columns: [] + default.feature_processor.feature_processor_config.drop_columns: + - hot_water_kwh default.feature_processor.feature_processor_config.retain_features: + - uprn + - heating-cost-current + - co2-emissions-current + - hot-water-cost-current + - total-floor-area + - secondheat-description + - environment-impact-current + - floor-description + - mainheat-energy-eff + - current-energy-efficiency + - mainheat-env-eff + - walls-energy-eff + - roof-energy-eff + - property-type + - mainheat-description + - hot-water-env-eff + - mechanical-ventilation + - floor-level + - built-form + - walls-description + - mainheatcont-description + - roof-description + - energy-consumption-current + - construction-age-band + - hotwater-description + - lodgement-datetime + - main-fuel + - hot-water-energy-eff + - co2-emiss-curr-per-floor-area + - windows-energy-eff + - current-energy-rating + - lodgement-year + - extension-count + - number-open-fireplaces + - number-heated-rooms + - lodgement-date + - number-habitable-rooms + - windows-description + - local-authority + - photo-supply + - heat-loss-corridor + - posttown + - address + - flat-top-storey + - unheated-corridor-length + - fixed-lighting-outlets-count + - inspection-date + - tenure + - county + - constituency-label + - multi-glaze-proportion + - solar-water-heating-flag + - address2 + - energy-tariff + - floor-height + - constituency + - uprn-source + - transaction-type + - floor-energy-eff + - postcode + - lodgement-month + - lighting-cost-current + - glazed-area + - address1 + - floor-env-eff + - main-heating-controls default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: heating_kwh @@ -36,8 +103,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: c162c4b80826f42b33cce656446460f3.dir - size: 23784411 + md5: d1ca07d66c3e28c133d0561423e6d2c8.dir + size: 14503223 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -48,8 +115,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: c162c4b80826f42b33cce656446460f3.dir - size: 23784411 + md5: d1ca07d66c3e28c133d0561423e6d2c8.dir + size: 14503223 nfiles: 2 params: configs/build_model.yaml: @@ -81,18 +148,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: c6925db5c6b2ff0f95e97aed727462a1.dir - size: 726994 + md5: e08a232adc7f805d5d97ed7e93d667b3.dir + size: 726970 nfiles: 1 - path: data/model/ hash: md5 - md5: f8cd16b81139a2ed1f40009204b5bb67.dir - size: 557447134 - nfiles: 36 + md5: 3daab783532ba88d40eb905ff65b0f1c.dir + size: 400927883 + nfiles: 37 - path: metrics/fit_metrics.json hash: md5 - md5: 35cc792ba9bda3755928561c512aed3c - size: 214 + md5: 9d6a478739e42b2f5f8500de585e9cf9 + size: 212 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -102,13 +169,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: f8cd16b81139a2ed1f40009204b5bb67.dir - size: 557447134 - nfiles: 36 + md5: 3daab783532ba88d40eb905ff65b0f1c.dir + size: 400927883 + nfiles: 37 - path: data/prepared_data hash: md5 - md5: c162c4b80826f42b33cce656446460f3.dir - size: 23784411 + md5: d1ca07d66c3e28c133d0561423e6d2c8.dir + size: 14503223 nfiles: 2 params: configs/settings.yaml: @@ -120,7 +187,7 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 4edc571b115ec0b8be1d2689e97ff36f.dir + md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir size: 77479 nfiles: 1 generate_metrics: @@ -132,13 +199,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 4edc571b115ec0b8be1d2689e97ff36f.dir + md5: 98a3db098cf2ad9bf786fb77b0ce643f.dir size: 77479 nfiles: 1 - path: data/prepared_data hash: md5 - md5: c162c4b80826f42b33cce656446460f3.dir - size: 23784411 + md5: d1ca07d66c3e28c133d0561423e6d2c8.dir + size: 14503223 nfiles: 2 params: configs/settings.yaml: @@ -148,8 +215,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 32811206191a4b2f24234d1f94b80b70 - size: 222 + md5: 41ccaa41fd34009602d0df571e6453e9 + size: 219 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: