From 7af43ecbef2767d9512cdb2857e8570f76458dfc Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 5 Aug 2024 22:46:03 +0100 Subject: [PATCH] removed features for new model --- modules/ml-pipeline/README.MD | 11 +- .../src/pipeline/configs/settings.yaml | 163 ++++++++---------- modules/ml-pipeline/src/pipeline/dvc.lock | 55 +++--- 3 files changed, 97 insertions(+), 132 deletions(-) diff --git a/modules/ml-pipeline/README.MD b/modules/ml-pipeline/README.MD index 2711569..a0f798e 100644 --- a/modules/ml-pipeline/README.MD +++ b/modules/ml-pipeline/README.MD @@ -17,14 +17,15 @@ Within `src` folder, the structure is as follows: # How to develop using this pipeline: -Run `make init`, which will: -- Download pyenv (Python version management) -- Download Python 3.X.X as defined in the `make` file - current 3.10.12 -- Create a virtual environment with this version of python +First, download miniconda to use conda to manage Python Environments +Run `conda init` to initialise your terminal + +Change to this directory and run `make init`, which will: +- Create a conda virtual environment with this version of python - current 3.10.12 - Install packages in the training and version control directories in the pipeline folder (dev version if applicable) - Install pre-commit to enable pre-commit hooks -To use the environment, run `source .dev_env_pipeline/bin/activate`. 
+To use the environment, run `conda activate dev_env_pipeline` To enable the virtual envrionemnt created in vscode: - Open settings diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 380bdb9..b0102ff 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -36,102 +36,75 @@ default: subsample_seed: 0 target: heating_kwh identifier_columns: ["uprn"] - # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] drop_columns: ["hot_water_kwh"] - # [ - # "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", - # "heating_cost_ending", "hot_water_cost_ending", - # # "days_to_starting", "days_to_ending", - # 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', - # 'number_habitable_rooms', 'number_heated_rooms'] - retain_features: ['uprn', 'heating-cost-current', - 'co2-emissions-current', - 'hot-water-cost-current', - 'total-floor-area', - 'secondheat-description', - 'environment-impact-current', - 'floor-description', - 'mainheat-energy-eff', - 'current-energy-efficiency', - 'mainheat-env-eff', - 'walls-energy-eff', - 'roof-energy-eff', - 'property-type', - 'mainheat-description', - 'hot-water-env-eff', - 'mechanical-ventilation', - 'floor-level', - 'built-form', - 'walls-description', - 'mainheatcont-description', - 'roof-description', - 'energy-consumption-current', - 'construction-age-band', - 'hotwater-description', - 'lodgement-datetime', - 'main-fuel', - 'hot-water-energy-eff', - 'co2-emiss-curr-per-floor-area', - 'windows-energy-eff', - 'current-energy-rating', - 'lodgement-year', - 'extension-count', - 'number-open-fireplaces', - 'number-heated-rooms', - 'lodgement-date', -# 
'number-habitable-rooms', - 'windows-description', - 'local-authority', - 'photo-supply', - 'heat-loss-corridor', - 'posttown', -# 'address', - 'flat-top-storey', - 'unheated-corridor-length', - 'fixed-lighting-outlets-count', - 'inspection-date', - 'tenure', - 'county', - 'constituency-label', - 'multi-glaze-proportion', - 'solar-water-heating-flag', -# 'address2', - 'energy-tariff', - 'floor-height', - 'constituency', - 'uprn-source', - 'transaction-type', - 'floor-energy-eff', - 'postcode', - 'lodgement-month', - 'lighting-cost-current', - 'glazed-area', -# 'address1', - 'floor-env-eff', - 'main-heating-controls'] - # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', - # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', - # 'walls_energy_eff_ending', 'secondheat_description_ending', - # 'property_type', 'mainheatc_energy_eff_ending', 'built_form', - # 'walls_insulation_thickness_ending', 'potential_energy_efficiency', - # 'transaction_type_ending', - # 'floor_thermal_transmittance_ending', - # 'low_energy_lighting_ending', 'heat_demand_starting', - # 'photo_supply_ending', 'carbon_starting', - # 'walls_thermal_transmittance_ending', - # 'roof_insulation_thickness_ending', - # 'total_floor_area_ending', 'number_open_fireplaces_ending', - # 'windows_energy_eff_ending', - # 'floor_height_ending', - # 'extension_count_ending', - # 'has_air_source_heat_pump_ending', - # 'charging_system_ending', 'construction_age_band', 'glazed_type_ending', - # 'roof_thermal_transmittance_ending', - # 'floor_insulation_thickness_ending', 'has_mains_gas_ending', - # 'estimated_perimeter_starting', 'energy_consumption_potential', - # 'environment_impact_potential', 'heater_type_ending', - # 'multi_glaze_proportion_ending', - # 'lighting_energy_eff_ending', 'fixed_lighting_outlets_count'] + retain_features: [ + 'uprn', + 'heating-cost-current', + 'co2-emissions-current', + 'hot-water-cost-current', + 'total-floor-area', + 
'secondheat-description', + 'environment-impact-current', + 'floor-description', + 'mainheat-energy-eff', + 'current-energy-efficiency', + 'mainheat-env-eff', + 'walls-energy-eff', + 'roof-energy-eff', + 'property-type', + 'mainheat-description', + 'hot-water-env-eff', + 'mechanical-ventilation', + 'floor-level', + 'built-form', + 'walls-description', + 'mainheatcont-description', + 'roof-description', + 'energy-consumption-current', + 'construction-age-band', + 'hotwater-description', + # 'lodgement-datetime', + 'main-fuel', + 'hot-water-energy-eff', + 'co2-emiss-curr-per-floor-area', + 'windows-energy-eff', + 'current-energy-rating', + 'lodgement-year', + 'extension-count', + 'number-open-fireplaces', + 'number-heated-rooms', + # 'lodgement-date', + # 'number-habitable-rooms', + 'windows-description', + # 'local-authority', + 'photo-supply', + 'heat-loss-corridor', + # 'posttown', + # 'address', + 'flat-top-storey', + 'unheated-corridor-length', + 'fixed-lighting-outlets-count', + # 'inspection-date', + 'tenure', + # 'county', + # 'constituency-label', + 'multi-glaze-proportion', + 'solar-water-heating-flag', + # 'address2', + 'energy-tariff', + 'floor-height', + 'constituency', + # 'uprn-source', + 'transaction-type', + 'floor-energy-eff', + # 'postcode', + 'lodgement-month', + 'lighting-cost-current', + 'glazed-area', + # 'address1', + 'floor-env-eff', + 'main-heating-controls' + ] generate_predictions: input_dataclient_type: local diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 829628a..59ef25f 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -48,7 +48,6 @@ stages: - energy-consumption-current - construction-age-band - hotwater-description - - lodgement-datetime - main-fuel - hot-water-energy-eff - co2-emiss-curr-per-floor-area @@ -58,28 +57,20 @@ stages: - extension-count - number-open-fireplaces - number-heated-rooms - - lodgement-date - 
windows-description - - local-authority - photo-supply - heat-loss-corridor - - posttown - flat-top-storey - unheated-corridor-length - fixed-lighting-outlets-count - - inspection-date - tenure - - county - - constituency-label - multi-glaze-proportion - solar-water-heating-flag - energy-tariff - floor-height - constituency - - uprn-source - transaction-type - floor-energy-eff - - postcode - lodgement-month - lighting-cost-current - glazed-area @@ -99,8 +90,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir + size: 10405713 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -111,8 +102,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir + size: 10405713 nfiles: 2 params: configs/build_model.yaml: @@ -144,17 +135,17 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 0f536790b342ee84fe51f5bf66ca4e3c.dir - size: 1545512 + md5: dee9c58e45081cf5734895a18f31906f.dir + size: 1545644 nfiles: 1 - path: data/model/ hash: md5 - md5: 0ce09cc5e2d12876d9315cb18f8b70a9.dir - size: 320950858 - nfiles: 36 + md5: 2da6dc420a308a31e5450ab24b7d4c40.dir + size: 297721035 + nfiles: 35 - path: metrics/fit_metrics.json hash: md5 - md5: 5c38cf3ad988c55fb9685d76c7da78b3 + md5: 23032c58977677c6790415aa79e48aa8 size: 216 generate_predictions: cmd: python 3_generate_predictions.py @@ -165,13 +156,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 0ce09cc5e2d12876d9315cb18f8b70a9.dir - size: 320950858 - nfiles: 36 + md5: 2da6dc420a308a31e5450ab24b7d4c40.dir + size: 297721035 + nfiles: 35 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir + size: 10405713 nfiles: 2 params: configs/settings.yaml: @@ -183,8 +174,8 @@ stages: outs: - path: data/predictions/ hash: 
md5 - md5: 9f32b5e943df8cd9336077b8daf2975c.dir - size: 163552 + md5: d93b71cd8f21df7928a423db8a2c4e2b.dir + size: 163544 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -195,13 +186,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 9f32b5e943df8cd9336077b8daf2975c.dir - size: 163552 + md5: d93b71cd8f21df7928a423db8a2c4e2b.dir + size: 163544 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: f5e520d6cc27dcd0d306cfdbebd324ff.dir + size: 10405713 nfiles: 2 params: configs/settings.yaml: @@ -211,8 +202,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 752659c808d2bf0f176a0bf1ad7088a1 - size: 223 + md5: f611572ff9273930f0c386903ee2ba63 + size: 217 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: