diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index 380bdb9..60869f0 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -38,13 +38,8 @@ default: identifier_columns: ["uprn"] # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"] drop_columns: ["hot_water_kwh"] - # [ - # "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", - # "heating_cost_ending", "hot_water_cost_ending", - # # "days_to_starting", "days_to_ending", - # 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', - # 'number_habitable_rooms', 'number_heated_rooms'] - retain_features: ['uprn', 'heating-cost-current', + retain_features: [ + 'uprn', 'heating-cost-current', 'co2-emissions-current', 'hot-water-cost-current', 'total-floor-area', @@ -68,7 +63,7 @@ default: 'energy-consumption-current', 'construction-age-band', 'hotwater-description', - 'lodgement-datetime', +# 'lodgement-datetime', 'main-fuel', 'hot-water-energy-eff', 'co2-emiss-curr-per-floor-area', @@ -78,60 +73,38 @@ default: 'extension-count', 'number-open-fireplaces', 'number-heated-rooms', - 'lodgement-date', +# 'lodgement-date', # 'number-habitable-rooms', 'windows-description', - 'local-authority', +# 'local-authority', 'photo-supply', 'heat-loss-corridor', - 'posttown', +# 'posttown', # 'address', 'flat-top-storey', 'unheated-corridor-length', 'fixed-lighting-outlets-count', - 'inspection-date', +# 'inspection-date', 'tenure', - 'county', - 'constituency-label', +# 'county', +# 'constituency-label', 'multi-glaze-proportion', 'solar-water-heating-flag', # 'address2', 'energy-tariff', 'floor-height', 'constituency', - 'uprn-source', +# 'uprn-source', 'transaction-type', 'floor-energy-eff', - 'postcode', +# 'postcode', 'lodgement-month', 'lighting-cost-current', 'glazed-area', # 'address1', 'floor-env-eff', - 'main-heating-controls'] - # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', - # 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending', - # 'walls_energy_eff_ending', 'secondheat_description_ending', - # 'property_type', 'mainheatc_energy_eff_ending', 'built_form', - # 'walls_insulation_thickness_ending', 'potential_energy_efficiency', - # 'transaction_type_ending', - # 'floor_thermal_transmittance_ending', - # 'low_energy_lighting_ending', 'heat_demand_starting', - # 'photo_supply_ending', 'carbon_starting', - # 'walls_thermal_transmittance_ending', - # 'roof_insulation_thickness_ending', - # 'total_floor_area_ending', 'number_open_fireplaces_ending', - # 'windows_energy_eff_ending', - # 'floor_height_ending', - # 'extension_count_ending', - # 'has_air_source_heat_pump_ending', - # 'charging_system_ending', 'construction_age_band', 'glazed_type_ending', - # 'roof_thermal_transmittance_ending', - # 'floor_insulation_thickness_ending', 'has_mains_gas_ending', - # 'estimated_perimeter_starting', 'energy_consumption_potential', - # 'environment_impact_potential', 'heater_type_ending', - # 'multi_glaze_proportion_ending', - # 'lighting_energy_eff_ending', 'fixed_lighting_outlets_count'] + 'main-heating-controls' + ] generate_predictions: input_dataclient_type: local diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index 829628a..707954a 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -48,7 +48,6 @@ stages: - energy-consumption-current - construction-age-band - hotwater-description - - lodgement-datetime - main-fuel - hot-water-energy-eff - co2-emiss-curr-per-floor-area @@ -58,28 +57,20 @@ stages: - extension-count - number-open-fireplaces - number-heated-rooms - - lodgement-date - windows-description - - local-authority - photo-supply - heat-loss-corridor - - posttown - flat-top-storey - unheated-corridor-length - fixed-lighting-outlets-count - - inspection-date - tenure - - county - - constituency-label - multi-glaze-proportion - solar-water-heating-flag - energy-tariff - floor-height - constituency - - uprn-source - transaction-type - floor-energy-eff - - postcode - lodgement-month - lighting-cost-current - glazed-area @@ -89,7 +80,7 @@ stages: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: heating_kwh default.feature_processor.feature_processor_type: dataframe - default.prepare_data.data_filepath: + default.prepare_data.data_filepath: s3://retrofit-data-dev/energy_consumption/2024-07-25/energy_consumption_dataset.parquet default.prepare_data.input_dataclient_type: aws-s3 default.prepare_data.output_dataclient_type: local @@ -99,8 +90,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: 240ceda2ba6ed38022d4ddf3271bccd0.dir + size: 10408173 nfiles: 2 build_model: cmd: python 2_build_model.py @@ -111,8 +102,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: 240ceda2ba6ed38022d4ddf3271bccd0.dir + size: 10408173 nfiles: 2 params: configs/build_model.yaml: @@ -144,18 +135,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 0f536790b342ee84fe51f5bf66ca4e3c.dir - size: 1545512 + md5: 449333eac4dd5a6538a39e08b448c838.dir + size: 1544880 nfiles: 1 - path: data/model/ hash: md5 - md5: 0ce09cc5e2d12876d9315cb18f8b70a9.dir - size: 320950858 - nfiles: 36 + md5: 735d5925686a22e9ea80e1374f33204f.dir + size: 144468604 + nfiles: 28 - path: metrics/fit_metrics.json hash: md5 - md5: 5c38cf3ad988c55fb9685d76c7da78b3 - size: 216 + md5: 01c3b9d6c0a08dff53f1532f78d36ecc + size: 219 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -165,13 +156,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 0ce09cc5e2d12876d9315cb18f8b70a9.dir - size: 320950858 - nfiles: 36 + md5: 735d5925686a22e9ea80e1374f33204f.dir + size: 144468604 + nfiles: 28 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: 240ceda2ba6ed38022d4ddf3271bccd0.dir + size: 10408173 nfiles: 2 params: configs/settings.yaml: @@ -183,8 +174,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 9f32b5e943df8cd9336077b8daf2975c.dir - size: 163552 + md5: c1b613aa06be5589de825ca877ff66c4.dir + size: 163496 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -195,13 +186,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 9f32b5e943df8cd9336077b8daf2975c.dir - size: 163552 + md5: c1b613aa06be5589de825ca877ff66c4.dir + size: 163496 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 8585e7f26fa0008dcc0074996a51a78d.dir - size: 18062621 + md5: 240ceda2ba6ed38022d4ddf3271bccd0.dir + size: 10408173 nfiles: 2 params: configs/settings.yaml: @@ -211,8 +202,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 752659c808d2bf0f176a0bf1ad7088a1 - size: 223 + md5: 08ea486b800e624059374ff5bef107d0 + size: 218 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: