From a99501abe0a3701e8602b092336b29e1ae94dcc7 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 9 Jan 2026 10:39:36 +0000 Subject: [PATCH] initial model for heat baseline --- .../pipeline/configs/post_prediction_logic.py | 2 +- .../src/pipeline/configs/settings.yaml | 150 ++++++++++++++-- modules/ml-pipeline/src/pipeline/dvc.lock | 160 +++++++++++++++--- .../version_control/requirements.txt | 6 +- 4 files changed, 278 insertions(+), 40 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py index f23f88d..4af6108 100644 --- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py @@ -32,6 +32,6 @@ def clip_predictions_to_minimum_value( post_prediction_logic = { - "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, + # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, # "round_predictions": round_predictions } diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index f1175b1..fda7da1 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -12,8 +12,7 @@ default: AWS_ACCESS_KEY_ID: minio AWS_SECRET_ACCESS_KEY: minio123 ENDPOINT_URL: http://localhost:9000 - local: - null + local: null prepare_data: input_dataclient_type: aws-s3 @@ -26,20 +25,149 @@ default: output_test_filepath: ./data/prepared_data/test.parquet sample_test_filepath: ./data/prepared_data/sample_test.parquet - feature_processor: feature_processor_type: dataframe feature_processor_config: subsample_amount: null subsample_seed: 0 - target: heat_demand_ending + target: heat_demand_starting identifier_columns: ["uprn"] - drop_columns: [ - "heat_demand_change", "carbon_change", "rdsap_change", "sap_ending", "carbon_ending", "days_to_starting", "days_to_ending", - 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', - 'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', - 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending', - 'floor_thermal_transmittance', 'floor_thermal_transmittance_ending', 'lodgement_date_starting', 'lodgement_date_ending',] + drop_columns: + [ + "heat_demand_ending", + "potential_energy_efficiency", + "environment_impact_potential", + "energy_consumption_potential", + "co2_emissions_potential", + "heat_demand_change", + "carbon_change", + "rdsap_change", + "sap_starting", + "sap_ending", + "carbon_starting", + "carbon_ending", + "days_to_starting", + "days_to_ending", + "number_habitable_rooms_starting", + "number_habitable_rooms_ending", + "number_heated_rooms_starting", + "number_heated_rooms_ending", + "number_habitable_rooms", + "number_heated_rooms", + "lighting_cost_starting", + "lighting_cost_ending", + "heating_cost_starting", + "heating_cost_ending", + "hot_water_cost_starting", + "hot_water_cost_ending", + "floor_thermal_transmittance", + "floor_thermal_transmittance_ending", + "lodgement_date_starting", + "lodgement_date_ending", + "walls_thermal_transmittance_ending", + "walls_thermal_transmittance_unit_ending", + "is_filled_cavity_ending", + "is_as_built_ending", + "walls_is_assumed_ending", + "is_park_home_ending", + "walls_insulation_thickness_ending", + "external_insulation_ending", + "internal_insulation_ending", + "floor_insulation_thickness_ending", + "roof_thermal_transmittance_ending", + "is_at_rafters_ending", + "roof_insulation_thickness_ending", + "heater_type_ending", + "system_type_ending", + "thermostat_characteristics_ending", + "heating_scope_ending", + "energy_recovery_ending", + "hotwater_tariff_type_ending", + "extra_features_ending", + "chp_systems_ending", + "distribution_system_ending", + "no_system_present_ending", + "appliance_ending", + "has_radiators_ending", + "has_fan_coil_units_ending", + "has_pipes_in_screed_above_insulation_ending", + "has_pipes_in_insulated_timber_floor_ending", + "has_pipes_in_concrete_slab_ending", + "has_boiler_ending", + "has_air_source_heat_pump_ending", + "has_room_heaters_ending", + "has_electric_storage_heaters_ending", + "has_warm_air_ending", + "has_electric_underfloor_heating_ending", + "has_electric_ceiling_heating_ending", + "has_community_scheme_ending", + "has_ground_source_heat_pump_ending", + "has_no_system_present_ending", + "has_portable_electric_heaters_ending", + "has_water_source_heat_pump_ending", + "has_electric_heat_pump_ending", + "has_micro-cogeneration_ending", + "has_solar_assisted_heat_pump_ending", + "has_exhaust_source_heat_pump_ending", + "has_community_heat_pump_ending", + "has_hot-water-only_ending", + "has_electric_ending", + "has_mains_gas_ending", + "has_wood_logs_ending", + "has_coal_ending", + "has_oil_ending", + "has_wood_pellets_ending", + "has_anthracite_ending", + "has_dual_fuel_mineral_and_wood_ending", + "has_smokeless_fuel_ending", + "has_lpg_ending", + "has_b30k_ending", + "has_mineral_and_wood_ending", + "has_dual_fuel_appliance_ending", + "has_electricaire_ending", + "has_assumed_for_most_rooms_ending", + "has_underfloor_heating_ending", + "thermostatic_control_ending", + "charging_system_ending", + "switch_system_ending", + "no_control_ending", + "dhw_control_ending", + "community_heating_ending", + "multiple_room_thermostats_ending", + "auxiliary_systems_ending", + "trvs_ending", + "rate_control_ending", + "glazing_type_ending", + "fuel_type_ending", + "main-fuel_tariff_type_ending", + "is_community_ending", + "no_individual_heating_or_community_network_ending", + "complex_fuel_type_ending", + "mechanical_ventilation_ending", + "secondheat_description_ending", + "glazed_type_ending", + "multi_glaze_proportion_ending", + "low_energy_lighting_ending", + "number_open_fireplaces_ending", + "solar_water_heating_flag_ending", + "photo_supply_ending", + "transaction_type_ending", + "energy_tariff_ending", + "extension_count_ending", + "total_floor_area_ending", + "floor_height_ending", + "hot_water_energy_eff_ending", + "floor_energy_eff_ending", + "windows_energy_eff_ending", + "walls_energy_eff_ending", + "sheating_energy_eff_ending", + "roof_energy_eff_ending", + "mainheat_energy_eff_ending", + "mainheatc_energy_eff_ending", + "lighting_energy_eff_ending", + "is_post_sap10_ending", + "estimated_perimeter_ending", + ] # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"] retain_features: null # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', @@ -81,4 +209,4 @@ default: dev: generate_predictions: - input_dataclient_type: aws-s3 + input_dataclient_type: aws-s3 diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index bf947eb..0060071 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -21,10 +21,17 @@ stages: params: configs/settings.yaml: default.feature_processor.feature_processor_config.drop_columns: + - heat_demand_ending + - potential_energy_efficiency + - environment_impact_potential + - energy_consumption_potential + - co2_emissions_potential - heat_demand_change - carbon_change - rdsap_change + - sap_starting - sap_ending + - carbon_starting - carbon_ending - days_to_starting - days_to_ending @@ -44,11 +51,114 @@ stages: - floor_thermal_transmittance_ending - lodgement_date_starting - lodgement_date_ending + - walls_thermal_transmittance_ending + - walls_thermal_transmittance_unit_ending + - is_filled_cavity_ending + - is_as_built_ending + - walls_is_assumed_ending + - is_park_home_ending + - walls_insulation_thickness_ending + - external_insulation_ending + - internal_insulation_ending + - floor_insulation_thickness_ending + - roof_thermal_transmittance_ending + - is_at_rafters_ending + - roof_insulation_thickness_ending + - heater_type_ending + - system_type_ending + - thermostat_characteristics_ending + - heating_scope_ending + - energy_recovery_ending + - hotwater_tariff_type_ending + - extra_features_ending + - chp_systems_ending + - distribution_system_ending + - no_system_present_ending + - appliance_ending + - has_radiators_ending + - has_fan_coil_units_ending + - has_pipes_in_screed_above_insulation_ending + - has_pipes_in_insulated_timber_floor_ending + - has_pipes_in_concrete_slab_ending + - has_boiler_ending + - has_air_source_heat_pump_ending + - has_room_heaters_ending + - has_electric_storage_heaters_ending + - has_warm_air_ending + - has_electric_underfloor_heating_ending + - has_electric_ceiling_heating_ending + - has_community_scheme_ending + - has_ground_source_heat_pump_ending + - has_no_system_present_ending + - has_portable_electric_heaters_ending + - has_water_source_heat_pump_ending + - has_electric_heat_pump_ending + - has_micro-cogeneration_ending + - has_solar_assisted_heat_pump_ending + - has_exhaust_source_heat_pump_ending + - has_community_heat_pump_ending + - has_hot-water-only_ending + - has_electric_ending + - has_mains_gas_ending + - has_wood_logs_ending + - has_coal_ending + - has_oil_ending + - has_wood_pellets_ending + - has_anthracite_ending + - has_dual_fuel_mineral_and_wood_ending + - has_smokeless_fuel_ending + - has_lpg_ending + - has_b30k_ending + - has_mineral_and_wood_ending + - has_dual_fuel_appliance_ending + - has_electricaire_ending + - has_assumed_for_most_rooms_ending + - has_underfloor_heating_ending + - thermostatic_control_ending + - charging_system_ending + - switch_system_ending + - no_control_ending + - dhw_control_ending + - community_heating_ending + - multiple_room_thermostats_ending + - auxiliary_systems_ending + - trvs_ending + - rate_control_ending + - glazing_type_ending + - fuel_type_ending + - main-fuel_tariff_type_ending + - is_community_ending + - no_individual_heating_or_community_network_ending + - complex_fuel_type_ending + - mechanical_ventilation_ending + - secondheat_description_ending + - glazed_type_ending + - multi_glaze_proportion_ending + - low_energy_lighting_ending + - number_open_fireplaces_ending + - solar_water_heating_flag_ending + - photo_supply_ending + - transaction_type_ending + - energy_tariff_ending + - extension_count_ending + - total_floor_area_ending + - floor_height_ending + - hot_water_energy_eff_ending + - floor_energy_eff_ending + - windows_energy_eff_ending + - walls_energy_eff_ending + - sheating_energy_eff_ending + - roof_energy_eff_ending + - mainheat_energy_eff_ending + - mainheatc_energy_eff_ending + - lighting_energy_eff_ending + - is_post_sap10_ending + - estimated_perimeter_ending default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 default.feature_processor.feature_processor_config.target: - heat_demand_ending + heat_demand_starting default.feature_processor.feature_processor_type: dataframe default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet @@ -62,8 +172,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: e3bfd536e80a5e0289eb72d424b621d4.dir - size: 37960889 + md5: c293fbc1658af932f0d09cdce25acf67.dir + size: 21779190 nfiles: 3 build_model: cmd: python 2_build_model.py @@ -74,8 +184,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: e3bfd536e80a5e0289eb72d424b621d4.dir - size: 37960889 + md5: c293fbc1658af932f0d09cdce25acf67.dir + size: 21779190 nfiles: 3 params: configs/build_model.yaml: @@ -176,18 +286,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 203a83038aa79f61feaa8e7b036ec12c.dir - size: 3008451 + md5: 6c4de55effeb468e37ee3db3838109db.dir + size: 2976628 nfiles: 1 - path: data/model/ hash: md5 - md5: 42b9588e11bbe599aea65e7560f1d217.dir - size: 780134010 - nfiles: 32 + md5: 2ff63da0312853b1fd9338cac62ba0b0.dir + size: 592460869 + nfiles: 31 - path: metrics/fit_metrics.json hash: md5 - md5: da7294ddfffd1a3613a731ff2685d814 - size: 221 + md5: c00465e99e9368afdb3302a52fca99b9 + size: 223 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -197,13 +307,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 42b9588e11bbe599aea65e7560f1d217.dir - size: 780134010 - nfiles: 32 + md5: 2ff63da0312853b1fd9338cac62ba0b0.dir + size: 592460869 + nfiles: 31 - path: data/prepared_data hash: md5 - md5: e3bfd536e80a5e0289eb72d424b621d4.dir - size: 37960889 + md5: c293fbc1658af932f0d09cdce25acf67.dir + size: 21779190 nfiles: 3 params: configs/settings.yaml: @@ -217,8 +327,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 256e8a11b0d6ab414f97b89d4658dea3.dir - size: 406659 + md5: a960cadf88d5f38cc55942781a2db51e.dir + size: 392728 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -229,13 +339,13 @@ stages: size: 3447 - path: data/predictions hash: md5 - md5: 256e8a11b0d6ab414f97b89d4658dea3.dir - size: 406659 + md5: a960cadf88d5f38cc55942781a2db51e.dir + size: 392728 nfiles: 1 - path: data/prepared_data hash: md5 - md5: e3bfd536e80a5e0289eb72d424b621d4.dir - size: 37960889 + md5: c293fbc1658af932f0d09cdce25acf67.dir + size: 21779190 nfiles: 3 params: configs/settings.yaml: @@ -245,8 +355,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 559d598d06a40bcc337e9f9bf1c45edf - size: 221 + md5: c0241381a23b29831b18be3f063f75fd + size: 218 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: diff --git a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt index fa93d82..4591668 100644 --- a/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt +++ b/modules/ml-pipeline/src/pipeline/requirements/version_control/requirements.txt @@ -1,4 +1,4 @@ -dvc==3.51.0 -dvc-s3==3.2.0 +dvc==3.66.0 +dvc-s3==3.2.2 gto==1.9.0 -pyOpenSSL==23.3.0 +pyOpenSSL==23.3.0 \ No newline at end of file