From 637ea9d5e2eecaa39307fb0f7bf03042339682d6 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 9 Jan 2026 09:57:31 +0000 Subject: [PATCH] initial commit for carbon_baseline --- .../pipeline/configs/post_prediction_logic.py | 2 +- .../src/pipeline/configs/settings.yaml | 153 +++++++++++++++-- modules/ml-pipeline/src/pipeline/dvc.lock | 161 +++++++++++++++--- 3 files changed, 282 insertions(+), 34 deletions(-) diff --git a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py index 2ca8890..3db1c17 100644 --- a/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py +++ b/modules/ml-pipeline/src/pipeline/configs/post_prediction_logic.py @@ -31,6 +31,6 @@ def clip_predictions_to_minimum_value( post_prediction_logic = { - "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, + # "clip_predictions_to_minimum_value": clip_predictions_to_minimum_value, # "round_predictions": round_predictions } diff --git a/modules/ml-pipeline/src/pipeline/configs/settings.yaml b/modules/ml-pipeline/src/pipeline/configs/settings.yaml index f71fc63..d068a06 100644 --- a/modules/ml-pipeline/src/pipeline/configs/settings.yaml +++ b/modules/ml-pipeline/src/pipeline/configs/settings.yaml @@ -12,8 +12,7 @@ default: AWS_ACCESS_KEY_ID: minio AWS_SECRET_ACCESS_KEY: minio123 ENDPOINT_URL: http://localhost:9000 - local: - null + local: null prepare_data: input_dataclient_type: aws-s3 @@ -31,15 +30,149 @@ default: feature_processor_config: subsample_amount: null subsample_seed: 0 - target: carbon_ending + target: carbon_starting identifier_columns: ["uprn"] # drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "sap_ending"] - drop_columns: [ - "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "sap_ending", "days_to_starting", "days_to_ending", - 'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting', 'number_heated_rooms_ending', - 'number_habitable_rooms', 'number_heated_rooms', 'lighting_cost_starting', - 'lighting_cost_ending', 'heating_cost_starting', 'heating_cost_ending', 'hot_water_cost_starting', 'hot_water_cost_ending', - 'floor_thermal_transmittance', 'floor_thermal_transmittance_ending', 'lodgement_date_starting', 'lodgement_date_ending',] + drop_columns: + [ + "carbon_ending", + "potential_energy_efficiency", + "environment_impact_potential", + "energy_consumption_potential", + "co2_emissions_potential", + "heat_demand_change", + "carbon_change", + "rdsap_change", + "heat_demand_starting", + "heat_demand_ending", + "sap_starting", + "sap_ending", + "days_to_starting", + "days_to_ending", + "number_habitable_rooms_starting", + "number_habitable_rooms_ending", + "number_heated_rooms_starting", + "number_heated_rooms_ending", + "number_habitable_rooms", + "number_heated_rooms", + "lighting_cost_starting", + "lighting_cost_ending", + "heating_cost_starting", + "heating_cost_ending", + "hot_water_cost_starting", + "hot_water_cost_ending", + "floor_thermal_transmittance", + "floor_thermal_transmittance_ending", + "lodgement_date_starting", + "lodgement_date_ending", + "potential_energy_efficiency", + "environment_impact_potential", + "energy_consumption_potential", + "co2_emissions_potential", + "walls_thermal_transmittance_ending", + "walls_thermal_transmittance_unit_ending", + "is_filled_cavity_ending", + "is_as_built_ending", + "walls_is_assumed_ending", + "is_park_home_ending", + "walls_insulation_thickness_ending", + "external_insulation_ending", + "internal_insulation_ending", + "floor_insulation_thickness_ending", + "roof_thermal_transmittance_ending", + "is_at_rafters_ending", + "roof_insulation_thickness_ending", + "heater_type_ending", + "system_type_ending", + "thermostat_characteristics_ending", + "heating_scope_ending", + "energy_recovery_ending", + "hotwater_tariff_type_ending", + "extra_features_ending", + "chp_systems_ending", + "distribution_system_ending", + "no_system_present_ending", + "appliance_ending", + "has_radiators_ending", + "has_fan_coil_units_ending", + "has_pipes_in_screed_above_insulation_ending", + "has_pipes_in_insulated_timber_floor_ending", + "has_pipes_in_concrete_slab_ending", + "has_boiler_ending", + "has_air_source_heat_pump_ending", + "has_room_heaters_ending", + "has_electric_storage_heaters_ending", + "has_warm_air_ending", + "has_electric_underfloor_heating_ending", + "has_electric_ceiling_heating_ending", + "has_community_scheme_ending", + "has_ground_source_heat_pump_ending", + "has_no_system_present_ending", + "has_portable_electric_heaters_ending", + "has_water_source_heat_pump_ending", + "has_electric_heat_pump_ending", + "has_micro-cogeneration_ending", + "has_solar_assisted_heat_pump_ending", + "has_exhaust_source_heat_pump_ending", + "has_community_heat_pump_ending", + "has_hot-water-only_ending", + "has_electric_ending", + "has_mains_gas_ending", + "has_wood_logs_ending", + "has_coal_ending", + "has_oil_ending", + "has_wood_pellets_ending", + "has_anthracite_ending", + "has_dual_fuel_mineral_and_wood_ending", + "has_smokeless_fuel_ending", + "has_lpg_ending", + "has_b30k_ending", + "has_mineral_and_wood_ending", + "has_dual_fuel_appliance_ending", + "has_electricaire_ending", + "has_assumed_for_most_rooms_ending", + "has_underfloor_heating_ending", + "thermostatic_control_ending", + "charging_system_ending", + "switch_system_ending", + "no_control_ending", + "dhw_control_ending", + "community_heating_ending", + "multiple_room_thermostats_ending", + "auxiliary_systems_ending", + "trvs_ending", + "rate_control_ending", + "glazing_type_ending", + "fuel_type_ending", + "main-fuel_tariff_type_ending", + "is_community_ending", + "no_individual_heating_or_community_network_ending", + "complex_fuel_type_ending", + "mechanical_ventilation_ending", + "secondheat_description_ending", + "glazed_type_ending", + "multi_glaze_proportion_ending", + "low_energy_lighting_ending", + "number_open_fireplaces_ending", + "solar_water_heating_flag_ending", + "photo_supply_ending", + "transaction_type_ending", + "energy_tariff_ending", + "extension_count_ending", + "total_floor_area_ending", + "floor_height_ending", + "hot_water_energy_eff_ending", + "floor_energy_eff_ending", + "windows_energy_eff_ending", + "walls_energy_eff_ending", + "sheating_energy_eff_ending", + "roof_energy_eff_ending", + "mainheat_energy_eff_ending", + "mainheatc_energy_eff_ending", + "lighting_energy_eff_ending", + "is_post_sap10_ending", + "estimated_perimeter_ending", + ] # retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"] retain_features: null # retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending', @@ -81,4 +214,4 @@ default: dev: generate_predictions: - input_dataclient_type: aws-s3 + input_dataclient_type: aws-s3 diff --git a/modules/ml-pipeline/src/pipeline/dvc.lock b/modules/ml-pipeline/src/pipeline/dvc.lock index ba1ccaf..d5ac41d 100644 --- a/modules/ml-pipeline/src/pipeline/dvc.lock +++ b/modules/ml-pipeline/src/pipeline/dvc.lock @@ -21,10 +21,17 @@ stages: params: configs/settings.yaml: default.feature_processor.feature_processor_config.drop_columns: + - carbon_ending + - potential_energy_efficiency + - environment_impact_potential + - energy_consumption_potential + - co2_emissions_potential - heat_demand_change - carbon_change - rdsap_change + - heat_demand_starting - heat_demand_ending + - sap_starting - sap_ending - days_to_starting - days_to_ending @@ -44,10 +51,118 @@ stages: - floor_thermal_transmittance_ending - lodgement_date_starting - lodgement_date_ending + - potential_energy_efficiency + - environment_impact_potential + - energy_consumption_potential + - co2_emissions_potential + - walls_thermal_transmittance_ending + - walls_thermal_transmittance_unit_ending + - is_filled_cavity_ending + - is_as_built_ending + - walls_is_assumed_ending + - is_park_home_ending + - walls_insulation_thickness_ending + - external_insulation_ending + - internal_insulation_ending + - floor_insulation_thickness_ending + - roof_thermal_transmittance_ending + - is_at_rafters_ending + - roof_insulation_thickness_ending + - heater_type_ending + - system_type_ending + - thermostat_characteristics_ending + - heating_scope_ending + - energy_recovery_ending + - hotwater_tariff_type_ending + - extra_features_ending + - chp_systems_ending + - distribution_system_ending + - no_system_present_ending + - appliance_ending + - has_radiators_ending + - has_fan_coil_units_ending + - has_pipes_in_screed_above_insulation_ending + - has_pipes_in_insulated_timber_floor_ending + - has_pipes_in_concrete_slab_ending + - has_boiler_ending + - has_air_source_heat_pump_ending + - has_room_heaters_ending + - has_electric_storage_heaters_ending + - has_warm_air_ending + - has_electric_underfloor_heating_ending + - has_electric_ceiling_heating_ending + - has_community_scheme_ending + - has_ground_source_heat_pump_ending + - has_no_system_present_ending + - has_portable_electric_heaters_ending + - has_water_source_heat_pump_ending + - has_electric_heat_pump_ending + - has_micro-cogeneration_ending + - has_solar_assisted_heat_pump_ending + - has_exhaust_source_heat_pump_ending + - has_community_heat_pump_ending + - has_hot-water-only_ending + - has_electric_ending + - has_mains_gas_ending + - has_wood_logs_ending + - has_coal_ending + - has_oil_ending + - has_wood_pellets_ending + - has_anthracite_ending + - has_dual_fuel_mineral_and_wood_ending + - has_smokeless_fuel_ending + - has_lpg_ending + - has_b30k_ending + - has_mineral_and_wood_ending + - has_dual_fuel_appliance_ending + - has_electricaire_ending + - has_assumed_for_most_rooms_ending + - has_underfloor_heating_ending + - thermostatic_control_ending + - charging_system_ending + - switch_system_ending + - no_control_ending + - dhw_control_ending + - community_heating_ending + - multiple_room_thermostats_ending + - auxiliary_systems_ending + - trvs_ending + - rate_control_ending + - glazing_type_ending + - fuel_type_ending + - main-fuel_tariff_type_ending + - is_community_ending + - no_individual_heating_or_community_network_ending + - complex_fuel_type_ending + - mechanical_ventilation_ending + - secondheat_description_ending + - glazed_type_ending + - multi_glaze_proportion_ending + - low_energy_lighting_ending + - number_open_fireplaces_ending + - solar_water_heating_flag_ending + - photo_supply_ending + - transaction_type_ending + - energy_tariff_ending + - extension_count_ending + - total_floor_area_ending + - floor_height_ending + - hot_water_energy_eff_ending + - floor_energy_eff_ending + - windows_energy_eff_ending + - walls_energy_eff_ending + - sheating_energy_eff_ending + - roof_energy_eff_ending + - mainheat_energy_eff_ending + - mainheatc_energy_eff_ending + - lighting_energy_eff_ending + - is_post_sap10_ending + - estimated_perimeter_ending default.feature_processor.feature_processor_config.retain_features: default.feature_processor.feature_processor_config.subsample_amount: default.feature_processor.feature_processor_config.subsample_seed: 0 - default.feature_processor.feature_processor_config.target: carbon_ending + default.feature_processor.feature_processor_config.target: + carbon_starting default.feature_processor.feature_processor_type: dataframe default.prepare_data.data_filepath: s3://retrofit-data-dev/sap_change_model/2025-11-02-09-32-42/dataset_rooms.parquet @@ -61,8 +176,8 @@ stages: outs: - path: data/prepared_data/ hash: md5 - md5: 219cd47a478057c6473e390611c46ba6.dir - size: 37781342 + md5: e40fba0aee32d4b7120d7cbe633040b0.dir + size: 21607047 nfiles: 3 build_model: cmd: python 2_build_model.py @@ -73,8 +188,8 @@ stages: size: 4820 - path: data/prepared_data hash: md5 - md5: 219cd47a478057c6473e390611c46ba6.dir - size: 37781342 + md5: e40fba0aee32d4b7120d7cbe633040b0.dir + size: 21607047 nfiles: 3 params: configs/build_model.yaml: @@ -175,18 +290,18 @@ stages: outs: - path: data/fit_predictions/ hash: md5 - md5: 5411b43b1a372e77f90de28b60913ae6.dir - size: 3833319 + md5: 81df2583046d6611c871df31b943ccbe.dir + size: 2733019 nfiles: 1 - path: data/model/ hash: md5 - md5: 354a0b7ea4268f1316c77257e57116fd.dir - size: 745138026 + md5: bfd74e6b160e063b0d9e1f40b3bd0e26.dir + size: 598129590 nfiles: 31 - path: metrics/fit_metrics.json hash: md5 - md5: 56bbb666b56aeca7da9436138c881948 - size: 225 + md5: e344820734a823327e3cc8c43bf2f6bc + size: 212 generate_predictions: cmd: python 3_generate_predictions.py deps: @@ -196,13 +311,13 @@ stages: size: 2464 - path: data/model hash: md5 - md5: 354a0b7ea4268f1316c77257e57116fd.dir - size: 745138026 + md5: bfd74e6b160e063b0d9e1f40b3bd0e26.dir + size: 598129590 nfiles: 31 - path: data/prepared_data hash: md5 - md5: 219cd47a478057c6473e390611c46ba6.dir - size: 37781342 + md5: e40fba0aee32d4b7120d7cbe633040b0.dir + size: 21607047 nfiles: 3 params: configs/settings.yaml: @@ -216,8 +331,8 @@ stages: outs: - path: data/predictions/ hash: md5 - md5: 8ddd3dbe13df261dbbcb57d01f75c3ba.dir - size: 532652 + md5: f4cf2a393b36cf49a0722889344a0016.dir + size: 392930 nfiles: 1 generate_metrics: cmd: python 4_generate_metrics.py @@ -228,13 +343,13 @@ stages: size: 3484 - path: data/predictions hash: md5 - md5: 8ddd3dbe13df261dbbcb57d01f75c3ba.dir - size: 532652 + md5: f4cf2a393b36cf49a0722889344a0016.dir + size: 392930 nfiles: 1 - path: data/prepared_data hash: md5 - md5: 219cd47a478057c6473e390611c46ba6.dir - size: 37781342 + md5: e40fba0aee32d4b7120d7cbe633040b0.dir + size: 21607047 nfiles: 3 params: configs/settings.yaml: @@ -244,8 +359,8 @@ stages: outs: - path: metrics/metrics.json hash: md5 - md5: 2c860657417421c757146f2dce76f444 - size: 225 + md5: 3d87652cffab141e1d5b4307761ec788 + size: 223 generate_scenerio_metrics: cmd: python 5_generate_scenarios.py deps: