Merge pull request #138 from Hestia-Homes/hotwaterkwh-dev-model

Hotwaterkwh dev model
This commit is contained in:
KhalimCK 2024-08-06 12:37:54 +01:00 committed by GitHub
commit bf39378e0d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 153 additions and 61 deletions

View file

@ -17,14 +17,15 @@ Within `src` folder, the structure is as follows:
# How to develop using this pipeline:
Run `make init`, which will:
- Download pyenv (Python version management)
- Download Python 3.X.X as defined in the `make` file - current 3.10.12
- Create a virtual environment with this version of python
First, install Miniconda so that conda can be used to manage Python environments.
Run `conda init` to initialise your terminal.
Change to this directory and run `make init`, which will:
- Create a conda virtual environment with the required version of Python (currently 3.10.12)
- Install packages in the training and version control directories in the pipeline folder (dev version if applicable)
- Install pre-commit to enable pre-commit hooks
To use the environment, run `source .dev_env_pipeline/bin/activate`.
To use the environment, run `conda activate dev_env_pipeline`
To enable the virtual environment created in VS Code:
- Open settings

View file

@ -18,12 +18,7 @@ default:
prepare_data:
input_dataclient_type: aws-s3
output_dataclient_type: local
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-03-22-18-56-53/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-25-08-36-36/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-26-10-31-39/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-05-28-19-08-25/dataset_rooms.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/2024-07-03-23-11-39/dataset_rooms.parquet
data_filepath: s3://retrofit-data-dev/energy_consumption/2024-07-08/energy_consumption_dataset.parquet
data_filepath: s3://retrofit-data-dev/energy_consumption/2024-07-25/energy_consumption_dataset.parquet
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet
@ -35,32 +30,75 @@ default:
subsample_seed: 0
target: hot_water_kwh
identifier_columns: ["uprn"]
# drop_columns: ["heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending", "days_to_starting", "days_to_ending"]
drop_columns: ["heating_kwh"]
retain_features: null
# retain_features: ['uprn', 'sap_starting', 'hot_water_energy_eff_ending',
# 'mainheat_energy_eff_ending', 'constituency', 'roof_energy_eff_ending',
# 'walls_energy_eff_ending', 'secondheat_description_ending',
# 'property_type', 'mainheatc_energy_eff_ending', 'built_form',
# 'walls_insulation_thickness_ending', 'potential_energy_efficiency',
# 'transaction_type_ending',
# 'floor_thermal_transmittance_ending',
# 'low_energy_lighting_ending', 'heat_demand_starting',
# 'photo_supply_ending', 'carbon_starting',
# 'walls_thermal_transmittance_ending',
# 'roof_insulation_thickness_ending',
# 'total_floor_area_ending', 'number_open_fireplaces_ending',
# 'windows_energy_eff_ending',
# 'floor_height_ending',
# 'extension_count_ending',
# 'has_air_source_heat_pump_ending',
# 'charging_system_ending', 'construction_age_band', 'glazed_type_ending',
# 'roof_thermal_transmittance_ending',
# 'floor_insulation_thickness_ending', 'has_mains_gas_ending',
# 'estimated_perimeter_starting', 'energy_consumption_potential',
# 'environment_impact_potential', 'heater_type_ending',
# 'multi_glaze_proportion_ending',
# 'lighting_energy_eff_ending', 'fixed_lighting_outlets_count']
retain_features: [
'uprn',
'heating-cost-current',
'co2-emissions-current',
'hot-water-cost-current',
'total-floor-area',
'secondheat-description',
'environment-impact-current',
'floor-description',
'mainheat-energy-eff',
'current-energy-efficiency',
'mainheat-env-eff',
'walls-energy-eff',
'roof-energy-eff',
'property-type',
'mainheat-description',
'hot-water-env-eff',
'mechanical-ventilation',
'floor-level',
'built-form',
'walls-description',
'mainheatcont-description',
'roof-description',
'energy-consumption-current',
'construction-age-band',
'hotwater-description',
# 'lodgement-datetime',
'main-fuel',
'hot-water-energy-eff',
'co2-emiss-curr-per-floor-area',
'windows-energy-eff',
'current-energy-rating',
'lodgement-year',
'extension-count',
'number-open-fireplaces',
'number-heated-rooms',
# 'lodgement-date',
# 'number-habitable-rooms',
'windows-description',
# 'local-authority',
'photo-supply',
'heat-loss-corridor',
# 'posttown',
# 'address',
'flat-top-storey',
'unheated-corridor-length',
'fixed-lighting-outlets-count',
# 'inspection-date',
'tenure',
# 'county',
# 'constituency-label',
'multi-glaze-proportion',
'solar-water-heating-flag',
# 'address2',
'energy-tariff',
'floor-height',
'constituency',
# 'uprn-source',
'transaction-type',
'floor-energy-eff',
# 'postcode',
'lodgement-month',
'lighting-cost-current',
'glazed-area',
# 'address1',
'floor-env-eff',
'main-heating-controls'
]
generate_predictions:
input_dataclient_type: local

View file

@ -23,12 +23,65 @@ stages:
default.feature_processor.feature_processor_config.drop_columns:
- heating_kwh
default.feature_processor.feature_processor_config.retain_features:
- uprn
- heating-cost-current
- co2-emissions-current
- hot-water-cost-current
- total-floor-area
- secondheat-description
- environment-impact-current
- floor-description
- mainheat-energy-eff
- current-energy-efficiency
- mainheat-env-eff
- walls-energy-eff
- roof-energy-eff
- property-type
- mainheat-description
- hot-water-env-eff
- mechanical-ventilation
- floor-level
- built-form
- walls-description
- mainheatcont-description
- roof-description
- energy-consumption-current
- construction-age-band
- hotwater-description
- main-fuel
- hot-water-energy-eff
- co2-emiss-curr-per-floor-area
- windows-energy-eff
- current-energy-rating
- lodgement-year
- extension-count
- number-open-fireplaces
- number-heated-rooms
- windows-description
- photo-supply
- heat-loss-corridor
- flat-top-storey
- unheated-corridor-length
- fixed-lighting-outlets-count
- tenure
- multi-glaze-proportion
- solar-water-heating-flag
- energy-tariff
- floor-height
- constituency
- transaction-type
- floor-energy-eff
- lodgement-month
- lighting-cost-current
- glazed-area
- floor-env-eff
- main-heating-controls
default.feature_processor.feature_processor_config.subsample_amount:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: hot_water_kwh
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath:
s3://retrofit-data-dev/energy_consumption/2024-07-08/energy_consumption_dataset.parquet
s3://retrofit-data-dev/energy_consumption/2024-07-25/energy_consumption_dataset.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@ -37,8 +90,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 322c8294651dea6c4db9e06157a91ffd.dir
size: 23387145
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
size: 13131853
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -49,8 +102,8 @@ stages:
size: 4820
- path: data/prepared_data
hash: md5
md5: 322c8294651dea6c4db9e06157a91ffd.dir
size: 23387145
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
size: 13131853
nfiles: 2
params:
configs/build_model.yaml:
@ -82,18 +135,18 @@ stages:
outs:
- path: data/fit_predictions/
hash: md5
md5: b149b2be5ed3105e73b02000b9912422.dir
size: 724848
md5: 3e48cec68f702bc822eed8fcb2c5c603.dir
size: 1787931
nfiles: 1
- path: data/model/
hash: md5
md5: 3fe37e27b51fe6d9472252f219fd9126.dir
size: 465478726
nfiles: 36
md5: 37f7480141e920c68faacd39478a1a68.dir
size: 451364406
nfiles: 35
- path: metrics/fit_metrics.json
hash: md5
md5: c27dcce525b763fa7c2c55820ae72727
size: 225
md5: e7a2a5efea57b1ddd1431b713d78bb11
size: 219
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -103,13 +156,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 3fe37e27b51fe6d9472252f219fd9126.dir
size: 465478726
nfiles: 36
md5: 37f7480141e920c68faacd39478a1a68.dir
size: 451364406
nfiles: 35
- path: data/prepared_data
hash: md5
md5: 322c8294651dea6c4db9e06157a91ffd.dir
size: 23387145
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
size: 13131853
nfiles: 2
params:
configs/settings.yaml:
@ -121,8 +174,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 07db4158559475e73ffb06ff95a6c869.dir
size: 77435
md5: 9267a66c6fae4da5a589faab76fac14c.dir
size: 192482
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -133,13 +186,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: 07db4158559475e73ffb06ff95a6c869.dir
size: 77435
md5: 9267a66c6fae4da5a589faab76fac14c.dir
size: 192482
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 322c8294651dea6c4db9e06157a91ffd.dir
size: 23387145
md5: 295ac4fd05a1a3373401a7318b0b5186.dir
size: 13131853
nfiles: 2
params:
configs/settings.yaml:
@ -149,8 +202,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: db8eddb1bb0b190188e25de65bdbd8e8
size: 220
md5: 3a08c29f028f5e3cb50fb8cd3608e2f4
size: 221
generate_scenerio_metrics:
cmd: python 5_generate_scenarios.py
deps: