new data location

This commit is contained in:
Michael Duong 2023-09-15 18:36:33 +01:00
parent 573f0e2182
commit eb1f6ecaf2
5 changed files with 32 additions and 28 deletions

View file

@ -1,5 +1,5 @@
model_type: SKLearnLinearRegression
model_save_filepath: ./data/model/model.joblib
model_type: AutogluonAutoML
model_save_filepath: ./data/model/autogluonmodel/
SKLearnLinearRegression: null

View file

@ -3,6 +3,6 @@ feature_processor_config:
subsample_amount: null
subsample_seed: 0
target: RDSAP_CHANGE
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE"]
retain_features: ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
# retain_features: null
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE"]
# retain_features: ["TOTAL_FLOOR_AREA_STARTING", "SAP_STARTING", "TOTAL_FLOOR_AREA_ENDING"]
retain_features: null

View file

@ -1,7 +1,7 @@
input_dataclient_type: aws-s3
output_dataclient_type: local
datahandler_type: parquet
data_filepath: s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
train_proportion: 0.1
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet

View file

@ -15,8 +15,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
build_model:
cmd: python build_model.py
@ -27,8 +27,8 @@ stages:
size: 3948
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
params:
configs/build_model.yaml:
@ -42,26 +42,26 @@ stages:
SKLearnLinearRegression:
SKLearnSVMRegression:
kernel: linear
model_save_filepath: ./data/model/model.joblib
model_type: SKLearnLinearRegression
model_save_filepath: ./data/model/autogluonmodel/
model_type: AutogluonAutoML
outs:
- path: data/model/
hash: md5
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
md5: 154f823d56a9892948a633789d9b08a5.dir
size: 680552724
nfiles: 18
generate_predictions:
cmd: python generate_predictions.py
deps:
- path: data/model
hash: md5
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
md5: 154f823d56a9892948a633789d9b08a5.dir
size: 680552724
nfiles: 18
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
- path: generate_predictions.py
hash: md5
@ -77,21 +77,21 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
md5: d8abefde18d78588158ef6acf282e2ed.dir
size: 2948553
nfiles: 1
generate_metrics:
cmd: python generate_metrics.py
deps:
- path: data/predictions
hash: md5
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
md5: d8abefde18d78588158ef6acf282e2ed.dir
size: 2948553
nfiles: 1
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
- path: generate_metrics.py
hash: md5
@ -107,8 +107,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: ae53c4781cb8a754d24e29ba7ddb16ea
size: 183
md5: f5aaae75ea74241500cd1ce76751c579
size: 182
startup_cleanup:
cmd: python startup_cleanup.py
deps:

View file

@ -19,6 +19,7 @@ stages:
- train_proportion
outs:
- data/prepared_data/
always_changed: true
build_model:
cmd: python build_model.py
deps:
@ -28,6 +29,7 @@ stages:
- configs/build_model.yaml:
outs:
- data/model/
always_changed: true
generate_predictions:
cmd: python generate_predictions.py
deps:
@ -38,6 +40,7 @@ stages:
- configs/generate_predictions.yaml:
outs:
- data/predictions/
always_changed: true
generate_metrics:
cmd: python generate_metrics.py
deps:
@ -48,5 +51,6 @@ stages:
- configs/generate_metrics.yaml:
outs:
- metrics/metrics.json
always_changed: true
metrics:
- metrics/metrics.json