Test model with all permutations and zero records

This commit is contained in:
Michael Duong 2024-01-24 10:29:56 +00:00
parent ca2a3d3623
commit d356fbfed0
3 changed files with 28 additions and 22 deletions

View file

@ -40,6 +40,11 @@ def keep_flats(df):
return df
def keep_non_zero_rdsap(df):
    """Return only the rows of ``df`` whose ``rdsap_change`` is not 0.

    Args:
        df: DataFrame containing an ``rdsap_change`` column.

    Returns:
        A filtered DataFrame; the input frame is not modified and the
        surviving rows keep their original index labels.

    Raises:
        KeyError: if ``df`` has no ``rdsap_change`` column.
    """
    # Boolean-mask selection: keeps every row where the comparison is True.
    df = df[df["rdsap_change"] != 0]
    return df
# def keep_ending_columns(df):
# ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
# keep_columns = df.columns[ending_column_index].to_list()
@ -49,6 +54,7 @@ def keep_flats(df):
# return df
business_logic = {
"keep_non_zero_rdsap": keep_non_zero_rdsap,
# "keep_flats": keep_flats,
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,
# "remove_floor_height_ending": remove_floor_height_ending

View file

@ -23,7 +23,7 @@ default:
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
data_filepath: s3://retrofit-datalake-dev/dataset_with0.parquet
data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
train_proportion: 0.9
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet

View file

@ -20,7 +20,7 @@ stages:
default.feature_processor.feature_processor_config.subsample_seed: 0
default.feature_processor.feature_processor_config.target: sap_ending
default.feature_processor.feature_processor_type: dataframe
default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0.parquet
default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
default.prepare_data.input_dataclient_type: aws-s3
default.prepare_data.output_dataclient_type: local
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
@ -29,8 +29,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: 12b7939c38b6a201063b063ed64d521b.dir
size: 39840424
md5: 312d09b682ce0c973eabcec40e2741fe.dir
size: 39832060
nfiles: 2
build_model:
cmd: python 2_build_model.py
@ -41,8 +41,8 @@ stages:
size: 4149
- path: data/prepared_data
hash: md5
md5: 12b7939c38b6a201063b063ed64d521b.dir
size: 39840424
md5: 312d09b682ce0c973eabcec40e2741fe.dir
size: 39832060
nfiles: 2
params:
configs/build_model.yaml:
@ -68,13 +68,13 @@ stages:
outs:
- path: data/model/
hash: md5
md5: 7d062363a9de5a659df638de1541d9ee.dir
size: 383515358
md5: 7708d5705a2db2d621dae73338a641ae.dir
size: 393761847
nfiles: 26
- path: metrics/fit_metrics.json
hash: md5
md5: 06c50da7ca7fdb631896790b76a5e19d
size: 223
md5: f7c3a5d39644d41cf60872baad7797b2
size: 222
generate_predictions:
cmd: python 3_generate_predictions.py
deps:
@ -84,13 +84,13 @@ stages:
size: 2464
- path: data/model
hash: md5
md5: 7d062363a9de5a659df638de1541d9ee.dir
size: 383515358
md5: 7708d5705a2db2d621dae73338a641ae.dir
size: 393761847
nfiles: 26
- path: data/prepared_data
hash: md5
md5: 12b7939c38b6a201063b063ed64d521b.dir
size: 39840424
md5: 312d09b682ce0c973eabcec40e2741fe.dir
size: 39832060
nfiles: 2
params:
configs/settings.yaml:
@ -102,8 +102,8 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: d6c97ad17146677fe705ccd7bcbb4873.dir
size: 369475
md5: dade2114bb2be2769cf0648b8046f705.dir
size: 369115
nfiles: 1
generate_metrics:
cmd: python 4_generate_metrics.py
@ -114,13 +114,13 @@ stages:
size: 3484
- path: data/predictions
hash: md5
md5: d6c97ad17146677fe705ccd7bcbb4873.dir
size: 369475
md5: dade2114bb2be2769cf0648b8046f705.dir
size: 369115
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 12b7939c38b6a201063b063ed64d521b.dir
size: 39840424
md5: 312d09b682ce0c973eabcec40e2741fe.dir
size: 39832060
nfiles: 2
params:
configs/settings.yaml:
@ -130,8 +130,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 6bb037ff29c7119576c8818b395d32f6
size: 225
md5: 3315792b9f7e6f55d59a39db03ee7093
size: 222
startup_cleanup:
cmd: python 0_startup_cleanup.py
deps: