mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
test model with all permutation and zero records
This commit is contained in:
parent
ca2a3d3623
commit
d356fbfed0
3 changed files with 28 additions and 22 deletions
|
|
@ -40,6 +40,11 @@ def keep_flats(df):
|
|||
return df
|
||||
|
||||
|
||||
def keep_non_zero_rdsap(df):
    """Return only the rows of ``df`` whose ``rdsap_change`` is not 0.

    The filter is applied with a boolean mask, so the frame passed in is
    left untouched and the surviving rows keep their original index labels.
    """
    has_change = df["rdsap_change"] != 0
    return df[has_change]
|
||||
|
||||
|
||||
# def keep_ending_columns(df):
|
||||
# ending_column_index = [ col_name.endswith("_ENDING") for col_name in list(df.columns)]
|
||||
# keep_columns = df.columns[ending_column_index].to_list()
|
||||
|
|
@ -49,6 +54,7 @@ def keep_flats(df):
|
|||
# return df
|
||||
|
||||
business_logic = {
|
||||
"keep_non_zero_rdsap": keep_non_zero_rdsap,
|
||||
# "keep_flats": keep_flats,
|
||||
# "remove_minimum_habitable_room_size": remove_minimum_habitable_room_size,
|
||||
# "remove_floor_height_ending": remove_floor_height_ending
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ default:
|
|||
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_without_differencing.parquet
|
||||
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset_test.parquet
|
||||
# data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
|
||||
data_filepath: s3://retrofit-datalake-dev/dataset_with0.parquet
|
||||
data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
|
||||
train_proportion: 0.9
|
||||
output_train_filepath: ./data/prepared_data/train.parquet
|
||||
output_test_filepath: ./data/prepared_data/test.parquet
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ stages:
|
|||
default.feature_processor.feature_processor_config.subsample_seed: 0
|
||||
default.feature_processor.feature_processor_config.target: sap_ending
|
||||
default.feature_processor.feature_processor_type: dataframe
|
||||
default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0.parquet
|
||||
default.prepare_data.data_filepath: s3://retrofit-datalake-dev/dataset_with0perm_all.parquet
|
||||
default.prepare_data.input_dataclient_type: aws-s3
|
||||
default.prepare_data.output_dataclient_type: local
|
||||
default.prepare_data.output_test_filepath: ./data/prepared_data/test.parquet
|
||||
|
|
@ -29,8 +29,8 @@ stages:
|
|||
outs:
|
||||
- path: data/prepared_data/
|
||||
hash: md5
|
||||
md5: 12b7939c38b6a201063b063ed64d521b.dir
|
||||
size: 39840424
|
||||
md5: 312d09b682ce0c973eabcec40e2741fe.dir
|
||||
size: 39832060
|
||||
nfiles: 2
|
||||
build_model:
|
||||
cmd: python 2_build_model.py
|
||||
|
|
@ -41,8 +41,8 @@ stages:
|
|||
size: 4149
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 12b7939c38b6a201063b063ed64d521b.dir
|
||||
size: 39840424
|
||||
md5: 312d09b682ce0c973eabcec40e2741fe.dir
|
||||
size: 39832060
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/build_model.yaml:
|
||||
|
|
@ -68,13 +68,13 @@ stages:
|
|||
outs:
|
||||
- path: data/model/
|
||||
hash: md5
|
||||
md5: 7d062363a9de5a659df638de1541d9ee.dir
|
||||
size: 383515358
|
||||
md5: 7708d5705a2db2d621dae73338a641ae.dir
|
||||
size: 393761847
|
||||
nfiles: 26
|
||||
- path: metrics/fit_metrics.json
|
||||
hash: md5
|
||||
md5: 06c50da7ca7fdb631896790b76a5e19d
|
||||
size: 223
|
||||
md5: f7c3a5d39644d41cf60872baad7797b2
|
||||
size: 222
|
||||
generate_predictions:
|
||||
cmd: python 3_generate_predictions.py
|
||||
deps:
|
||||
|
|
@ -84,13 +84,13 @@ stages:
|
|||
size: 2464
|
||||
- path: data/model
|
||||
hash: md5
|
||||
md5: 7d062363a9de5a659df638de1541d9ee.dir
|
||||
size: 383515358
|
||||
md5: 7708d5705a2db2d621dae73338a641ae.dir
|
||||
size: 393761847
|
||||
nfiles: 26
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 12b7939c38b6a201063b063ed64d521b.dir
|
||||
size: 39840424
|
||||
md5: 312d09b682ce0c973eabcec40e2741fe.dir
|
||||
size: 39832060
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -102,8 +102,8 @@ stages:
|
|||
outs:
|
||||
- path: data/predictions/
|
||||
hash: md5
|
||||
md5: d6c97ad17146677fe705ccd7bcbb4873.dir
|
||||
size: 369475
|
||||
md5: dade2114bb2be2769cf0648b8046f705.dir
|
||||
size: 369115
|
||||
nfiles: 1
|
||||
generate_metrics:
|
||||
cmd: python 4_generate_metrics.py
|
||||
|
|
@ -114,13 +114,13 @@ stages:
|
|||
size: 3484
|
||||
- path: data/predictions
|
||||
hash: md5
|
||||
md5: d6c97ad17146677fe705ccd7bcbb4873.dir
|
||||
size: 369475
|
||||
md5: dade2114bb2be2769cf0648b8046f705.dir
|
||||
size: 369115
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 12b7939c38b6a201063b063ed64d521b.dir
|
||||
size: 39840424
|
||||
md5: 312d09b682ce0c973eabcec40e2741fe.dir
|
||||
size: 39832060
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/settings.yaml:
|
||||
|
|
@ -130,8 +130,8 @@ stages:
|
|||
outs:
|
||||
- path: metrics/metrics.json
|
||||
hash: md5
|
||||
md5: 6bb037ff29c7119576c8818b395d32f6
|
||||
size: 225
|
||||
md5: 3315792b9f7e6f55d59a39db03ee7093
|
||||
size: 222
|
||||
startup_cleanup:
|
||||
cmd: python 0_startup_cleanup.py
|
||||
deps:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue