add dvc pipeline to scripts

This commit is contained in:
Michael Duong 2023-09-09 20:02:08 +00:00
parent 24dc6e43a5
commit e35462cc22
7 changed files with 149 additions and 8 deletions

View file

@ -96,7 +96,7 @@ if __name__ == "__main__":
dataclient=dataclient,
model=model,
target=build_model_params["target"],
model_save_location=build_model_params["model_save_location"],
model_save_location=build_model_params["model_save_filepath"],
model_hyperparameters=build_model_params[model_type],
train_filepath=prepare_data_params["output_train_filepath"],
test_filepath=prepare_data_params["output_test_filepath"],

View file

@ -1,9 +1,6 @@
model_type: SKLearnLinearRegression
train_location: ./data/prepared_data/train.parquet
target: target
test_location: ./data/prepared_data/test.parquet
model_save_location: ./data/model/model.joblib
model_save_filepath: ./data/model/model.joblib
SKLearnLinearRegression: null

View file

@ -0,0 +1,3 @@
/prepared_data
/model
/predictions

View file

@ -0,0 +1,99 @@
schema: '2.0'
stages:
prepare_data:
cmd: python prepare_data.py
deps:
- path: prepare_data.py
hash: md5
md5: 113f292aa8fa1ecec56b21cfc7f657a9
size: 3623
params:
configs/prepare_data.yaml:
output_test_filepath: ./data/prepared_data/test.parquet
output_train_filepath: ./data/prepared_data/train.parquet
train_proportion: 0.8
outs:
- path: data/prepared_data/
hash: md5
md5: 8268b5117320d2589594a0eda859c5e5.dir
size: 36337
nfiles: 2
build_model:
cmd: python build_model.py
deps:
- path: build_model.py
hash: md5
md5: 43ff6a4781efacff4234fe261022a5dd
size: 3576
- path: data/prepared_data
hash: md5
md5: 8268b5117320d2589594a0eda859c5e5.dir
size: 36337
nfiles: 2
params:
configs/build_model.yaml:
SKLearnLinearRegression:
SKLearnSVMRegression:
kernel: linear
model_save_filepath: ./data/model/model.joblib
model_type: SKLearnLinearRegression
target: target
outs:
- path: data/model/
hash: md5
md5: 85ed2d0d4f179e038b8ffd296b86f630.dir
size: 1096
nfiles: 1
generate_predictions:
cmd: python generate_predictions.py
deps:
- path: data/model
hash: md5
md5: 85ed2d0d4f179e038b8ffd296b86f630.dir
size: 1096
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 8268b5117320d2589594a0eda859c5e5.dir
size: 36337
nfiles: 2
- path: generate_predictions.py
hash: md5
md5: 209fe6efbebfd3d7aa1a1bb27885d3c1
size: 3114
params:
configs/generate_predictions.yaml:
predictions_output_filepath: ./data/predictions/predictions.parquet
test_data_filepath: ./data/prepared_data/test.parquet
outs:
- path: data/predictions/
hash: md5
md5: 7bb333329935cc66390475a3ad6deaf9.dir
size: 2531
nfiles: 1
generate_metrics:
cmd: python generate_metrics.py
deps:
- path: data/predictions
hash: md5
md5: 7bb333329935cc66390475a3ad6deaf9.dir
size: 2531
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 8268b5117320d2589594a0eda859c5e5.dir
size: 36337
nfiles: 2
- path: generate_metrics.py
hash: md5
md5: 4c1379bf37f5e5ad5843eb5b5a22ebc5
size: 3407
params:
configs/generate_metrics.yaml:
metrics_output_filepath: ./metrics/metrics.json
metrics_type: Regression
outs:
- path: metrics/metrics.json
hash: md5
md5: bf7ed6a9b378b42fb3d7b6d16c76655f
size: 183

View file

@ -0,0 +1,41 @@
# DVC pipeline: prepare_data -> build_model -> generate_predictions -> generate_metrics.
# For each stage: `cmd` is the command DVC runs, `deps` are files whose changes
# invalidate the stage, `params` are config keys whose changes invalidate the
# stage, and `outs` are the paths the stage produces.
stages:
  prepare_data:
    cmd: python prepare_data.py
    deps:
      - prepare_data.py
    params:
      - configs/prepare_data.yaml:
          - output_test_filepath
          - output_train_filepath
          - train_proportion
    outs:
      - data/prepared_data/
  build_model:
    cmd: python build_model.py
    deps:
      - build_model.py
      - data/prepared_data
    params:
      # A file name with no key list tracks every key in that file.
      - configs/build_model.yaml:
      # build_model.py also reads the train/test file paths from the
      # prepare_data config, so track those keys explicitly here.
      - configs/prepare_data.yaml:
          - output_test_filepath
          - output_train_filepath
    outs:
      - data/model/
  generate_predictions:
    cmd: python generate_predictions.py
    deps:
      - generate_predictions.py
      - data/prepared_data
      - data/model
    params:
      - configs/generate_predictions.yaml:
      # generate_predictions.py reads the target column and the saved model
      # path from the build_model config. Only those keys are tracked so that
      # unrelated hyperparameter edits do not invalidate this stage by themselves.
      # NOTE(review): add `model_type` here if the script also selects the
      # model class from this config - confirm against the full script.
      - configs/build_model.yaml:
          - model_save_filepath
          - target
    outs:
      - data/predictions/
  generate_metrics:
    cmd: python generate_metrics.py
    deps:
      - generate_metrics.py
      - data/prepared_data
      - data/predictions
    params:
      - configs/generate_metrics.yaml:
    outs:
      - metrics/metrics.json

View file

@ -34,7 +34,7 @@ def generate_predictions(
dataclient: DataClient,
model: MLModel,
target: str,
model_location: str,
model_filepath: str,
test_data_filepath: str,
predictions_output_filepath: str,
):
@ -53,7 +53,7 @@ def generate_predictions(
logger.info("--- Loading model ---")
logger.info("---------------------")
model.load_model(model_location)
model.load_model(model_filepath)
logger.info("------------------------------")
logger.info("--- Generating predictions ---")
@ -93,7 +93,7 @@ if __name__ == "__main__":
dataclient=dataclient,
model=model,
target=build_model_params["target"],
model_location=build_model_params["model_save_location"],
model_filepath=build_model_params["model_save_filepath"],
test_data_filepath=generate_predictions_params["test_data_filepath"],
predictions_output_filepath=generate_predictions_params[
"predictions_output_filepath"

View file

@ -0,0 +1 @@
/metrics.json