mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add dvc pipeline to scripts
This commit is contained in:
parent
24dc6e43a5
commit
e35462cc22
7 changed files with 149 additions and 8 deletions
|
|
@ -96,7 +96,7 @@ if __name__ == "__main__":
|
|||
dataclient=dataclient,
|
||||
model=model,
|
||||
target=build_model_params["target"],
|
||||
model_save_location=build_model_params["model_save_location"],
|
||||
model_save_location=build_model_params["model_save_filepath"],
|
||||
model_hyperparameters=build_model_params[model_type],
|
||||
train_filepath=prepare_data_params["output_train_filepath"],
|
||||
test_filepath=prepare_data_params["output_test_filepath"],
|
||||
|
|
|
|||
|
|
@ -1,9 +1,6 @@
|
|||
model_type: SKLearnLinearRegression
|
||||
train_location: ./data/prepared_data/train.parquet
|
||||
target: target
|
||||
test_location: ./data/prepared_data/test.parquet
|
||||
model_save_location: ./data/model/model.joblib
|
||||
|
||||
model_save_filepath: ./data/model/model.joblib
|
||||
|
||||
SKLearnLinearRegression: null
|
||||
|
||||
|
|
|
|||
3
modules/ml-pipeline/src/pipeline/training/data/.gitignore
vendored
Normal file
3
modules/ml-pipeline/src/pipeline/training/data/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/prepared_data
|
||||
/model
|
||||
/predictions
|
||||
99
modules/ml-pipeline/src/pipeline/training/dvc.lock
Normal file
99
modules/ml-pipeline/src/pipeline/training/dvc.lock
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
schema: '2.0'
|
||||
stages:
|
||||
prepare_data:
|
||||
cmd: python prepare_data.py
|
||||
deps:
|
||||
- path: prepare_data.py
|
||||
hash: md5
|
||||
md5: 113f292aa8fa1ecec56b21cfc7f657a9
|
||||
size: 3623
|
||||
params:
|
||||
configs/prepare_data.yaml:
|
||||
output_test_filepath: ./data/prepared_data/test.parquet
|
||||
output_train_filepath: ./data/prepared_data/train.parquet
|
||||
train_proportion: 0.8
|
||||
outs:
|
||||
- path: data/prepared_data/
|
||||
hash: md5
|
||||
md5: 8268b5117320d2589594a0eda859c5e5.dir
|
||||
size: 36337
|
||||
nfiles: 2
|
||||
build_model:
|
||||
cmd: python build_model.py
|
||||
deps:
|
||||
- path: build_model.py
|
||||
hash: md5
|
||||
md5: 43ff6a4781efacff4234fe261022a5dd
|
||||
size: 3576
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 8268b5117320d2589594a0eda859c5e5.dir
|
||||
size: 36337
|
||||
nfiles: 2
|
||||
params:
|
||||
configs/build_model.yaml:
|
||||
SKLearnLinearRegression:
|
||||
SKLearnSVMRegression:
|
||||
kernel: linear
|
||||
model_save_filepath: ./data/model/model.joblib
|
||||
model_type: SKLearnLinearRegression
|
||||
target: target
|
||||
outs:
|
||||
- path: data/model/
|
||||
hash: md5
|
||||
md5: 85ed2d0d4f179e038b8ffd296b86f630.dir
|
||||
size: 1096
|
||||
nfiles: 1
|
||||
generate_predictions:
|
||||
cmd: python generate_predictions.py
|
||||
deps:
|
||||
- path: data/model
|
||||
hash: md5
|
||||
md5: 85ed2d0d4f179e038b8ffd296b86f630.dir
|
||||
size: 1096
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 8268b5117320d2589594a0eda859c5e5.dir
|
||||
size: 36337
|
||||
nfiles: 2
|
||||
- path: generate_predictions.py
|
||||
hash: md5
|
||||
md5: 209fe6efbebfd3d7aa1a1bb27885d3c1
|
||||
size: 3114
|
||||
params:
|
||||
configs/generate_predictions.yaml:
|
||||
predictions_output_filepath: ./data/predictions/predictions.parquet
|
||||
test_data_filepath: ./data/prepared_data/test.parquet
|
||||
outs:
|
||||
- path: data/predictions/
|
||||
hash: md5
|
||||
md5: 7bb333329935cc66390475a3ad6deaf9.dir
|
||||
size: 2531
|
||||
nfiles: 1
|
||||
generate_metrics:
|
||||
cmd: python generate_metrics.py
|
||||
deps:
|
||||
- path: data/predictions
|
||||
hash: md5
|
||||
md5: 7bb333329935cc66390475a3ad6deaf9.dir
|
||||
size: 2531
|
||||
nfiles: 1
|
||||
- path: data/prepared_data
|
||||
hash: md5
|
||||
md5: 8268b5117320d2589594a0eda859c5e5.dir
|
||||
size: 36337
|
||||
nfiles: 2
|
||||
- path: generate_metrics.py
|
||||
hash: md5
|
||||
md5: 4c1379bf37f5e5ad5843eb5b5a22ebc5
|
||||
size: 3407
|
||||
params:
|
||||
configs/generate_metrics.yaml:
|
||||
metrics_output_filepath: ./metrics/metrics.json
|
||||
metrics_type: Regression
|
||||
outs:
|
||||
- path: metrics/metrics.json
|
||||
hash: md5
|
||||
md5: bf7ed6a9b378b42fb3d7b6d16c76655f
|
||||
size: 183
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
stages:
|
||||
prepare_data:
|
||||
cmd: python prepare_data.py
|
||||
deps:
|
||||
- prepare_data.py
|
||||
params:
|
||||
- configs/prepare_data.yaml:
|
||||
- output_test_filepath
|
||||
- output_train_filepath
|
||||
- train_proportion
|
||||
outs:
|
||||
- data/prepared_data/
|
||||
build_model:
|
||||
cmd: python build_model.py
|
||||
deps:
|
||||
- build_model.py
|
||||
- data/prepared_data
|
||||
params:
|
||||
- configs/build_model.yaml:
|
||||
outs:
|
||||
- data/model/
|
||||
generate_predictions:
|
||||
cmd: python generate_predictions.py
|
||||
deps:
|
||||
- generate_predictions.py
|
||||
- data/prepared_data
|
||||
- data/model
|
||||
params:
|
||||
- configs/generate_predictions.yaml:
|
||||
outs:
|
||||
- data/predictions/
|
||||
generate_metrics:
|
||||
cmd: python generate_metrics.py
|
||||
deps:
|
||||
- generate_metrics.py
|
||||
- data/prepared_data
|
||||
- data/predictions
|
||||
params:
|
||||
- configs/generate_metrics.yaml:
|
||||
outs:
|
||||
- metrics/metrics.json
|
||||
|
|
@ -34,7 +34,7 @@ def generate_predictions(
|
|||
dataclient: DataClient,
|
||||
model: MLModel,
|
||||
target: str,
|
||||
model_location: str,
|
||||
model_filepath: str,
|
||||
test_data_filepath: str,
|
||||
predictions_output_filepath: str,
|
||||
):
|
||||
|
|
@ -53,7 +53,7 @@ def generate_predictions(
|
|||
logger.info("--- Loading model ---")
|
||||
logger.info("---------------------")
|
||||
|
||||
model.load_model(model_location)
|
||||
model.load_model(model_filepath)
|
||||
|
||||
logger.info("------------------------------")
|
||||
logger.info("--- Generating predictions ---")
|
||||
|
|
@ -93,7 +93,7 @@ if __name__ == "__main__":
|
|||
dataclient=dataclient,
|
||||
model=model,
|
||||
target=build_model_params["target"],
|
||||
model_location=build_model_params["model_save_location"],
|
||||
model_filepath=build_model_params["model_save_filepath"],
|
||||
test_data_filepath=generate_predictions_params["test_data_filepath"],
|
||||
predictions_output_filepath=generate_predictions_params[
|
||||
"predictions_output_filepath"
|
||||
|
|
|
|||
1
modules/ml-pipeline/src/pipeline/training/metrics/.gitignore
vendored
Normal file
1
modules/ml-pipeline/src/pipeline/training/metrics/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
/metrics.json
|
||||
Loading…
Add table
Reference in a new issue