Merge pull request #16 from Hestia-Homes/model-test

Model test
This commit is contained in:
quandanrepo 2023-09-15 00:22:58 +01:00 committed by GitHub
commit a8110fff73
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 198 additions and 113 deletions

View file

@ -15,34 +15,88 @@ on:
permissions: write-all
jobs:
Use-Major-Label-on-Merge:
Register-Major-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'major')) }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Test echoo
run: |
echo "Hello Major"
- uses: actions/checkout@v4
with:
fetch-depth: 0
Use-Minor-Label-on-Merge:
- name: Install packages to register model
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
- name: Register Model
  # Derives the model name from the merged PR's source branch (the text
  # before the first "-") and sets the git identity used by the bot.
  env:
    # Pass the branch name through the environment instead of interpolating
    # the expression into the script: ${{ }} is substituted before the shell
    # runs, so a crafted branch name could otherwise inject commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    REGISTER_MODEL_NAME=$(printf '%s\n' "$HEAD_REF" | awk -F"-" '{print $1}')
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # gto register test --repo https://github.com/Hestia-Homes/ML/
    # echo "chicken" >> test.md
    # gto register ${REGISTER_MODEL_NAME} --bump-major
    # gto assign regression --stage dev
    # gto show >> Model_Register.md
Register-Minor-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Test echoo
run: |
echo "Hello Minor"
- uses: actions/checkout@v4
with:
fetch-depth: 0
Use-Patch-Label-on-Merge:
- name: Install packages to register model
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
- name: Register Model
  # Derives the model name from the merged PR's source branch (the text
  # before the first "-") and sets the git identity used by the bot.
  env:
    # Pass the branch name through the environment instead of interpolating
    # the expression into the script: ${{ }} is substituted before the shell
    # runs, so a crafted branch name could otherwise inject commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    REGISTER_MODEL_NAME=$(printf '%s\n' "$HEAD_REF" | awk -F"-" '{print $1}')
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # gto register test --repo https://github.com/Hestia-Homes/ML/
    # echo "chicken" >> test.md
    # gto register ${REGISTER_MODEL_NAME} --bump-minor
    # gto assign regression --stage dev
    # gto show >> Model_Register.md
Register-Patch-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Test echoo
run: |
echo "Hello Patch"
- uses: actions/checkout@v4
with:
fetch-depth: 0
Promote-Model-To-Dev:
- name: Install packages to register model
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
- name: Register Model
  # Derives the model name from the merged PR's source branch (the text
  # before the first "-") and sets the git identity used by the bot.
  env:
    # Pass the branch name through the environment instead of interpolating
    # the expression into the script: ${{ }} is substituted before the shell
    # runs, so a crafted branch name could otherwise inject commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    REGISTER_MODEL_NAME=$(printf '%s\n' "$HEAD_REF" | awk -F"-" '{print $1}')
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # gto register test --repo https://github.com/Hestia-Homes/ML/
    # echo "chicken" >> test.md
    # gto register ${REGISTER_MODEL_NAME} --bump-patch
    # gto assign regression --stage dev
    # gto show >> Model_Register.md
Promote-Artefacts-To-Dev:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
@ -70,55 +124,52 @@ jobs:
dvc push -r dev
Register-New-Model-Dev:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# Register-New-Model-Dev:
# if: github.event.pull_request.merged == true
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
- name: Install packages to register model
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
# - name: Install packages to register model
# env:
# AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
# run: |
# pip install --upgrade pip
# pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
- name: Register Model
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
# - name: Register Model
# env:
# AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
# run: |
# REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
# # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com"
# git config user.name "Github-Bot"
# git config user.email "Github-Bot@no-reply.com"
# git tag model@v0.0.1
# git push origin model@v0.0.1
# # git tag model@v0.0.1
# # git push origin model@v0.0.1
# gto register test --repo https://github.com/Hestia-Homes/ML/
# echo "chicken" >> test.md
# # gto register test --repo https://github.com/Hestia-Homes/ML/
# # echo "chicken" >> test.md
# gto -v register ${REGISTER_MODEL_NAME}
# gto assign regression --stage dev
# gto show
# # gto -v register ${REGISTER_MODEL_NAME}
# # gto assign regression --stage dev
# # gto show
Register-Prediction-Image-Dev:
needs: Promote-Model-To-Dev
# needs: [Promote-Model-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
needs: Promote-Artefacts-To-Dev
# needs: [Promote-Artefacts-To-Dev, Register-New-Model-Dev] WILL ADD BACK ONCE REGISTER WORKS
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install packages to retrieve artifacts
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt

View file

@ -13,7 +13,7 @@ permissions: write-all
jobs:
No-Label:
Check-Label:
runs-on: ubuntu-latest
steps:
- uses: yogevbd/enforce-label-action@2.1.0

View file

@ -1,5 +1,5 @@
model_type: AutogluonAutoML
model_save_filepath: ./data/model/autogluonmodel/
model_type: SKLearnLinearRegression
model_save_filepath: ./data/model/model.joblib
SKLearnLinearRegression: null

View file

@ -0,0 +1,10 @@
aws-s3:
AWS_ACCESS_KEY_ID: null
AWS_SECRET_ACCESS_KEY: null
ENDPOINT_URL: null
aws-s3-mock:
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
ENDPOINT_URL: http://localhost:9000
local:
null

View file

@ -4,5 +4,5 @@ feature_processor_config:
subsample_seed: 0
target: RDSAP_CHANGE
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE"]
# retain_features: ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
retain_features: null
retain_features: ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
# retain_features: null

View file

@ -1,14 +1,8 @@
input_dataclient_type: aws-s3
input_dataclient:
AWS_ACCESS_KEY_ID: null
AWS_SECRET_ACCESS_KEY: null
ENDPOINT_URL: null
output_dataclient_type: local
output_dataclient:
null
datahandler_type: parquet
data_filepath: s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet
train_proportion: 0.8
train_proportion: 0.1
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet

View file

@ -5,18 +5,18 @@ stages:
deps:
- path: prepare_data.py
hash: md5
md5: 87a83e62512bff93c89f3e93c1ed248d
size: 5593
md5: 9c31bfb1b75ea3c9685ec459cbb50e62
size: 5921
params:
configs/prepare_data.yaml:
output_test_filepath: ./data/prepared_data/test.parquet
output_train_filepath: ./data/prepared_data/train.parquet
train_proportion: 0.8
train_proportion: 0.1
outs:
- path: data/prepared_data/
hash: md5
md5: 01a8f8f0b264ac4d61307a67bfa910b4.dir
size: 4428909
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
nfiles: 2
build_model:
cmd: python build_model.py
@ -27,8 +27,8 @@ stages:
size: 3948
- path: data/prepared_data
hash: md5
md5: 01a8f8f0b264ac4d61307a67bfa910b4.dir
size: 4428909
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
nfiles: 2
params:
configs/build_model.yaml:
@ -42,31 +42,31 @@ stages:
SKLearnLinearRegression:
SKLearnSVMRegression:
kernel: linear
model_save_filepath: ./data/model/autogluonmodel/
model_type: AutogluonAutoML
model_save_filepath: ./data/model/model.joblib
model_type: SKLearnLinearRegression
outs:
- path: data/model/
hash: md5
md5: 04a1e3bc625e7934c9f57a3fa2f1ea5c.dir
size: 1264795580
nfiles: 28
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
generate_predictions:
cmd: python generate_predictions.py
deps:
- path: data/model
hash: md5
md5: 04a1e3bc625e7934c9f57a3fa2f1ea5c.dir
size: 1264795580
nfiles: 28
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 01a8f8f0b264ac4d61307a67bfa910b4.dir
size: 4428909
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
nfiles: 2
- path: generate_predictions.py
hash: md5
md5: 76c45e7575ec979e6c4c8e2cf754a720
size: 4225
md5: 32c0ecd082e1f8fc4426338d6629979c
size: 4686
params:
configs/generate_predictions.yaml:
input_dataclient_type: local
@ -77,26 +77,26 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: 44c298a28a0bb1367bb82d5da1a5dbd0.dir
size: 672577
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
nfiles: 1
generate_metrics:
cmd: python generate_metrics.py
deps:
- path: data/predictions
hash: md5
md5: 44c298a28a0bb1367bb82d5da1a5dbd0.dir
size: 672577
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
nfiles: 1
- path: data/prepared_data
hash: md5
md5: 01a8f8f0b264ac4d61307a67bfa910b4.dir
size: 4428909
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
nfiles: 2
- path: generate_metrics.py
hash: md5
md5: cc368845f62523575a9ed5c791e27815
size: 4329
md5: 4709c42d93f8e717a3d9e4958e46cd76
size: 4587
params:
configs/generate_metrics.yaml:
dataclient_type: local
@ -107,15 +107,15 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: 3f03e50a419af6730351a5016e2ae98a
size: 182
md5: ae53c4781cb8a754d24e29ba7ddb16ea
size: 183
startup_cleanup:
cmd: python startup_cleanup.py
deps:
- path: startup_cleanup.py
hash: md5
md5: f7fe2ca33004b34530da0a3ab48c1790
size: 1458
md5: 2e51fbcac960d0f960bf32a8ec7486a0
size: 1748
params:
configs/startup_cleanup.yaml:
artefacts: ./data

View file

@ -7,6 +7,7 @@ stages:
- configs/startup_cleanup.yaml:
- artefacts
- metrics
always_changed: true
prepare_data:
cmd: python prepare_data.py
deps:

View file

@ -21,6 +21,9 @@ from core.Logger import logger
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
# Data-client settings, looked up by client type (client_params[<type>]).
# read_text() opens and closes the file itself, so no handle is left open.
client_path = Path(__file__).parent / "configs" / "client.yaml"
client_params = yaml.safe_load(client_path.read_text())
# Parameters from configs/prepare_data.yaml.
prepare_data_path = Path(__file__).parent / "configs" / "prepare_data.yaml"
prepare_data_params = yaml.safe_load(prepare_data_path.read_text())
@ -96,7 +99,12 @@ if __name__ == "__main__":
logger.info("----------------------------")
model = model_factory(build_model_params["model_type"])
dataclient = dataclient_factory(generate_metrics_params["dataclient_type"])
dataclient_type = generate_metrics_params["dataclient_type"]
dataclient = dataclient_factory(dataclient_type)
dataclient.ingest_configurations(client_params[dataclient_type])
dataclient.establish_client()
input_datahandler = datahandler_factory(
generate_metrics_params["input_datahandler_type"]
)

View file

@ -19,6 +19,9 @@ from core.Logger import logger
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
# Data-client settings, looked up by client type (client_params[<type>]).
# read_text() opens and closes the file itself, so no handle is left open.
client_path = Path(__file__).parent / "configs" / "client.yaml"
client_params = yaml.safe_load(client_path.read_text())
# Parameters from configs/prepare_data.yaml (supplies datahandler_type here).
prepare_data_path = Path(__file__).parent / "configs" / "prepare_data.yaml"
prepare_data_params = yaml.safe_load(prepare_data_path.read_text())
@ -97,12 +100,19 @@ if __name__ == "__main__":
# Data may be loaded from and saved to different locations, so the client types are taken from generate_predictions.yaml
# E.g. for metric runs, this will be a local data client
# For predictions, we will want a cloud data client
input_dataclient = dataclient_factory(
generate_predictions_params["input_dataclient_type"]
)
output_dataclient = dataclient_factory(
generate_predictions_params["output_dataclient_type"]
input_dataclient_type = generate_predictions_params["input_dataclient_type"]
input_dataclient = dataclient_factory(input_dataclient_type)
input_dataclient.ingest_configurations(config=client_params[input_dataclient_type])
input_dataclient.establish_client()
output_dataclient_type = generate_predictions_params["output_dataclient_type"]
output_dataclient = dataclient_factory(output_dataclient_type)
output_dataclient.ingest_configurations(
config=client_params[output_dataclient_type]
)
output_dataclient.establish_client()
datahandler = datahandler_factory(prepare_data_params["datahandler_type"])
generate_predictions(

View file

@ -21,6 +21,9 @@ from core.FeatureProcessor import feature_processor_factory
RUNTIME_ENVIRONMENT = os.environ.get("RUNTIME_ENVIRONMENT", "local")
# Data-client settings, looked up by client type (client_params[<type>]).
# read_text() opens and closes the file itself, so no handle is left open.
client_path = Path(__file__).parent / "configs" / "client.yaml"
client_params = yaml.safe_load(client_path.read_text())
# Parameters from configs/prepare_data.yaml (client types are read from it).
prepare_data_path = Path(__file__).parent / "configs" / "prepare_data.yaml"
prepare_data_params = yaml.safe_load(prepare_data_path.read_text())
@ -94,7 +97,7 @@ def prepare_data(
dataclient=output_dataclient, obj=train, location=output_train_filepath
)
if test:
if test is not None:
datahandler.save_data(
dataclient=output_dataclient, obj=test, location=output_test_filepath
)
@ -112,18 +115,17 @@ if __name__ == "__main__":
logger.info(f"--- Initiate DataClient ---")
logger.info("----------------------------")
input_dataclient = dataclient_factory(prepare_data_params["input_dataclient_type"])
output_dataclient = dataclient_factory(
prepare_data_params["output_dataclient_type"]
)
input_dataclient_type = prepare_data_params["input_dataclient_type"]
output_dataclient_type = prepare_data_params["output_dataclient_type"]
input_dataclient.ingest_configurations(
config=prepare_data_params["input_dataclient"]
)
input_dataclient = dataclient_factory(input_dataclient_type)
output_dataclient = dataclient_factory(output_dataclient_type)
input_dataclient.ingest_configurations(config=client_params[input_dataclient_type])
input_dataclient.establish_client()
output_dataclient.ingest_configurations(
config=prepare_data_params["output_dataclient"]
config=client_params[output_dataclient_type]
)
output_dataclient.establish_client()

View file

@ -15,6 +15,15 @@ def run_cleanup(artefacts_directory: str, metrics_directory: str) -> None:
"""
Remove the directory where artefacts are stored
"""
logger.info("---------------------")
logger.info(f"--- Run Clean up ---")
logger.info("---------------------")
logger.info("-------------------------")
logger.info(f"--- Delete artefacts ---")
logger.info("-------------------------")
artefact_directory_path = Path(artefacts_directory)
if artefact_directory_path.exists():
@ -22,6 +31,10 @@ def run_cleanup(artefacts_directory: str, metrics_directory: str) -> None:
logger.info(f"Removing the directory: {artefacts_directory}")
shutil.rmtree(artefact_directory_path)
logger.info("-----------------------")
logger.info(f"--- Delete metrics ---")
logger.info("-----------------------")
metrics_directory_path = Path(metrics_directory)
if metrics_directory_path.exists():
@ -36,10 +49,6 @@ if __name__ == "__main__":
logger.info(f"--- {__file__} - Start! ---")
logger.info("----------------------------")
logger.info("---------------------")
logger.info(f"--- Run Clean up ---")
logger.info("---------------------")
run_cleanup(
artefacts_directory=startup_cleanup_params["artefacts"],
metrics_directory=startup_cleanup_params["metrics"],