Merge pull request #18 from Hestia-Homes/model-test

Model test
This commit is contained in:
quandanrepo 2023-09-16 12:00:26 +01:00 committed by GitHub
commit 45d4d078b2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 268 additions and 77 deletions

36
.github/workflows/MLMonitoringData.yml vendored Normal file
View file

@ -0,0 +1,36 @@
# Data-monitoring workflow: on pushes to branches matching "off-**", installs
# the monitoring requirements and runs regression_report.py on the runner.
# The workflow name flags that this is intended to be replaced by a Lambda.
name: (REPLACE WITH LAMBDA) Run monitoring on data to ensure that fundamentally, the data and its relationships haven't changed
on:
  push:
    branches:
      - "off-**"
# The job only needs to read the repository; AWS access comes from secrets.
permissions:
  contents: read
jobs:
  Verify-Data:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Pin the interpreter so the runner matches the 3.10 line used by the
      # monitoring Makefile and Dockerfile instead of whatever Python
      # ubuntu-latest currently ships. Quoted so YAML does not read 3.10 as 3.1.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install packages to generate data report
        run: |
          pip install --upgrade pip
          pip install -r modules/ml-monitoring/src/evidently/src/requirements/requirements.txt
      - name: Run Monitoring Data report
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
        run: |
          cd modules/ml-monitoring/src/evidently/src
          python regression_report.py
      # Docker-based variant, currently disabled.
      # - name: Build Monitoring Data docker Image
      #   run: |
      #     cd modules/ml-monitoring/src/evidently/
      #     docker build . --file MonitoringData.Dockerfile --tag monitoring_data_test
      # - name: Run Monitoring Data docker container
      #   run: |
      #     docker run monitoring_data_test

View file

@ -0,0 +1,27 @@
# Model-monitoring workflow: on tags matching 'NewModel**', builds the
# monitoring-model Docker image and runs it as a container.
name: Run monitoring on an potential promotions (i.e. a new model registering, make sure results are not just "metric" better but everything makes sense)
on:
  push:
    tags:
      - 'NewModel**'
# The job only checks out code and runs Docker locally, so read access suffices.
permissions:
  contents: read
jobs:
  Verify-Model:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # NOTE(review): these packages are installed on the runner, but the report
      # itself runs inside the container below - this step may be redundant if
      # MonitoringModel.Dockerfile installs its own requirements; confirm.
      - name: Install packages to generate Model report
        run: |
          pip install --upgrade pip
          pip install -r modules/ml-monitoring/src/evidently/src/requirements/requirements.txt
      - name: Build Monitoring Model docker Image
        run: |
          cd modules/ml-monitoring/src/evidently/
          docker build . --file MonitoringModel.Dockerfile --tag monitoring_model_test
      # NOTE(review): no AWS credentials are passed to the container; confirm
      # whether the script it runs needs them.
      - name: Run Monitoring Model docker container
        run: |
          docker run monitoring_model_test

View file

@ -31,16 +31,27 @@ jobs:
- name: Register Model
  env:
    # Pass the PR branch name through the environment rather than interpolating
    # the expression into the script, so a crafted branch name cannot inject
    # shell commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    # Model name is the part of the branch name before the first "-".
    REGISTER_MODEL_NAME="${HEAD_REF%%-*}"
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # Text after "@v" in the latest ref reported by gto; an empty result is
    # treated below as "no version registered yet".
    latest_version=$(gto show "${REGISTER_MODEL_NAME}@latest" --ref | awk -F"@v" '{print $2}')
    if [ -z "${latest_version}" ]; then
      increment_version="1.0.0"
    else
      # Major bump: increment MAJOR and reset MINOR and PATCH to 0 (SemVer),
      # e.g. 2.1.3 -> 3.0.0.
      increment_version=$(echo "${latest_version}" | awk 'BEGIN{FS=OFS="."} {$1++; $2=0; $3=0; print}')
    fi
    # Tag manually in the name@vX.Y.Z form (stands in for a gto register with a major bump).
    new_tag="${REGISTER_MODEL_NAME}@v${increment_version}"
    git tag -a "${new_tag}" -m "Registering new Major Version"
    git push origin "${new_tag}"
    # Regenerate the registry overview and commit it back to the branch.
    gto show > MODEL_REGISTRY.md
    git add .
    git commit -m "Update Registry"
    git push
Register-Minor-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'minor')) }}
@ -58,16 +69,27 @@ jobs:
- name: Register Model
  env:
    # Pass the PR branch name through the environment rather than interpolating
    # the expression into the script, so a crafted branch name cannot inject
    # shell commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    # Model name is the part of the branch name before the first "-".
    REGISTER_MODEL_NAME="${HEAD_REF%%-*}"
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # Text after "@v" in the latest ref reported by gto; an empty result is
    # treated below as "no version registered yet".
    latest_version=$(gto show "${REGISTER_MODEL_NAME}@latest" --ref | awk -F"@v" '{print $2}')
    if [ -z "${latest_version}" ]; then
      increment_version="0.1.0"
    else
      # Minor bump: increment MINOR and reset PATCH to 0 (SemVer),
      # e.g. 2.1.3 -> 2.2.0.
      increment_version=$(echo "${latest_version}" | awk 'BEGIN{FS=OFS="."} {$2++; $3=0; print}')
    fi
    # Tag manually in the name@vX.Y.Z form (stands in for a gto register with a minor bump).
    new_tag="${REGISTER_MODEL_NAME}@v${increment_version}"
    git tag -a "${new_tag}" -m "Registering new Minor Version"
    git push origin "${new_tag}"
    # Regenerate the registry overview and commit it back to the branch.
    gto show > MODEL_REGISTRY.md
    git add .
    git commit -m "Update Registry"
    git push
Register-Patch-Model-Dev:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'patch')) }}
@ -85,16 +107,27 @@ jobs:
- name: Register Model
  env:
    # Pass the PR branch name through the environment rather than interpolating
    # the expression into the script, so a crafted branch name cannot inject
    # shell commands.
    HEAD_REF: ${{ github.event.pull_request.head.ref }}
  run: |
    # Model name is the part of the branch name before the first "-".
    REGISTER_MODEL_NAME="${HEAD_REF%%-*}"
    git config user.name "Github-Bot"
    git config user.email "Github-Bot@no-reply.com"
    # Text after "@v" in the latest ref reported by gto; an empty result is
    # treated below as "no version registered yet".
    latest_version=$(gto show "${REGISTER_MODEL_NAME}@latest" --ref | awk -F"@v" '{print $2}')
    if [ -z "${latest_version}" ]; then
      increment_version="0.0.1"
    else
      # Patch bump: increment PATCH only, e.g. 2.1.3 -> 2.1.4.
      increment_version=$(echo "${latest_version}" | awk 'BEGIN{FS=OFS="."} {$3++; print}')
    fi
    # Tag manually in the name@vX.Y.Z form (stands in for a gto register with a patch bump).
    new_tag="${REGISTER_MODEL_NAME}@v${increment_version}"
    git tag -a "${new_tag}" -m "Registering new Patch Version"
    git push origin "${new_tag}"
    # Regenerate the registry overview and commit it back to the branch.
    gto show > MODEL_REGISTRY.md
    git add .
    git commit -m "Update Registry"
    git push
Promote-Artefacts-To-Dev:
if: github.event.pull_request.merged == true
@ -123,44 +156,43 @@ jobs:
cd modules/ml-pipeline/src/pipeline/src
dvc push -r dev
Register-New-Model-Dev:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
# Register-New-Model-Dev:
# if: github.event.pull_request.merged == true
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
- name: Install packages to register model
run: |
pip install --upgrade pip
pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
# - name: Install packages to register model
# env:
# AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
# run: |
# pip install --upgrade pip
# pip install -r modules/ml-pipeline/src/pipeline/src/requirements/version_control/requirements.txt
- name: Register Model
env:
AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
run: |
# - name: Register Model
# env:
# AWS_ACCESS_KEY_ID: ${{ secrets.ROBOT_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.ROBOT_AWS_SECRET_ACCESS_KEY }}
# run: |
REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
# # REGISTER_MODEL_NAME=$(echo ${{ github.event.pull_request.head.ref }} | awk -F"-" '{print $1}')
# REGISTER_MODEL_NAME=$(echo ${{github.ref_name}} | awk -F"-" '{print $1}')
git config user.name "Github-Bot"
git config user.email "Github-Bot@no-reply.com"
# git config user.name "Github-Bot"
# git config user.email "Github-Bot@no-reply.com"
latest_version=$(gto show ${REGISTER_MODEL_NAME}@latest --ref)
new_tag=${latest_version}#dev
# # git tag model@v0.0.1
# # git push origin model@v0.0.1
git tag -a ${new_tag} -m "Registering Latest Version to Dev"
git push origin ${new_tag}
# # gto register test --repo https://github.com/Hestia-Homes/ML/
# # echo "chicken" >> test.md
gto show > MODEL_REGISTRY.md
git add .
git commit -m "Update Registry"
git push
# # gto -v register ${REGISTER_MODEL_NAME}
# # gto assign regression --stage dev
# # gto show
Register-Prediction-Image-Dev:
needs: Promote-Artefacts-To-Dev

11
MODEL_REGISTRY.md Normal file
View file

@ -0,0 +1,11 @@
╒════════╤══════════╕
│ name │ latest │
╞════════╪══════════╡
│ model │ v2.1.3 │
╘════════╧══════════╛
╒════════╤══════════╕
│ name │ latest │
╞════════╪══════════╡
│ bob │ v1.0.0 │
│ model │ v2.1.3 │
╘════════╧══════════╛

1
modules/ml-monitoring/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
.dev_env_monitoring/

View file

@ -0,0 +1,20 @@
# Developer bootstrap for the ml-monitoring module: installs pyenv and a pinned
# Python, then creates the .dev_env_monitoring virtualenv with the dev requirements.
# Put the pyenv install location on PATH for every recipe below.
export PYENV_ROOT=$(HOME)/.pyenv
export PATH := $(PYENV_ROOT)/bin:$(PATH)
# Python version to install; override on the command line, e.g. `make init PYTHON_VERSION=...`.
PYTHON_VERSION ?= 3.10.12
# `make init` is an alias for the full environment setup.
.PHONY: init
init: dev-pyenv
.PHONY: dev-pyenv
dev-pyenv:
# The `|| echo` fallbacks keep make going when the pyenv installer or the
# Python install step exits non-zero (the messages assume "already installed").
curl https://pyenv.run | bash || echo "Pyenv - Already installed"
pyenv install ${PYTHON_VERSION} || echo "Python version already installed"
# Select the pinned version as pyenv's global Python before creating the venv.
pyenv global ${PYTHON_VERSION}
python3 -m venv .dev_env_monitoring
# Activation and installs share one shell line because each recipe line runs in its own shell.
. .dev_env_monitoring/bin/activate && pip install --upgrade pip && pip install -r src/evidently/src/requirements/requirements-dev.txt && pre-commit install
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "source .dev_env_monitoring/bin/activate"
# Initialise DVC for this sub-directory of the repository, using the venv created above.
.PHONY: dvc-init
dvc-init:
. .dev_env_monitoring/bin/activate && dvc init --subdir

Binary file not shown.

View file

@ -0,0 +1,14 @@
# Image that runs regression_report.py to generate the monitoring report
# (part of the CI/CD flow).
FROM python:3.10.12-slim

# Copy only the requirements first so the dependency layer stays cached until
# requirements.txt changes; --no-cache-dir keeps pip's download cache out of the image.
COPY src/requirements/requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
# NOTE(review): regression_report.py appears to read from and upload to S3, so
# AWS credentials would have to be supplied at `docker run` time - confirm.
COPY src/ /home/src/
WORKDIR /home/src/
CMD ["python", "regression_report.py"]

View file

@ -0,0 +1,40 @@
import boto3
import pandas as pd
from evidently.report import Report
from evidently.metric_preset import (
DataDriftPreset,
DataQualityPreset,
)
def run_evidently_dashboard(
    local_output: str = "./report.html",
    bucket: str = "retrofit-data-dev",
    report_key: str = "monitoring/test-report.html",
    reference_path: str = "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet",
    current_path: str = "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data.parquet",
    sample_rows: int = 100,
) -> None:
    """Build an Evidently data drift / data quality report and upload it to S3.

    Reads the reference and current parquet datasets, keeps the first
    ``sample_rows`` rows of each, runs the ``DataDriftPreset`` and
    ``DataQualityPreset`` metrics, saves the report as HTML to
    ``local_output`` and uploads that file to ``bucket``/``report_key``.
    Called with no arguments it behaves exactly like the original
    hard-coded dummy test case.

    Args:
        local_output: Local path the HTML report is written to.
        bucket: Name of the S3 bucket the report is uploaded to.
        report_key: Object key of the uploaded report inside ``bucket``.
        reference_path: Parquet location of the reference dataset.
        current_path: Parquet location of the current dataset.
        sample_rows: Number of leading rows taken from each dataset.
    """
    # DUMMY TEST CASE: only a small head() sample of each dataset is compared.
    ref = pd.read_parquet(reference_path).head(sample_rows)
    cur = pd.read_parquet(current_path).head(sample_rows)

    report = Report(
        metrics=[
            DataDriftPreset(),
            DataQualityPreset(),
        ]
    )
    report.run(reference_data=ref, current_data=cur)
    report.save_html(local_output)

    s3 = boto3.client("s3")
    s3.upload_file(local_output, bucket, report_key)
    print(f"{local_output} uploaded to {bucket}/{report_key} successfully.")


if __name__ == "__main__":
    run_evidently_dashboard()

View file

@ -0,0 +1,4 @@
evidently==0.4.4
pre-commit==3.3.3
sphinx==7.2.5
sphinx_rtd_theme==1.3.0

View file

@ -0,0 +1,2 @@
boto3==1.28.41
evidently==0.4.4

View file

@ -10,11 +10,11 @@ dev-pyenv:
curl https://pyenv.run | bash || echo "Pyenv - Already installed"
pyenv install ${PYTHON_VERSION} || echo "Python version already installed"
pyenv global ${PYTHON_VERSION}
python3 -m venv .dev_env
. .dev_env/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pip install -r src/pipeline/src/requirements/version_control/requirements.txt && pre-commit install
python3 -m venv .dev_env_pipeline
. .dev_env_pipeline/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pip install -r src/pipeline/src/requirements/version_control/requirements.txt && pre-commit install
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "source .dev_env/bin/activate"
echo "source .dev_env_pipeline/bin/activate"
.PHONY: dvc-init
dvc-init:
. .dev_env/bin/activate && dvc init --subdir
. .dev_env_pipeline/bin/activate && dvc init --subdir

View file

@ -1,5 +1,5 @@
model_type: SKLearnLinearRegression
model_save_filepath: ./data/model/model.joblib
model_type: AutogluonAutoML
model_save_filepath: ./data/model/autogluonmodel/
SKLearnLinearRegression: null

View file

@ -3,6 +3,6 @@ feature_processor_config:
subsample_amount: null
subsample_seed: 0
target: RDSAP_CHANGE
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE"]
retain_features: ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
# retain_features: null
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE"]
# retain_features: ["TOTAL_FLOOR_AREA_STARTING", "SAP_STARTING", "TOTAL_FLOOR_AREA_ENDING"]
retain_features: null

View file

@ -1,7 +1,7 @@
input_dataclient_type: aws-s3
output_dataclient_type: local
datahandler_type: parquet
data_filepath: s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet
data_filepath: s3://retrofit-data-dev/sap_change_model/dataset.parquet
train_proportion: 0.1
output_train_filepath: ./data/prepared_data/train.parquet
output_test_filepath: ./data/prepared_data/test.parquet

View file

@ -15,8 +15,8 @@ stages:
outs:
- path: data/prepared_data/
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
build_model:
cmd: python build_model.py
@ -27,8 +27,8 @@ stages:
size: 3948
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
params:
configs/build_model.yaml:
@ -42,26 +42,26 @@ stages:
SKLearnLinearRegression:
SKLearnSVMRegression:
kernel: linear
model_save_filepath: ./data/model/model.joblib
model_type: SKLearnLinearRegression
model_save_filepath: ./data/model/autogluonmodel/
model_type: AutogluonAutoML
outs:
- path: data/model/
hash: md5
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
md5: 154f823d56a9892948a633789d9b08a5.dir
size: 680552724
nfiles: 18
generate_predictions:
cmd: python generate_predictions.py
deps:
- path: data/model
hash: md5
md5: 1d4bc40f23a6866c8daa9f2f5b639d67.dir
size: 904
nfiles: 1
md5: 154f823d56a9892948a633789d9b08a5.dir
size: 680552724
nfiles: 18
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
- path: generate_predictions.py
hash: md5
@ -77,21 +77,21 @@ stages:
outs:
- path: data/predictions/
hash: md5
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
md5: d8abefde18d78588158ef6acf282e2ed.dir
size: 2948553
nfiles: 1
generate_metrics:
cmd: python generate_metrics.py
deps:
- path: data/predictions
hash: md5
md5: ea0431b600f0ef357de3a543482cefe7.dir
size: 4085105
md5: d8abefde18d78588158ef6acf282e2ed.dir
size: 2948553
nfiles: 1
- path: data/prepared_data
hash: md5
md5: f7e45d3997cf165904174b2bc2d2eba5.dir
size: 4396934
md5: febdc8362200167078dfa578cf2bc889.dir
size: 24296908
nfiles: 2
- path: generate_metrics.py
hash: md5
@ -107,8 +107,8 @@ stages:
outs:
- path: metrics/metrics.json
hash: md5
md5: ae53c4781cb8a754d24e29ba7ddb16ea
size: 183
md5: f5aaae75ea74241500cd1ce76751c579
size: 182
startup_cleanup:
cmd: python startup_cleanup.py
deps:

View file

@ -19,6 +19,7 @@ stages:
- train_proportion
outs:
- data/prepared_data/
always_changed: true
build_model:
cmd: python build_model.py
deps:
@ -28,6 +29,7 @@ stages:
- configs/build_model.yaml:
outs:
- data/model/
always_changed: true
generate_predictions:
cmd: python generate_predictions.py
deps:
@ -38,6 +40,7 @@ stages:
- configs/generate_predictions.yaml:
outs:
- data/predictions/
always_changed: true
generate_metrics:
cmd: python generate_metrics.py
deps:
@ -48,5 +51,6 @@ stages:
- configs/generate_metrics.yaml:
outs:
- metrics/metrics.json
always_changed: true
metrics:
- metrics/metrics.json