diff --git a/.github/workflows/MLMonitoringData.yml b/.github/workflows/MLMonitoringData.yml
new file mode 100644
index 0000000..0119ffd
--- /dev/null
+++ b/.github/workflows/MLMonitoringData.yml
@@ -0,0 +1,28 @@
+name: (REPLACE WITH LAMBDA) Run monitoring on data to ensure that fundamentally, the data and its relationships haven't changed
+
+on:
+  push:
+    branches:
+      - "model-**"
+
+jobs:
+
+  Verify-Data:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install packages to generate data report
+        run: |
+          pip install --upgrade pip
+          pip install -r modules/ml-monitoring/src/evidently/src/requirements/requirements.txt
+
+      - name: Build Monitoring Data docker Image
+        run: |
+          cd modules/ml-monitoring/src/evidently/
+          docker build . --file MonitoringData.Dockerfile --tag monitoring_data_test
+
+      - name: Run Monitoring Data docker container
+        run: |
+          docker run monitoring_data_test
diff --git a/.github/workflows/MLMonitoringPromotion.yml b/.github/workflows/MLMonitoringPromotion.yml
new file mode 100644
index 0000000..89d6977
--- /dev/null
+++ b/.github/workflows/MLMonitoringPromotion.yml
@@ -0,0 +1,27 @@
+name: Run monitoring on an potential promotions (i.e. a new model registering, make sure results are not just "metric" better but everything makes sense)
+
+on:
+  push:
+    tags:
+      - 'NewModel**'
+
+jobs:
+
+  Verify-Model:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install packages to generate Model report
+        run: |
+          pip install --upgrade pip
+          pip install -r modules/ml-monitoring/src/evidently/src/requirements/requirements.txt
+
+      - name: Build Monitoring Model docker Image
+        run: |
+          cd modules/ml-monitoring/src/evidently/
+          docker build . --file MonitoringModel.Dockerfile --tag monitoring_model_test
+
+      - name: Run Monitoring Model docker container
+        run: |
+          docker run monitoring_model_test
diff --git a/modules/ml-monitoring/.gitignore b/modules/ml-monitoring/.gitignore
new file mode 100644
index 0000000..832692f
--- /dev/null
+++ b/modules/ml-monitoring/.gitignore
@@ -0,0 +1 @@
+.dev_env_monitoring/
diff --git a/modules/ml-monitoring/Makefile b/modules/ml-monitoring/Makefile
new file mode 100644
index 0000000..20767ff
--- /dev/null
+++ b/modules/ml-monitoring/Makefile
@@ -0,0 +1,20 @@
+export PYENV_ROOT=$(HOME)/.pyenv
+export PATH := $(PYENV_ROOT)/bin:$(PATH)
+PYTHON_VERSION ?= 3.10.12
+
+.PHONY: init
+init: dev-pyenv
+
+.PHONY: dev-pyenv
+dev-pyenv:
+	curl https://pyenv.run | bash || echo "Pyenv - Already installed"
+	pyenv install ${PYTHON_VERSION} || echo "Python version already installed"
+	pyenv global ${PYTHON_VERSION}
+	python3 -m venv .dev_env_monitoring
+	. .dev_env_monitoring/bin/activate && pip install --upgrade pip && pip install -r src/evidently/src/requirements/requirements-dev.txt && pre-commit install
+	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
+	echo "source .dev_env_monitoring/bin/activate"
+
+.PHONY: dvc-init
+dvc-init:
+	. .dev_env_monitoring/bin/activate && dvc init --subdir
diff --git a/modules/ml-monitoring/src/evidently/.DS_Store b/modules/ml-monitoring/src/evidently/.DS_Store
new file mode 100644
index 0000000..21bc00d
Binary files /dev/null and b/modules/ml-monitoring/src/evidently/.DS_Store differ
diff --git a/modules/ml-monitoring/src/evidently/Monitoring.Dockerfile b/modules/ml-monitoring/src/evidently/Monitoring.Dockerfile
new file mode 100644
index 0000000..71661d8
--- /dev/null
+++ b/modules/ml-monitoring/src/evidently/Monitoring.Dockerfile
@@ -0,0 +1,14 @@
+# Dockerfile that can be used to test loading a model to generate a prediction (part of CI/CD flow)
+FROM python:3.10.12-slim
+
+COPY src/requirements/requirements.txt requirements.txt
+
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt
+
+# Assuming in the CI/CD step, there will be a dvc pull step to get data and model, so will just need to run a single script
+COPY src/ /home/src/
+
+WORKDIR /home/src/
+
+CMD [ "python", "regression_report.py"]
diff --git a/modules/ml-monitoring/src/evidently/src/regression_report.py b/modules/ml-monitoring/src/evidently/src/regression_report.py
new file mode 100644
index 0000000..49f424c
--- /dev/null
+++ b/modules/ml-monitoring/src/evidently/src/regression_report.py
@@ -0,0 +1,47 @@
+import boto3
+import pandas as pd
+from evidently.report import Report
+from evidently.metric_preset import (
+    DataDriftPreset,
+    DataQualityPreset,
+)
+
+
+def run_evidently_dashboard(
+    local_output: str = "./report.html",
+    bucket: str = "retrofit-data-dev",
+    key: str = "monitoring/test-report.html",
+) -> None:
+    """Build an Evidently data drift/quality report and upload it to S3.
+
+    Args:
+        local_output: Local path the HTML report is written to.
+        bucket: Name of the S3 bucket that receives the report.
+        key: Object key the report is stored under inside ``bucket``.
+    """
+    # DUMMY TEST CASE: only the first 100 rows of each dataset are compared.
+    ref = pd.read_parquet(
+        "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet"
+    ).head(100)
+    cur = pd.read_parquet(
+        "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data.parquet"
+    ).head(100)
+
+    report = Report(
+        metrics=[
+            DataDriftPreset(),
+            DataQualityPreset(),
+        ]
+    )
+
+    report.run(reference_data=ref, current_data=cur)
+    report.save_html(local_output)
+
+    s3 = boto3.client("s3")
+    s3.upload_file(local_output, bucket, key)
+
+    print(f"{local_output} uploaded to {bucket}/{key} successfully.")
+
+
+if __name__ == "__main__":
+    run_evidently_dashboard()
diff --git a/modules/ml-monitoring/src/evidently/src/requirements/requirements-dev.txt b/modules/ml-monitoring/src/evidently/src/requirements/requirements-dev.txt
new file mode 100644
index 0000000..b5e534e
--- /dev/null
+++ b/modules/ml-monitoring/src/evidently/src/requirements/requirements-dev.txt
@@ -0,0 +1,4 @@
+evidently==0.4.4
+pre-commit==3.3.3
+sphinx==7.2.5
+sphinx_rtd_theme==1.3.0
diff --git a/modules/ml-monitoring/src/evidently/src/requirements/requirements.txt b/modules/ml-monitoring/src/evidently/src/requirements/requirements.txt
new file mode 100644
index 0000000..e69de29
diff --git a/modules/ml-pipeline/Makefile b/modules/ml-pipeline/Makefile
index 45a16f5..d4d6fb7 100644
--- a/modules/ml-pipeline/Makefile
+++ b/modules/ml-pipeline/Makefile
@@ -10,11 +10,11 @@ dev-pyenv:
 	curl https://pyenv.run | bash || echo "Pyenv - Already installed"
 	pyenv install ${PYTHON_VERSION} || echo "Python version already installed"
 	pyenv global ${PYTHON_VERSION}
-	python3 -m venv .dev_env
-	. .dev_env/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pip install -r src/pipeline/src/requirements/version_control/requirements.txt && pre-commit install
+	python3 -m venv .dev_env_pipeline
+	. .dev_env_pipeline/bin/activate && pip install --upgrade pip && pip install -r src/pipeline/src/requirements/training/requirements-dev.txt && pip install -r src/pipeline/src/requirements/version_control/requirements.txt && pre-commit install
 	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
-	echo "source .dev_env/bin/activate"
+	echo "source .dev_env_pipeline/bin/activate"
 
 .PHONY: dvc-init
 dvc-init:
-	. .dev_env/bin/activate && dvc init --subdir
+	. .dev_env_pipeline/bin/activate && dvc init --subdir