push up temp code

This commit is contained in:
Michael Duong 2023-10-17 23:52:51 +00:00
parent e6c7b2f58c
commit f2de544b6b
10 changed files with 284 additions and 4 deletions

5
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"python.analysis.extraPaths": [
"./modules/ml-monitoring/src/evidently/src"
]
}

View file

@ -0,0 +1,29 @@
# Lambda container image that bundles the pipeline code, the monitoring code
# and the monitoring handler on top of the AWS Lambda Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10

# Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT}
# Make modules under the task root importable (key=value form; the
# space-separated "ENV key value" form is legacy syntax).
ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"

# Environment variables
# RUNTIME_ENVIRONMENT is supplied at build time (--build-arg) and persisted
# into the image so it is also visible at run time.
ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}

# Install necessary build tools - required to test locally.
# The yum cache is removed in the same layer so it does not bloat the image.
RUN yum install -y gcc python3-devel \
    && yum clean all \
    && rm -rf /var/cache/yum

# Install python packages (prediction + monitoring requirements in one resolve)
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./predictions_requirements.txt
COPY modules/ml-monitoring/src/evidently/src/requirements/requirements.txt ./monitoring_requirements.txt
RUN pip install --no-cache-dir -r ./predictions_requirements.txt -r ./monitoring_requirements.txt

# Copy the project code
COPY modules/ml-pipeline/src/pipeline ./pipeline

# Copy the monitoring code
COPY modules/ml-monitoring/src/evidently/src ./monitoring

# Copy the handler next to the pipeline code
COPY deployment/handlers/monitoring_app.py ./pipeline/monitoring_app.py

# Run from the pipeline directory so the handler module is resolved there
WORKDIR ${LAMBDA_TASK_ROOT}/pipeline
CMD [ "monitoring_app.handler" ]

View file

@ -1 +1,2 @@
.dev_env_monitoring/
workspace/

View file

@ -1,9 +1,24 @@
# pyenv installation directory; its bin/ folder is prepended to PATH below.
export PYENV_ROOT=$(HOME)/.pyenv
export PATH := $(PYENV_ROOT)/bin:$(PATH)
# Python version used for the environment; `?=` keeps any value already set by the caller.
PYTHON_VERSION ?= 3.10.12
# Name of the conda environment that the dev-conda target creates and installs into.
CONDA_ENV=dev_env_monitoring
# Entry-point target for setting up a development environment.
# NOTE(review): two prerequisite lines are listed for `init` (dev-pyenv and
# dev-conda); make merges them, so both targets would run - confirm that is intended.
.PHONY: init
init: dev-pyenv
init: dev-conda
# Create the conda environment named by CONDA_ENV and install the development
# tooling (dev requirements, pre-commit hooks, ipykernel) into it.
.PHONY: dev-conda
dev-conda:
# Disabled clean-up steps for a previously created environment:
# conda deactivate || echo "Not in conda environment"
# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
# Create the environment with the configured Python version (-y: no prompt).
conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
# NOTE(review): `conda init bash` presumably edits the user's shell start-up file - confirm this side effect is wanted here.
conda init bash
# `conda run -n` executes each command inside the environment without activating it.
conda run -v -n ${CONDA_ENV} pip install --upgrade pip
conda run -v -n ${CONDA_ENV} pip install -r src/evidently/src/requirements/requirements-dev.txt
# Disabled: install of the version_control requirements.
# conda run -vvvv -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
conda run -v -n ${CONDA_ENV} pre-commit install
conda run -v -n ${CONDA_ENV} pip install ipykernel
# Tell the user how to activate the environment in their own shell.
echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
echo "conda activate ${CONDA_ENV}"
.PHONY: dev-pyenv
dev-pyenv:

View file

@ -0,0 +1,38 @@
"""
Implementations of the MLReport protocol
"""
from core.interface.InterfaceMLReport import MLReport
def report_factory(report_type: str, report_config: dict | None = None) -> MLReport:
    """
    Select the type of reporting you require.

    Args:
        report_type: Key of the report implementation to build
            ("data-quality" or "regression").
        report_config: Optional keyword arguments forwarded to the report
            class constructor; ``None`` is treated as no arguments.

    Returns:
        An instance of the report class registered under ``report_type``.

    Raises:
        ValueError: If ``report_type`` is not registered in the factory.
    """
    reports = {
        "data-quality": DataQualityReport,
        "regression": RegressionReport,
        # Add more report types here
    }
    try:
        report_class = reports[report_type]
    except KeyError:
        # Name the rejected value and the valid keys so the caller can fix the
        # call without reading the factory source.
        valid_types = ", ".join(sorted(reports))
        raise ValueError(
            f"Report type {report_type!r} is not in factory; "
            f"expected one of: {valid_types}"
        ) from None
    return report_class(**(report_config or {}))
class DataQualityReport:
    """
    Placeholder data-quality report returned by the factory for "data-quality".
    """

    def generate_report(self, reference_data=None, current_data=None, location=None):
        """
        Placeholder implementation: ignores its arguments and returns 1.

        The parameters mirror the MLReport ``generate_report`` signature so the
        method can be called on an instance with the protocol arguments; they
        default to ``None`` so an argument-less call keeps working.
        """
        return 1
class RegressionReport:
    """
    Placeholder regression report returned by the factory for "regression".
    """

    # Identifier of this report kind; equal to its key in the factory mapping.
    report_type = "regression"

    def generate_report(self, reference_data=None, current_data=None, location=None):
        """
        Placeholder implementation: ignores its arguments and returns 1.

        The parameters mirror the MLReport ``generate_report`` signature so the
        method can be called on an instance with the protocol arguments; they
        default to ``None`` so an argument-less call keeps working.
        """
        return 1

View file

@ -0,0 +1,19 @@
"""
Interface for generating MLReports
"""
import pandas as pd
from typing import Protocol
class MLReport(Protocol):
    """
    Declare methods for MLReport implementation
    """

    def generate_report(
        self, reference_data: pd.DataFrame, current_data: pd.DataFrame, location: str
    ) -> None:
        """
        Create a html report

        Args:
            reference_data: Data frame the current data is compared against.
            current_data: Data frame being reported on.
            location: Where the report is written.
                NOTE(review): presumably a file path or URI - confirm with
                the first concrete implementation.
        """

View file

@ -0,0 +1,174 @@
"""
Create a report regarding the data quality
"""
import datetime
from sklearn import datasets
from evidently.metrics import ColumnDriftMetric
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetDriftMetric
from evidently.metrics import DatasetMissingValuesMetric
from evidently.report import Report
from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite
from evidently.ui.dashboards import CounterAgg
from evidently.ui.dashboards import DashboardPanelCounter
from evidently.ui.dashboards import DashboardPanelPlot
from evidently.ui.dashboards import PanelValue
from evidently.ui.dashboards import PlotType
from evidently.ui.dashboards import ReportFilter
from evidently.ui.remote import RemoteWorkspace
from evidently.ui.workspace import Workspace
from evidently.ui.workspace import WorkspaceBase
import pandas as pd
# Workspace location and project metadata used when the demo project is created.
WORKSPACE = "workspace"
YOUR_PROJECT_NAME = "Data Drift Monitoring"
YOUR_PROJECT_DESCRIPTION = "Monitoring Data for modelling process"

# DUMMY TEST CASE
# NOTE: both frames are read from S3 at import time.
# Reference data: only the first 1000 rows of the train/validation set are kept.
ref = pd.read_parquet(
    "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet"
)
ref = ref.head(1000)
# Current data: the full test set; it is sliced into batches of 100 rows later.
cur = pd.read_parquet(
    "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data.parquet"
)
def create_report(i: int):
    """
    Build and run the data drift report for batch ``i`` of the current data.

    The report is timestamped with the current time shifted forward by ``i``
    days and compares ``ref`` against rows ``100 * i`` up to (excluding)
    ``100 * (i + 1)`` of ``cur``. Returns the report after it has been run.
    """
    # Dataset-level metrics only; column-level metrics (ColumnDriftMetric,
    # ColumnSummaryMetric) are not enabled yet.
    dataset_metrics = [DatasetDriftMetric(), DatasetMissingValuesMetric()]
    batch_timestamp = datetime.datetime.now() + datetime.timedelta(days=i)
    data_drift_report = Report(metrics=dataset_metrics, timestamp=batch_timestamp)

    first_row, end_row = 100 * i, 100 * (i + 1)
    current_batch = cur.iloc[first_row:end_row, :]
    data_drift_report.run(reference_data=ref, current_data=current_batch)
    return data_drift_report
def create_test_suite(i: int):
    """
    Build and run the data drift test suite for batch ``i`` of the current data.

    The suite is timestamped with the current time shifted forward by ``i``
    days and compares ``ref`` against rows ``100 * i`` up to (excluding)
    ``100 * (i + 1)`` of ``cur``. Returns the suite after it has been run.
    """
    drift_tests = [DataDriftTestPreset()]
    batch_timestamp = datetime.datetime.now() + datetime.timedelta(days=i)
    data_drift_test_suite = TestSuite(tests=drift_tests, timestamp=batch_timestamp)

    first_row, end_row = 100 * i, 100 * (i + 1)
    current_batch = cur.iloc[first_row:end_row, :]
    data_drift_test_suite.run(reference_data=ref, current_data=current_batch)
    return data_drift_test_suite
def create_project(workspace: WorkspaceBase):
    """
    Create the monitoring project in ``workspace`` and configure its dashboard.

    The project is created under ``YOUR_PROJECT_NAME``, given
    ``YOUR_PROJECT_DESCRIPTION``, gets a single title counter panel, and is
    saved before being returned.
    """
    project = workspace.create_project(YOUR_PROJECT_NAME)
    project.description = YOUR_PROJECT_DESCRIPTION

    # Counter panel with an empty filter and no aggregation: it only carries a title.
    # NOTE(review): the title looks carried over from an example dataset -
    # confirm it is the heading wanted for this project.
    empty_filter = ReportFilter(metadata_values={}, tag_values=[])
    title_panel = DashboardPanelCounter(
        filter=empty_filter,
        agg=CounterAgg.NONE,
        title="Census Income Dataset (Adult)",
    )
    project.dashboard.add_panel(title_panel)

    # TODO: further panels are not enabled yet: "Model Calls" and "Share of
    # Drifted Features" counters, a "Dataset Quality" line plot, and per-column
    # Wasserstein drift-distance bar plots.

    project.save()
    return project
def create_demo_project(workspace: str, n_batches: int = 5):
    """
    Create the workspace and project, then add one report and one test suite per batch.

    Args:
        workspace: Workspace location passed to ``Workspace.create``.
        n_batches: Number of consecutive batches (indices ``0 .. n_batches - 1``)
            to generate a report and a test suite for. Defaults to 5, the
            previously hard-coded value.
    """
    ws = Workspace.create(workspace)
    project = create_project(ws)

    for batch_index in range(n_batches):
        # Report first, then the test suite, for each batch.
        ws.add_report(project.id, create_report(i=batch_index))
        ws.add_test_suite(project.id, create_test_suite(i=batch_index))


if __name__ == "__main__":
    # Running the module as a script builds the demo project in the default workspace.
    create_demo_project(WORKSPACE)

View file

@ -1,4 +1,3 @@
boto3==1.28.41
evidently==0.4.4
pre-commit==3.3.3
sphinx==7.2.5
sphinx_rtd_theme==1.3.0

View file

@ -1,2 +1,2 @@
boto3==1.28.41
evidently==0.4.4
evidently==0.4.6