mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-30 13:10:43 +00:00
push up temp code
This commit is contained in:
parent
e6c7b2f58c
commit
f2de544b6b
10 changed files with 284 additions and 4 deletions
5
.vscode/settings.json
vendored
Normal file
5
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"python.analysis.extraPaths": [
|
||||
"./modules/ml-monitoring/src/evidently/src"
|
||||
]
|
||||
}
|
||||
29
deployment/Dockerfile.monitoring.lambda
Normal file
29
deployment/Dockerfile.monitoring.lambda
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
# Lambda container image that bundles the prediction pipeline together with
# the monitoring code and exposes `monitoring_app.handler` as the entry point.
FROM public.ecr.aws/lambda/python:3.10

# Set the working directory
WORKDIR ${LAMBDA_TASK_ROOT}
# Append the task root to PYTHONPATH. Uses the key=value form; the
# space-separated `ENV key value` syntax is legacy. The value is unchanged.
ENV PYTHONPATH="${PYTHONPATH}:${LAMBDA_TASK_ROOT}"

# Environment variables
ARG RUNTIME_ENVIRONMENT
ENV RUNTIME_ENVIRONMENT=${RUNTIME_ENVIRONMENT}

# Install necessary build tools - required to test locally.
# The yum cache is removed in the same layer so it does not bloat the image.
RUN yum install -y gcc python3-devel \
    && yum clean all \
    && rm -rf /var/cache/yum

# Install python packages (requirements are copied first so this layer is
# only rebuilt when a requirements file changes)
COPY modules/ml-pipeline/src/pipeline/requirements/predictions/requirements.txt ./predictions_requirements.txt
COPY modules/ml-monitoring/src/evidently/src/requirements/requirements.txt ./monitoring_requirements.txt
RUN pip install --no-cache-dir -r ./predictions_requirements.txt -r ./monitoring_requirements.txt

# Copy the project code
COPY modules/ml-pipeline/src/pipeline ./pipeline
# Copy the monitoring code
COPY modules/ml-monitoring/src/evidently/src ./monitoring

# Copy the handler next to the pipeline code and run from that directory
COPY deployment/handlers/monitoring_app.py ./pipeline/monitoring_app.py
WORKDIR ${LAMBDA_TASK_ROOT}/pipeline

CMD [ "monitoring_app.handler" ]
|
||||
1
modules/ml-monitoring/.gitignore
vendored
1
modules/ml-monitoring/.gitignore
vendored
|
|
@ -1 +1,2 @@
|
|||
.dev_env_monitoring/
|
||||
workspace/
|
||||
|
|
|
|||
|
|
@ -1,9 +1,24 @@
|
|||
export PYENV_ROOT=$(HOME)/.pyenv
|
||||
export PATH := $(PYENV_ROOT)/bin:$(PATH)
|
||||
PYTHON_VERSION ?= 3.10.12
|
||||
CONDA_ENV=dev_env_monitoring
|
||||
|
||||
.PHONY: init
|
||||
init: dev-pyenv
|
||||
init: dev-conda
|
||||
|
||||
# dev-conda: create the conda development environment named by CONDA_ENV.
# Steps, in order: create the environment with python=$(PYTHON_VERSION),
# run `conda init bash`, upgrade pip, install the dev requirements, run
# `pre-commit install`, install ipykernel, then print the activation command.
# NOTE(review): the commented-out lines below look like an optional clean-up
# of a previous environment and an extra requirements install - confirm
# whether they should be restored or deleted.
.PHONY: dev-conda
dev-conda:
	# conda deactivate || echo "Not in conda environment"
	# conda remove --name ${CONDA_ENV} --all -y || echo "No environment created previously"
	conda create --name ${CONDA_ENV} python=$(PYTHON_VERSION) -y
	conda init bash
	conda run -v -n ${CONDA_ENV} pip install --upgrade pip
	conda run -v -n ${CONDA_ENV} pip install -r src/evidently/src/requirements/requirements-dev.txt
	# conda run -vvvv -n ${CONDA_ENV} pip install -r src/pipeline/requirements/version_control/requirements.txt
	conda run -v -n ${CONDA_ENV} pre-commit install
	conda run -v -n ${CONDA_ENV} pip install ipykernel
	echo "TO ACTIVATE ENVIRONMENT, USE THE FOLLOWING COMMAND"
	echo "conda activate ${CONDA_ENV}"
|
||||
|
||||
.PHONY: dev-pyenv
|
||||
dev-pyenv:
|
||||
|
|
|
|||
38
modules/ml-monitoring/src/evidently/src/core/MLReport.py
Normal file
38
modules/ml-monitoring/src/evidently/src/core/MLReport.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""
|
||||
Implementations of the MLReport protocol
|
||||
"""
|
||||
|
||||
from core.interface.InterfaceMLReport import MLReport
|
||||
|
||||
|
||||
def report_factory(report_type: str, report_config: dict | None = None) -> MLReport:
    """
    Build the report implementation registered under ``report_type``.

    ``report_config`` is expanded as keyword arguments to the selected report
    class; ``None`` means "no arguments".

    Raises:
        ValueError: if ``report_type`` is not a registered report type.
    """

    init_kwargs = {} if report_config is None else report_config

    registry = {
        "data-quality": DataQualityReport,
        "regression": RegressionReport,
        # Add more report types here
    }

    report_cls = registry.get(report_type)
    if report_cls is None:
        raise ValueError("Report type specified is not in factory")

    return report_cls(**init_kwargs)
|
||||
|
||||
|
||||
class DataQualityReport:
    """
    Data-quality report selectable through ``report_factory``.

    The implementation is still a placeholder: it ignores its inputs and
    returns ``1``.
    """

    def generate_report(self, reference_data=None, current_data=None, location=None):
        """
        Generate the data-quality report (placeholder).

        The method takes ``self`` so it can be called on the instance the
        factory returns, and accepts the three arguments declared by the
        MLReport protocol. All three are optional so a zero-argument call on
        an instance also works.

        Args:
            reference_data: baseline dataset; currently unused.
            current_data: dataset to compare against the baseline; currently
                unused.
            location: report destination; currently unused.

        Returns:
            The placeholder value ``1``.
        """
        return 1
|
||||
|
||||
|
||||
class RegressionReport:
    """
    Regression report selectable through ``report_factory``.

    The implementation is still a placeholder: it ignores its inputs and
    returns ``1``.
    """

    # Key under which this report is registered in the factory.
    report_type = "regression"

    def generate_report(self, reference_data=None, current_data=None, location=None):
        """
        Generate the regression report (placeholder).

        The method takes ``self`` so it can be called on the instance the
        factory returns, and accepts the three arguments declared by the
        MLReport protocol. All three are optional so a zero-argument call on
        an instance also works.

        Args:
            reference_data: baseline dataset; currently unused.
            current_data: dataset to compare against the baseline; currently
                unused.
            location: report destination; currently unused.

        Returns:
            The placeholder value ``1``.
        """
        return 1
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
"""
|
||||
Interface for generating MLReports
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
class MLReport(Protocol):
    """
    Declare methods for MLReport implementation

    Any class providing a matching ``generate_report`` method satisfies this
    protocol structurally; no inheritance is required.
    """

    def generate_report(
        self, reference_data: pd.DataFrame, current_data: pd.DataFrame, location: str
    ) -> None:
        """
        Create a html report

        Args:
            reference_data: baseline dataset the report compares against.
            current_data: dataset being evaluated.
            location: presumably where the HTML report is written - confirm
                against the first real implementation.
        """
        ...
|
||||
174
modules/ml-monitoring/src/evidently/src/generate_report.py
Normal file
174
modules/ml-monitoring/src/evidently/src/generate_report.py
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
"""
|
||||
Create a report regarding the data quality
|
||||
"""
|
||||
|
||||
import datetime
|
||||
|
||||
from sklearn import datasets
|
||||
|
||||
from evidently.metrics import ColumnDriftMetric
|
||||
from evidently.metrics import ColumnSummaryMetric
|
||||
from evidently.metrics import DatasetDriftMetric
|
||||
from evidently.metrics import DatasetMissingValuesMetric
|
||||
from evidently.report import Report
|
||||
from evidently.test_preset import DataDriftTestPreset
|
||||
from evidently.test_suite import TestSuite
|
||||
from evidently.ui.dashboards import CounterAgg
|
||||
from evidently.ui.dashboards import DashboardPanelCounter
|
||||
from evidently.ui.dashboards import DashboardPanelPlot
|
||||
from evidently.ui.dashboards import PanelValue
|
||||
from evidently.ui.dashboards import PlotType
|
||||
from evidently.ui.dashboards import ReportFilter
|
||||
from evidently.ui.remote import RemoteWorkspace
|
||||
from evidently.ui.workspace import Workspace
|
||||
from evidently.ui.workspace import WorkspaceBase
|
||||
import pandas as pd
|
||||
|
||||
# DUMMY TEST CASE
|
||||
# DUMMY TEST CASE
# NOTE: both reads below run at import time, so importing this module hits S3.
# Reference dataset: first 1000 rows of the train/validation parquet file.
ref = pd.read_parquet(
    "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/train_validation_data.parquet"
).head(1000)
# Current dataset: the full test parquet file; it is sliced into 100-row
# batches by create_report / create_test_suite.
cur = pd.read_parquet(
    "s3://retrofit-data-dev/model_build_data/change_data/rdsap_full/test_data.parquet"
)

# Path handed to Workspace.create when the script is run directly.
WORKSPACE = "workspace"

# Name and description given to the project created in create_project.
YOUR_PROJECT_NAME = "Data Drift Monitoring"
YOUR_PROJECT_DESCRIPTION = "Monitoring Data for modelling process"
|
||||
|
||||
|
||||
def create_report(i: int):
    """
    Build and run a data-drift report for batch ``i`` of the current data.

    The report is timestamped ``i`` days after "now" and compares the
    module-level reference frame ``ref`` with rows ``[100 * i, 100 * (i + 1))``
    of the module-level frame ``cur``.

    Returns:
        The executed ``Report``.
    """
    # Dataset-level metrics only. Column-level metrics (ColumnDriftMetric /
    # ColumnSummaryMetric) can be appended here once target columns are chosen.
    dataset_metrics = [
        DatasetDriftMetric(),
        DatasetMissingValuesMetric(),
    ]
    report_timestamp = datetime.datetime.now() + datetime.timedelta(days=i)
    data_drift_report = Report(metrics=dataset_metrics, timestamp=report_timestamp)

    batch_start = 100 * i
    current_batch = cur.iloc[batch_start : batch_start + 100, :]
    data_drift_report.run(reference_data=ref, current_data=current_batch)

    return data_drift_report
|
||||
|
||||
|
||||
def create_test_suite(i: int):
    """
    Build and run the data-drift test preset for batch ``i`` of the current data.

    The suite is timestamped ``i`` days after "now" and compares the
    module-level reference frame ``ref`` with rows ``[100 * i, 100 * (i + 1))``
    of the module-level frame ``cur``.

    Returns:
        The executed ``TestSuite``.
    """
    drift_tests = [DataDriftTestPreset()]
    suite_timestamp = datetime.datetime.now() + datetime.timedelta(days=i)
    data_drift_test_suite = TestSuite(tests=drift_tests, timestamp=suite_timestamp)

    batch_start = 100 * i
    current_batch = cur.iloc[batch_start : batch_start + 100, :]
    data_drift_test_suite.run(reference_data=ref, current_data=current_batch)

    return data_drift_test_suite
|
||||
|
||||
|
||||
def create_project(workspace: WorkspaceBase):
    """
    Create the monitoring project in ``workspace`` and give it a title panel.

    The project is named ``YOUR_PROJECT_NAME``, described by
    ``YOUR_PROJECT_DESCRIPTION``, receives a single counter panel that only
    shows a title, and is saved before being returned.

    Returns:
        The saved project.
    """
    project = workspace.create_project(YOUR_PROJECT_NAME)
    project.description = YOUR_PROJECT_DESCRIPTION

    # Title-only panel: no aggregation and an empty filter.
    # NOTE(review): the title text looks carried over from a demo dataset -
    # confirm it is the heading wanted for this project.
    unfiltered = ReportFilter(metadata_values={}, tag_values=[])
    title_panel = DashboardPanelCounter(
        filter=unfiltered,
        agg=CounterAgg.NONE,
        title="Census Income Dataset (Adult)",
    )
    project.dashboard.add_panel(title_panel)

    # Further panels were prototyped here and are currently disabled: a
    # "Model Calls" counter (DatasetMissingValuesMetric row count, SUM), a
    # "Share of Drifted Features" counter (DatasetDriftMetric
    # share_of_drifted_columns, LAST), a "Dataset Quality" line plot (drift
    # share and missing-values share) and per-column Wasserstein drift bar
    # plots built on ColumnDriftMetric for "age" and "education-num".

    project.save()
    return project
|
||||
|
||||
|
||||
def create_demo_project(workspace: str):
    """
    Create a workspace at ``workspace`` and populate it with demo snapshots.

    A project is created via ``create_project``; then, for each of five
    batches (0-4), one report and one test suite are generated and added to
    the workspace under that project.
    """
    ws = Workspace.create(workspace)
    project = create_project(ws)

    for batch in range(5):
        ws.add_report(project.id, create_report(i=batch))
        ws.add_test_suite(project.id, create_test_suite(i=batch))
|
||||
|
||||
|
||||
# Script entry point: build the demo project in the WORKSPACE directory.
if __name__ == "__main__":
    create_demo_project(WORKSPACE)
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
boto3==1.28.41
|
||||
evidently==0.4.4
|
||||
pre-commit==3.3.3
|
||||
sphinx==7.2.5
|
||||
sphinx_rtd_theme==1.3.0
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
boto3==1.28.41
|
||||
evidently==0.4.4
|
||||
evidently==0.4.6
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue