mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
130 lines
3.8 KiB
Python
130 lines
3.8 KiB
Python
"""
|
|
Generate evaluation metrics, and allow them to be regenerated when new metric functions are added
|
|
Key tasks:
|
|
- Specify metric functions that take in prediction vs actual to generate a metric value
|
|
- Given a model and test data, produce a suite of all metrics
|
|
"""
|
|
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
from core.Settings import (
|
|
RESIDUAL_TRUE_LABEL,
|
|
RESIDUAL_PREDICTION_LABEL,
|
|
SEABORN_RESIDUAL_AXIS_FONTSIZE,
|
|
SEABORN_RESIDUAL_TITLE_FONTSIZE,
|
|
SEABORN_RESIDUAL_STYLE,
|
|
SEABORN_RESIDUAL_ASPECT_RATIO,
|
|
SEABORN_RESIDUAL_PLOT_DPI,
|
|
SEABORN_RESIDUAL_RANGE,
|
|
SEABORN_RESIDUAL_LINE_COLOUR,
|
|
SEABORN_RESIDUAL_LINE_WIDTH,
|
|
)
|
|
from sklearn.metrics import (
|
|
mean_absolute_error,
|
|
median_absolute_error,
|
|
mean_squared_error,
|
|
mean_absolute_percentage_error,
|
|
)
|
|
|
|
|
|
# Dummy example of a new metric that can be added - it must take the true values and the predictions as its arguments
|
|
def max_error(y_true: pd.Series, y_pred: pd.Series) -> float:
    """
    Return the largest absolute difference between true and predicted values.

    Values are paired by position rather than by index label, so a prediction
    Series or array that does not share the index of ``y_true`` is still
    compared element by element.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, in the same order as ``y_true``.

    Returns:
        The maximum of ``|y_true - y_pred|`` over all pairs.

    Raises:
        ValueError: If there are no pairs to compare (empty input).
    """
    # abs() matters: without it a large over-prediction (negative difference)
    # would lose to a small under-prediction.
    return max(abs(true - pred) for true, pred in zip(y_true, y_pred))
|
|
|
|
|
|
# Metric callables used by the Metrics helpers. Each one is called as
# metric(actuals, predictions) and its value is reported under the
# function's __name__. Add a callable here to include it in the suite.
METRIC_TO_APPLY = [
    mean_absolute_error,
    median_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error,
    # max_error
]
|
|
|
|
|
|
def sort_by_metric(
    data: pd.DataFrame, optimse_metric: str, best_model_column_name: str
) -> pd.DataFrame:
    """
    Sort a data frame by a metric column and flag the best (lowest) row.

    Args:
        data: Frame containing one row per candidate; it is not modified.
        optimse_metric: Name of the column to sort by, ascending.
        best_model_column_name: Name of the boolean column to add. It is True
            for the first row after sorting and False everywhere else.

    Returns:
        A new, re-indexed frame sorted by ``optimse_metric`` with the flag
        column appended. An empty input yields an empty frame that still
        carries the flag column.
    """
    # Ascending as we want lowest error values
    ranked = data.sort_values(optimse_metric, ascending=True).reset_index(drop=True)
    ranked[best_model_column_name] = False

    # Guard the assignment: .loc[0, ...] on an empty frame would silently
    # append a new row instead of flagging an existing one.
    if not ranked.empty:
        ranked.loc[0, best_model_column_name] = True

    return ranked
|
|
|
|
|
|
class Metrics:
    """
    Static helpers that score predictions against actual values and plot them.

    The metric helpers work over every callable listed in METRIC_TO_APPLY; the
    plotting helper writes a true-vs-predicted scatter plot to disk.
    """

    @staticmethod
    def list_metric_functions() -> list:
        """
        Return the ``__name__`` of every function in METRIC_TO_APPLY, in order.
        """
        return [metric_to_apply.__name__ for metric_to_apply in METRIC_TO_APPLY]

    @staticmethod
    def generate_metric_suite(actuals: pd.Series, predictions: pd.Series) -> pd.Series:
        """
        Evaluate every metric in METRIC_TO_APPLY on the given values.

        Args:
            actuals: Observed target values.
            predictions: Predicted target values for the same observations.

        Returns:
            A Series indexed by metric function name, holding the value each
            metric returned for ``(actuals, predictions)``.
        """
        return pd.Series(
            {
                metric_function.__name__: metric_function(actuals, predictions)
                for metric_function in METRIC_TO_APPLY
            }
        )

    @staticmethod
    def generate_plot_suite():
        """
        Placeholder for producing all metric plots; currently does nothing.
        """

    @staticmethod
    def generate_residual_plot(
        actuals: pd.Series,
        predictions: pd.Series,
        target_column: str,
        output_filepath: Path | str,
    ) -> None:
        """
        Save a scatter plot of predicted against true values.

        The plot is drawn on its own figure, which is closed once the image has
        been written, so repeated calls do not draw on top of each other.

        Args:
            actuals: Observed target values (x axis).
            predictions: Predicted target values (y axis), paired with
                ``actuals`` by position.
            target_column: Name of the target, used in the axis labels.
            output_filepath: Where the image is written.
        """
        # TODO: can have a model.metric_outputs method
        # For now just do it here
        residual_df = pd.DataFrame(
            list(zip(actuals, predictions)),
            columns=[RESIDUAL_TRUE_LABEL, RESIDUAL_PREDICTION_LABEL],
        )

        # image formatting - set the theme first so it applies to the axes
        # created below
        sns.set(style=SEABORN_RESIDUAL_STYLE)

        # Use a dedicated figure rather than pyplot's implicit "current" one,
        # otherwise points from an earlier call remain on the axes.
        fig, ax = plt.subplots()
        try:
            sns.scatterplot(
                x=RESIDUAL_TRUE_LABEL,
                y=RESIDUAL_PREDICTION_LABEL,
                data=residual_df,
                ax=ax,
            )
            ax.set_aspect(SEABORN_RESIDUAL_ASPECT_RATIO)
            ax.set_xlabel(
                f"True {target_column}", fontsize=SEABORN_RESIDUAL_AXIS_FONTSIZE
            )
            ax.set_ylabel(
                f"Predicted {target_column}", fontsize=SEABORN_RESIDUAL_AXIS_FONTSIZE
            )
            ax.set_title("Residuals", fontsize=SEABORN_RESIDUAL_TITLE_FONTSIZE)

            # Reference line: the same range on both axes, i.e. where the
            # prediction equals the true value
            ax.plot(
                SEABORN_RESIDUAL_RANGE,
                SEABORN_RESIDUAL_RANGE,
                SEABORN_RESIDUAL_LINE_COLOUR,
                linewidth=SEABORN_RESIDUAL_LINE_WIDTH,
            )

            fig.tight_layout()
            fig.savefig(output_filepath, dpi=SEABORN_RESIDUAL_PLOT_DPI)
        finally:
            # Release the figure even if drawing or saving fails
            plt.close(fig)
|