mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add eda code for nowA
This commit is contained in:
parent
c0d73d8b9e
commit
0386346c67
1 changed files with 52 additions and 0 deletions
|
|
@ -175,3 +175,55 @@ plot_permutation_importance(exp, fig_kw={"figwidth": 7, "figheight": 6})
|
|||
# Use shap package to explain why 9158 has a 35 prediction when its sap ending is 96
|
||||
#
|
||||
#
|
||||
|
||||
from core.MLModels import model_factory
|
||||
from core.DataClient import dataclient_factory
|
||||
import pandas as pd
|
||||
from config import settings
|
||||
|
||||
# --- Configuration -------------------------------------------------------
# One settings section per pipeline stage; the names mirror the attributes
# read from the shared `settings` object.
build_model_params = settings.build_model
client_params = settings.client
prepare_data_params = settings.prepare_data
feature_process_params = settings.feature_processor
generate_predictions_params = settings.generate_predictions
prediction_analysis_params = settings.prediction_analysis

# --- Model ---------------------------------------------------------------
# Instantiate the configured model type, then restore it from the saved file.
model = model_factory(build_model_params["model_type"])
model.load_model(build_model_params["model_save_filepath"])

# --- Data client ---------------------------------------------------------
# The prediction-analysis section chooses which client to use; that same key
# selects the matching client configuration.
dataclient_type = prediction_analysis_params["dataclient_type"]
dataclient = dataclient_factory(
    dataclient_type=dataclient_type,
    dataclient_config=client_params[dataclient_type],
)
|
||||
|
||||
# --- Column names and file locations taken from configuration ------------
target = feature_process_params["feature_processor_config"]["target"]
predictions_column_name = generate_predictions_params["predictions_column_name"]
output_test_filepath = prepare_data_params["output_test_filepath"]
predictions_output_filepath = generate_predictions_params[
    "predictions_output_filepath"
]

# --- Load the test set and the stored predictions ------------------------
test_df = dataclient.load_data(output_test_filepath)
predictions = dataclient.load_data(predictions_output_filepath)

# Put the predictions next to the test rows. The column-wise concat aligns
# on the index, so both frames are expected to share it.
mix_df = pd.concat([test_df.copy(), predictions], axis="columns")

# Absolute prediction error per row, largest errors first.
mix_df["residual"] = (mix_df[predictions_column_name] - mix_df[target]).abs()
mix_df = mix_df.sort_values(by="residual", ascending=False)
|
||||
|
||||
# --- Rows that look like one specific row --------------------------------
# Find the rows of `mix_df` whose feature vectors have a cosine similarity
# above `similarity_threshold` with the row labelled `row_index`, and collect
# them (with target, prediction and residual) in `check_df` for inspection.
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder

row_index = 12624  # index label (not position) of the row being investigated
similarity_threshold = 0.997  # keep rows strictly above this similarity

# Columns that must not take part in the similarity. The literal names are
# kept from the original analysis; the configured names are added so that
# the prediction/target never leak into the feature vector if the
# configuration uses different column names.
non_feature_columns = list(
    {"predictions", "residual", "SAP_ENDING", predictions_column_name, target}
)

# Index.difference drops the listed labels (absent ones are ignored) and
# returns the remaining column labels sorted. The explicit copy makes this
# an independent frame, so the assignments below neither touch `mix_df` nor
# raise SettingWithCopyWarning.
cosine_similarity_df = mix_df[
    mix_df.columns.difference(non_feature_columns)
].copy()

# Cosine similarity needs numeric input, so object-typed columns are replaced
# by integer label codes. A fresh encoder is fitted per column.
# NOTE(review): label codes are arbitrary integers and the features are not
# scaled, so the similarity is only a rough heuristic - confirm this is
# acceptable for the analysis.
object_columns = cosine_similarity_df.select_dtypes(["object"])
if not object_columns.columns.empty:
    cosine_similarity_df[object_columns.columns] = object_columns.apply(
        lambda column: LabelEncoder().fit_transform(column)
    )

# Double brackets keep the reference row as a one-row DataFrame (2-D), which
# is the shape cosine_similarity expects for its second argument.
feature_vector = cosine_similarity_df.loc[[row_index]]

# The result has one column per reference row; take the first column so a
# plain 1-D array is stored. The similarity is computed before the "cosine"
# column exists, so the score itself is never part of the feature vector.
# NOTE(review): presumably the features contain no NaN here - scikit-learn
# rejects non-finite input; verify against the data.
cosine_similarity_df["cosine"] = cosine_similarity(
    cosine_similarity_df, feature_vector
)[:, 0]

similar_index = cosine_similarity_df[
    cosine_similarity_df["cosine"] > similarity_threshold
].index

# Look the matching rows up in the full frame.
check_df = mix_df.loc[similar_index]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue