add eda script bits

This commit is contained in:
Michael Duong 2023-10-03 23:01:17 +00:00
parent 961773f58a
commit bcd2383d8d

View file

@@ -211,7 +211,7 @@ cosine_similarity_df = mix_df[
]
from sklearn.metrics.pairwise import cosine_similarity
# Label of the row that is later pulled out with .loc as the reference vector
# for the similarity lookup.
# NOTE(review): this page is a diff whose +/- markers were lost; 12624 is
# presumably the removed value and 58199 the added one (the hunk header reports
# 7 lines before and after) — confirm. Executed as written, the second
# assignment simply overrides the first.
row_index = 12624
row_index = 58199
from sklearn.preprocessing import LabelEncoder
@@ -224,6 +224,8 @@ cosine_similarity_df[object_columns.columns] = cosine_similarity_df[
# Select the row labelled `row_index`; passing a list to .loc keeps the result
# as a one-row frame rather than a Series (assumes cosine_similarity_df is a
# pandas DataFrame — TODO confirm).
feature_vector = cosine_similarity_df.loc[[row_index]]
# Score every row against the reference row and store the result in a new
# "cosine" column on the same frame.
# NOTE(review): if this is re-run without rebuilding cosine_similarity_df, the
# existing "cosine" column would itself be fed in as a feature — verify.
cosine_similarity_df["cosine"] = cosine_similarity(cosine_similarity_df, feature_vector)
# Threshold-based selection: labels of rows whose score exceeds 0.997.
# NOTE(review): this page is a diff with the +/- markers lost; this looks like
# the removed line (the hunk grows from 6 to 8 lines) — confirm. Executed as
# written, its result is immediately overwritten by the assignment below.
similar_index = cosine_similarity_df[cosine_similarity_df["cosine"] > 0.997].index
# Rank-based selection: labels of the 5 rows with the highest "cosine" score.
# The reference row is part of the frame being ranked, so it presumably appears
# among these 5 — verify before treating all 5 as "other" rows.
similar_index = (
cosine_similarity_df.sort_values("cosine", ascending=False).head(5).index
)
# Look the selected labels up in mix_df for inspection.
check_df = mix_df.loc[similar_index]