Model/utils/uvalue_estimates.py
Khalim Conn-Kowlessar deda5bc025 documented function
2023-07-20 15:50:12 +01:00

39 lines
1.7 KiB
Python

from bisect import bisect_left
from typing import List, Optional
def classify_decile_newvalues(
    decile_boundaries: List[float], decile_labels: List[str], new_values: List[float]
) -> List[Optional[str]]:
    """
    Classify a list of new values into pre-established deciles.

    This function is an alternative to UvalueEstimations.classify_decile_newvalues that does not depend on pandas,
    making it suitable for use in environments where pandas may not be available (such as AWS Lambda).

    Deciles are treated as right-closed intervals: a value that sits exactly on a boundary is assigned to the
    lower of the two adjacent deciles. The very first boundary is inclusive, so the minimum value belongs to the
    first decile.

    :param decile_boundaries: A list of decile boundaries, sorted in ascending order (the lookup uses a binary
        search). These define the ranges of the deciles.
    :param decile_labels: A list of labels for the deciles; decile_labels[i] is the classification for values
        between decile_boundaries[i] and decile_boundaries[i + 1].
    :param new_values: A list of new values to be classified into the deciles.
    :return: A list of classifications for the new values, in the same order as new_values. A value that falls
        outside the range of the deciles, or that is NaN, is classified as None. If decile_boundaries is empty,
        every value is classified as None.
    """
    # With no boundaries there is no decile for any value to fall into.
    if not decile_boundaries:
        return [None for _ in new_values]

    # The overall range is loop-invariant, so look it up once.
    lowest = decile_boundaries[0]
    highest = decile_boundaries[-1]

    classifications: List[Optional[str]] = []
    for value in new_values:
        # A chained comparison is False for NaN, so NaN is rejected here together
        # with genuinely out-of-range values instead of slipping into a decile.
        if not (lowest <= value <= highest):
            classifications.append(None)
            continue

        # bisect_left gives the index of the first boundary >= value; the decile
        # that contains the value is the one just below it. Clamping at 0 keeps
        # a value equal to the lowest boundary inside the first decile.
        index = max(bisect_left(decile_boundaries, value) - 1, 0)
        classifications.append(decile_labels[index])
    return classifications