from utils.s3 import read_from_s3
from backend.app.config import get_settings
import msgpack


def get_cleaned():
    """
    Fetch the cleaned EPC dataset from S3 and decode it.

    The stored object holds the cleaned descriptions for the EPC dataset and
    is serialised in MessagePack format, so the downloaded payload is unpacked
    before it is handed back to the caller. The bucket is selected per
    deployment: its name is built from the ENVIRONMENT value exposed by the
    application settings.

    :return: the object obtained by unpacking the MessagePack payload
    """
    # NOTE(review): the object key ends in ".bson" although the payload is
    # decoded as MessagePack -- confirm the extension is just a naming quirk.
    bucket = "retrofit-data-{environment}".format(
        environment=get_settings().ENVIRONMENT
    )
    payload = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )

    # raw=False asks msgpack to decode raw string values to str, not bytes.
    return msgpack.unpackb(payload, raw=False)