import pandas as pd
from backend.Property import Property
from utils.s3 import read_from_s3
from recommendations.recommendation_utils import (
    get_wall_u_value,
    get_floor_u_value,
    get_roof_u_value,
)
from backend.app.config import get_settings
import msgpack


def get_cleaned():
    """
    Fetch the cleaned EPC dataset from S3 and decode it.

    The payload holds the cleaned descriptions for the EPC dataset. It is read
    from the key ``cleaned_epc_data/cleaned.bson`` in the bucket named
    ``retrofit-data-<ENVIRONMENT>``, where ``ENVIRONMENT`` is taken from the
    application settings, and is then decoded from MessagePack.

    NOTE(review): the key carries a ``.bson`` extension although the bytes are
    decoded as MessagePack -- confirm the stored format matches.

    :return: the object produced by unpacking the MessagePack payload
    """
    # Bucket name is environment-specific, so resolve it from the settings.
    environment_name = get_settings().ENVIRONMENT
    bucket = "retrofit-data-{environment}".format(environment=environment_name)

    packed_payload = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )

    # raw=False asks msgpack to decode raw string values to ``str``.
    return msgpack.unpackb(packed_payload, raw=False)