import pickle

import numpy as np
import pandas as pd

from model_data.EpcClean import EpcClean


class UvalueEstimations:
    """
    Build median thermal-transmittance (U-value) lookup tables from input records.

    Only records whose element description contains the text
    "Average thermal transmittance" are used. Those records are bucketed by
    total floor area decile and grouped by a fixed set of property attributes;
    each group stores the median thermal transmittance and the sample count.
    """

    # Text that marks a description as carrying a thermal transmittance value.
    _U_VALUE_MARKER = "Average thermal transmittance"
    # Column that is split into deciles, and the derived decile column name
    # (classify_into_deciles appends "_group" to the source column name).
    _AREA_COLUMN = "total-floor-area"
    _AREA_GROUP_COLUMN = "total-floor-area_group"

    def __init__(self, data: list):
        """
        Initialize the UvalueEstimations class.

        :param data: The input data as a list of dictionaries, to be converted to a dataframe
        """
        self.data = pd.DataFrame(data)

        # Summary tables are None until the matching set_* method has run.
        self.walls = None
        self.walls_decile_data = {}

        self.roofs = None

        self.floors = None
        self.floors_decile_data = {}

    def get_estimates(self, cleaner: EpcClean):
        """
        Calculate U-value estimates for walls, roofs, and floors.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        self.set_walls(cleaner)
        self.set_roofs(cleaner)
        self.set_floors(cleaner)

    def set_walls(self, cleaner: EpcClean):
        """
        Set U-value estimates for walls.

        Populates ``self.walls`` with the grouped summary and
        ``self.walls_decile_data`` with the decile labels and boundaries used.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        self.walls, self.walls_decile_data = self._summarise_element(cleaner, "walls")

    def set_roofs(self, cleaner: EpcClean):
        """
        Set U-value estimates for roofs.

        Not implemented: this is a no-op and ``self.roofs`` stays ``None``.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        pass

    def set_floors(self, cleaner: EpcClean):
        """
        Set U-value estimates for floors.

        Populates ``self.floors`` with the grouped summary and
        ``self.floors_decile_data`` with the decile labels and boundaries used.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        self.floors, self.floors_decile_data = self._summarise_element(cleaner, "floor")

    def _summarise_element(self, cleaner: EpcClean, element: str):
        """
        Build the grouped U-value summary for one building element.

        :param cleaner: Object whose ``cleaned[f"{element}-description"]`` entry can be
            turned into a DataFrame with "original_description" and
            "thermal_transmittance" columns.
        :param element: Column prefix of the element, e.g. "walls" or "floor".
        :return: A tuple of (summary DataFrame, dict with "decile_labels" and
            "decile_boundaries").
        """
        description_col = f"{element}-description"
        energy_col = f"{element}-energy-eff"
        env_col = f"{element}-env-eff"

        selected_columns = [
            "local-authority",
            "property-type",
            description_col,
            energy_col,
            env_col,
            "built-form",
            self._AREA_COLUMN,
            "number-habitable-rooms",
            "number-heated-rooms",
        ]
        group_columns = [
            "local-authority",
            "property-type",
            energy_col,
            env_col,
            "built-form",
            "number-habitable-rooms",
            "number-heated-rooms",
            self._AREA_GROUP_COLUMN,
        ]

        # na=False: a missing description would otherwise put NaN in the mask and
        # make the boolean indexing below raise. regex=False: the marker is plain text.
        has_u_value = self.data[description_col].str.contains(
            self._U_VALUE_MARKER, regex=False, na=False
        )

        # Copy so the column assignments below never write into a view of self.data.
        element_df = self.data.loc[has_u_value, selected_columns].copy()
        element_df[self._AREA_COLUMN] = element_df[self._AREA_COLUMN].astype(float)

        element_df, decile_labels, decile_boundaries = self.classify_into_deciles(
            element_df, self._AREA_COLUMN
        )

        # Attach the numeric U-value that the cleaner extracted for each description.
        u_value_lookup = pd.DataFrame(cleaner.cleaned[description_col])[
            ["original_description", "thermal_transmittance"]
        ]
        element_df = element_df.merge(
            u_value_lookup,
            how="left",
            left_on=description_col,
            right_on="original_description",
        )

        summary = (
            element_df.groupby(group_columns, observed=True)
            .agg({"thermal_transmittance": ["median", "size"]})
            .reset_index()
        )
        # Flatten the two-level column index produced by the multi-function agg.
        summary.columns = group_columns + ["median_thermal_transmittance", "n_samples"]

        decile_data = {
            "decile_labels": decile_labels,
            "decile_boundaries": decile_boundaries,
        }
        return summary, decile_data

    @staticmethod
    def classify_into_deciles(
        df: pd.DataFrame, column: str
    ) -> "tuple[pd.DataFrame, list, np.ndarray]":
        """
        Break a column in a Pandas DataFrame into deciles.

        The decile column ``column + "_group"`` is added to ``df`` in place and the same
        DataFrame object is returned.

        When several decile boundaries coincide (many identical values), the zero-width
        deciles are dropped together with their labels, so the returned labels and
        boundaries always describe strictly increasing bins and can be passed straight
        to ``classify_decile_newvalues``. A value that sits exactly on a collapsed
        boundary is labelled with the first surviving decile that starts there.

        :param df: The input Pandas DataFrame.
        :param column: The column name to break into deciles.
        :return: A tuple containing:
            - The DataFrame with the decile group column.
            - The list of decile labels actually used.
            - The array of decile boundaries actually used (one more than the labels).
        :raises ValueError: If no decile with a positive width can be formed, e.g. when
            every value in the column is identical.
        """
        # Boundaries at the 0th, 10th, ..., 100th percentile.
        raw_boundaries = np.percentile(df[column], np.arange(0, 101, 10))
        all_labels = [f"Decile {i + 1}" for i in range(10)]

        # pd.cut rejects repeated bin edges, so keep only deciles whose upper edge is
        # strictly above their lower edge. NaN boundaries compare False and are dropped.
        has_width = np.diff(raw_boundaries) > 0
        decile_labels = [label for label, keep in zip(all_labels, has_width) if keep]
        if not decile_labels:
            raise ValueError(
                f"Cannot split column '{column}' into deciles: "
                "no two distinct decile boundaries could be computed"
            )
        decile_boundaries = np.concatenate(
            (raw_boundaries[:1], raw_boundaries[1:][has_width])
        )

        # Assign decile labels to existing values
        df[column + "_group"] = pd.cut(
            df[column], bins=decile_boundaries, labels=decile_labels, include_lowest=True
        )

        return df, decile_labels, decile_boundaries

    @staticmethod
    def classify_decile_newvalues(decile_boundaries, decile_labels, new_values: list) -> list:
        """
        Classify new values into existing deciles based on decile definitions.

        :param decile_boundaries: The decile boundaries, as returned by
            ``classify_into_deciles``.
        :param decile_labels: The decile labels, as returned by ``classify_into_deciles``.
        :param new_values: A list of new values to classify.
        :return: The classifications for the new values as a list. Values that fall
            outside the boundaries are returned as NaN.
        """
        classifications = pd.cut(
            new_values, bins=decile_boundaries, labels=decile_labels, include_lowest=True
        )
        return classifications.tolist()

    def _save(self, filename):
        """
        Pickle this object to ``filename``; particularly handy for unit testing.

        :param filename: Path of the file to write. It is opened in binary write mode,
            so an existing file is overwritten.
        """
        with open(filename, "wb") as f:
            pickle.dump(self, f)