from typing import List, Tuple

import numpy as np
import pandas as pd

from model_data.EpcClean import EpcClean


class UvalueEstimations:
    """Build grouped summaries of wall thermal transmittance from EPC records.

    Attributes set by this class:
        data: The input records as a DataFrame.
        walls: Per-group median thermal transmittance and sample count,
            populated by ``set_walls`` (``None`` until then).
        walls_decile_data: The floor-area decile labels and boundaries used
            by ``set_walls`` (empty dict until then).
        roofs, floors: Initialised to ``None``; nothing in this class
            populates them.
    """

    # Columns of ``self.data`` that the wall summary needs.
    _WALLS_COLUMNS = [
        "local-authority",
        "property-type",
        "walls-description",
        "walls-energy-eff",
        "walls-env-eff",
        "total-floor-area",
        "number-habitable-rooms",
        "number-heated-rooms",
    ]

    # Keys the wall records are grouped by before aggregating.
    _WALLS_GROUP_KEYS = [
        "local-authority",
        "property-type",
        "walls-energy-eff",
        "walls-env-eff",
        "number-habitable-rooms",
        "number-heated-rooms",
        "total-floor-area_group",
    ]

    def __init__(self, data):
        """Store ``data`` as a DataFrame and reset all derived summaries.

        Args:
            data: Anything accepted by the ``pd.DataFrame`` constructor.
        """
        self.data = pd.DataFrame(data)

        self.walls = None
        self.walls_decile_data = {}
        self.roofs = None
        self.floors = None

    def set_walls(self, cleaner: EpcClean) -> None:
        """Compute the wall summary and store it on ``self.walls``.

        Keeps only the records whose ``walls-description`` contains
        "Average thermal transmittance", buckets them into deciles of
        ``total-floor-area``, joins the thermal transmittance values from
        ``cleaner`` and takes the median (and row count) per group. The decile
        labels and boundaries are stored on ``self.walls_decile_data``.

        Args:
            cleaner: Object whose ``cleaned['walls-description']`` can be
                turned into a DataFrame with ``original_description`` and
                ``thermal_transmittance`` columns.
        """
        # na=False: a missing description is simply "no match". Without it the
        # mask contains NaN and boolean indexing raises ValueError.
        # regex=False: the pattern is a plain literal.
        has_measured_u_value = self.data["walls-description"].str.contains(
            "Average thermal transmittance", regex=False, na=False
        )

        # Single .loc selection plus an explicit copy, so the assignments
        # below neither touch self.data nor trigger SettingWithCopyWarning.
        walls_df = self.data.loc[has_measured_u_value, self._WALLS_COLUMNS].copy()
        walls_df["total-floor-area"] = walls_df["total-floor-area"].astype(float)

        walls_df, decile_labels, decile_boundaries = self.classify_into_deciles(
            walls_df, "total-floor-area"
        )

        # Attach the U-value for each description.
        # NOTE(review): a left join repeats a wall row once per matching
        # lookup row, so this presumably relies on ``original_description``
        # being unique in the cleaner output - confirm.
        u_value_lookup = pd.DataFrame(cleaner.cleaned["walls-description"])[
            ["original_description", "thermal_transmittance"]
        ]
        walls_df = walls_df.merge(
            u_value_lookup,
            how="left",
            left_on="walls-description",
            right_on="original_description",
        )

        # Named aggregation yields flat, already-named columns, so there is no
        # MultiIndex to rename by position afterwards.
        self.walls = (
            walls_df.groupby(self._WALLS_GROUP_KEYS, observed=True)
            .agg(
                median_thermal_transmittance=("thermal_transmittance", "median"),
                n_samples=("thermal_transmittance", "size"),
            )
            .reset_index()
        )

        self.walls_decile_data = {
            "decile_labels": decile_labels,
            "decile_boundaries": decile_boundaries,
        }

    @staticmethod
    def classify_into_deciles(
        df: pd.DataFrame, column: str
    ) -> Tuple[pd.DataFrame, List[str], np.ndarray]:
        """Bucket a numeric column into deciles.

        Adds a categorical column named ``<column>_group`` to ``df`` holding
        the labels "Decile 1" .. "Decile 10". ``df`` is modified in place and
        also returned.

        Args:
            df: The input DataFrame.
            column: Name of the numeric column to break into deciles.

        Returns:
            A tuple ``(df, decile_labels, decile_boundaries)``: the DataFrame
            with the new group column, the ten labels, and the eleven
            boundary values (0th, 10th, ..., 100th percentile). The labels and
            boundaries can be passed to ``classify_decile_newvalues``.

        Raises:
            ValueError: Raised by ``pd.cut`` when the boundaries are not
                unique (for example, heavily tied data).
        """
        # nanpercentile ignores missing values; a single NaN would otherwise
        # turn every boundary into NaN and make pd.cut fail. Rows whose value
        # is NaN end up with a NaN group.
        decile_boundaries = np.nanpercentile(df[column], np.arange(0, 101, 10))

        decile_labels = [f"Decile {i}" for i in range(1, 11)]

        # include_lowest=True so the minimum value falls into "Decile 1"
        # rather than outside the first (left-open) interval.
        df[column + "_group"] = pd.cut(
            df[column],
            bins=decile_boundaries,
            labels=decile_labels,
            include_lowest=True,
        )

        return df, decile_labels, decile_boundaries

    @staticmethod
    def classify_decile_newvalues(decile_boundaries, decile_labels, new_values: list) -> list:
        """Classify new values using previously computed decile definitions.

        Args:
            decile_boundaries: Bin edges, as returned by
                ``classify_into_deciles``.
            decile_labels: One label per bin, as returned by
                ``classify_into_deciles``.
            new_values: The values to classify.

        Returns:
            One entry per input value: its decile label, or NaN when the
            value lies outside the boundaries.
        """
        classifications = pd.cut(
            new_values,
            bins=decile_boundaries,
            labels=decile_labels,
            include_lowest=True,
        )
        return classifications.tolist()