class UvalueEstimations:
    """Summarise measured wall U-values from a table of property records.

    The records are held as a DataFrame in ``self.data``. ``set_walls`` fills
    ``self.walls`` with a per-group median of the measured thermal
    transmittance and ``self.walls_decile_data`` with the floor-area bin
    definitions used for that grouping. ``self.roofs`` and ``self.floors`` are
    placeholders that nothing in this class populates.
    """

    # Substring identifying wall descriptions that carry a measured U-value.
    _MEASURED_WALL_MARKER = "Average thermal transmittance"

    # Grouping keys for the wall summary, in output column order.
    _WALL_GROUP_KEYS = [
        "local-authority",
        "property-type",
        "walls-energy-eff",
        "walls-env-eff",
        "number-habitable-rooms",
        "number-heated-rooms",
        "total-floor-area_group",
    ]

    def __init__(self, data):
        """Store ``data`` (anything ``pd.DataFrame`` accepts) as a DataFrame."""
        self.data = pd.DataFrame(data)
        self.walls = None
        self.walls_decile_data = {}
        self.roofs = None
        self.floors = None

    def set_walls(self, cleaner: "EpcClean"):
        """Build the wall U-value summary and store it on the instance.

        Rows whose ``walls-description`` contains "Average thermal
        transmittance" are kept, binned by ``total-floor-area``, joined to the
        cleaner's lookup to obtain ``thermal_transmittance`` and then
        aggregated per group.

        Args:
            cleaner: Object whose ``cleaned['walls-description']`` can be
                turned into a DataFrame with ``original_description`` and
                ``thermal_transmittance`` columns.

        Side effects:
            Sets ``self.walls`` (one row per group with
            ``median_thermal_transmittance`` and ``n_samples``) and
            ``self.walls_decile_data`` (``decile_labels`` and
            ``decile_boundaries``).

        Raises:
            ValueError: If no usable floor-area values remain to bin
                (propagated from ``classify_into_deciles``).
        """
        walls_columns = [
            "local-authority", "property-type", "walls-description", "walls-energy-eff", "walls-env-eff",
            "total-floor-area", "number-habitable-rooms", "number-heated-rooms"
        ]

        # na=False: a missing description is simply "not measured". Without it
        # the mask contains NaN and boolean indexing raises.
        has_measured_u_value = self.data["walls-description"].str.contains(
            self._MEASURED_WALL_MARKER, regex=False, na=False
        )

        # Copy so the column assignment below writes to our own frame rather
        # than to a view of self.data.
        walls_df = self.data.loc[has_measured_u_value, walls_columns].copy()
        walls_df["total-floor-area"] = walls_df["total-floor-area"].astype(float)

        walls_df, decile_labels, decile_boundaries = self.classify_into_deciles(walls_df, "total-floor-area")

        # One row per description: a repeated key in the lookup would multiply
        # rows in the left merge and inflate n_samples.
        u_value_lookup = pd.DataFrame(cleaner.cleaned['walls-description'])[
            ["original_description", "thermal_transmittance"]
        ].drop_duplicates(subset="original_description")

        walls_df = walls_df.merge(
            u_value_lookup,
            how="left",
            left_on="walls-description",
            right_on="original_description",
        )

        # observed=True keeps only floor-area bins that actually occur.
        u_value_summary = walls_df.groupby(self._WALL_GROUP_KEYS, observed=True).agg(
            median_thermal_transmittance=("thermal_transmittance", "median"),
            n_samples=("thermal_transmittance", "size"),
        ).reset_index()

        self.walls = u_value_summary
        self.walls_decile_data = {
            "decile_labels": decile_labels,
            "decile_boundaries": decile_boundaries
        }

    @staticmethod
    def classify_into_deciles(df: pd.DataFrame, column: str) -> "tuple[pd.DataFrame, list, np.ndarray]":
        """Bin ``df[column]`` by its decile boundaries.

        A categorical column named ``column + "_group"`` is added to ``df``
        in place. Missing values in ``column`` are ignored when computing the
        boundaries and receive a missing group.

        When several percentiles coincide (heavily tied data) the repeated
        edges are merged, so fewer than ten bins may be produced; the labels
        are then "Decile 1" .. "Decile k" for the k bins that remain.

        Args:
            df: The input DataFrame (modified in place).
            column: Name of the numeric column to bin.

        Returns:
            A tuple ``(df, decile_labels, decile_boundaries)`` where
            ``decile_labels`` is the list of bin labels and
            ``decile_boundaries`` is the increasing array of bin edges
            (one more edge than labels).

        Raises:
            ValueError: If the column has no non-missing values, or all of
                its values are identical so no bin can be formed.
        """
        present = df[column].dropna()
        if present.empty:
            raise ValueError(f"Column {column!r} has no non-missing values to derive deciles from")

        # np.unique merges coinciding percentiles; pd.cut rejects repeated edges.
        decile_boundaries = np.unique(np.percentile(present, np.arange(0, 101, 10)))
        if len(decile_boundaries) < 2:
            raise ValueError(f"Column {column!r} has a single distinct value; cannot form bins")

        decile_labels = [f"Decile {i + 1}" for i in range(len(decile_boundaries) - 1)]

        # include_lowest so the minimum value falls in the first bin.
        df[column + "_group"] = pd.cut(df[column], bins=decile_boundaries, labels=decile_labels,
                                       include_lowest=True)

        return df, decile_labels, decile_boundaries

    @staticmethod
    def classify_decile_newvalues(decile_boundaries, decile_labels, new_values: list) -> list:
        """Classify ``new_values`` into previously computed bins.

        Args:
            decile_boundaries: Bin edges as returned by ``classify_into_deciles``.
            decile_labels: Bin labels as returned by ``classify_into_deciles``.
            new_values: Values to classify.

        Returns:
            One label per value; values outside the outermost boundaries
            come back as NaN.
        """
        classifications = pd.cut(new_values, bins=decile_boundaries, labels=decile_labels, include_lowest=True)
        return classifications.tolist()
type, where we have U # values for the wall and take a median! + + p = input_properties[6] + df = pd.DataFrame(data)