Model/model_data/analysis/UvalueEstimations.py
Khalim Conn-Kowlessar 4dfa32ca1f new model data pickle
2023-06-27 11:30:51 +01:00

207 lines
No EOL
7.1 KiB
Python

import pickle
import pandas as pd
import numpy as np
from model_data.EpcClean import EpcClean
class UvalueEstimations:
    """
    Build median thermal-transmittance (U-value) summaries for walls and floors.

    Records whose description contains the text "Average thermal transmittance"
    are bucketed into deciles of total floor area, joined to the cleaned
    descriptions to obtain a numeric thermal transmittance, and then summarised
    (median and sample count) per combination of property attributes.
    """

    # Literal text that marks a description as carrying a measured U-value.
    _TRANSMITTANCE_MARKER = "Average thermal transmittance"
    # Column that is broken into deciles, and the name of the derived group column.
    _AREA_COLUMN = "total-floor-area"
    _AREA_GROUP_COLUMN = "total-floor-area_group"

    def __init__(self, data: list) -> None:
        """
        Initialize the UvalueEstimations class.

        :param data: The input data as a list of dictionaries, to be converted to a dataframe
        """
        self.data = pd.DataFrame(data)
        # Summary frames; populated by set_walls / set_floors (roofs is never set yet).
        self.walls = None
        self.walls_decile_data = {}
        self.roofs = None
        self.floors = None
        self.floors_decile_data = {}

    def get_estimates(self, cleaner: "EpcClean") -> None:
        """
        Calculate U-value estimates for walls, roofs, and floors.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        self.set_walls(cleaner)
        self.set_roofs(cleaner)
        self.set_floors(cleaner)

    def set_walls(self, cleaner: "EpcClean") -> None:
        """
        Set U-value estimates for walls.

        Populates ``self.walls`` with the summary frame and
        ``self.walls_decile_data`` with the decile labels and boundaries.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        self.walls, self.walls_decile_data = self._summarise_element(cleaner, "walls")

    def set_roofs(self, cleaner: "EpcClean") -> None:
        """
        Set U-value estimates for roofs.

        Not implemented: this is a placeholder, so ``self.roofs`` stays ``None``.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        pass

    def set_floors(self, cleaner: "EpcClean") -> None:
        """
        Set U-value estimates for floors.

        Populates ``self.floors`` with the summary frame and
        ``self.floors_decile_data`` with the decile labels and boundaries.

        :param cleaner: An instance of the EpcClean class used for cleaning data.
        """
        # The floor columns use the singular prefix ("floor-description", ...).
        self.floors, self.floors_decile_data = self._summarise_element(cleaner, "floor")

    def _summarise_element(self, cleaner: "EpcClean", element: str):
        """
        Summarise thermal transmittance for one building element.

        :param cleaner: Object whose ``cleaned[f"{element}-description"]`` holds records with
            ``original_description`` and ``thermal_transmittance`` fields.
        :param element: Column prefix of the element, e.g. ``"walls"`` or ``"floor"``.
        :return: A tuple of (summary dataframe, dict with ``decile_labels`` and ``decile_boundaries``).
        """
        description_col = f"{element}-description"
        energy_eff_col = f"{element}-energy-eff"
        env_eff_col = f"{element}-env-eff"

        # na=False: rows with a missing description are excluded instead of
        # producing NaN in the mask, which boolean indexing rejects.
        # regex=False: the marker is a plain literal, not a pattern.
        has_u_value = self.data[description_col].str.contains(
            self._TRANSMITTANCE_MARKER, regex=False, na=False
        )

        # Take just the columns we want
        wanted_columns = [
            "local-authority", "property-type", description_col, energy_eff_col, env_eff_col,
            "built-form", self._AREA_COLUMN, "number-habitable-rooms", "number-heated-rooms",
        ]
        # Explicit copy so the column assignments below never target a view of self.data.
        element_df = self.data.loc[has_u_value, wanted_columns].copy()
        element_df[self._AREA_COLUMN] = element_df[self._AREA_COLUMN].astype(float)

        element_df, decile_labels, decile_boundaries = self.classify_into_deciles(
            element_df, self._AREA_COLUMN
        )

        # Attach the numeric U-value parsed from each description.
        u_value_lookup = pd.DataFrame(cleaner.cleaned[description_col])[
            ["original_description", "thermal_transmittance"]
        ]
        element_df = element_df.merge(
            u_value_lookup,
            how="left",
            left_on=description_col,
            right_on="original_description",
        )

        group_columns = [
            "local-authority",
            "property-type",
            energy_eff_col,
            env_eff_col,
            "built-form",
            "number-habitable-rooms",
            "number-heated-rooms",
            self._AREA_GROUP_COLUMN,
        ]
        # observed=True: only emit decile categories that actually occur in a group.
        u_value_summary = (
            element_df.groupby(group_columns, observed=True)
            .agg({"thermal_transmittance": ["median", "size"]})
            .reset_index()
        )
        # Flatten the two-level column index produced by the multi-function agg.
        u_value_summary.columns = group_columns + ["median_thermal_transmittance", "n_samples"]

        decile_data = {
            "decile_labels": decile_labels,
            "decile_boundaries": decile_boundaries,
        }
        return u_value_summary, decile_data

    @staticmethod
    def classify_into_deciles(df: pd.DataFrame, column: str) -> "tuple[pd.DataFrame, list, np.ndarray]":
        """
        Break a column in a Pandas DataFrame into deciles.

        The group column ``<column>_group`` is added to ``df`` in place; the same
        frame is also returned for convenience.

        Note: ``pd.cut`` raises ``ValueError`` when the computed percentile edges
        are not unique (for example when many rows share the same value).

        :param df: The input Pandas DataFrame (modified in place).
        :param column: The column name to break into deciles.
        :return: A tuple containing:
            - The DataFrame with the decile group column.
            - The list of decile labels ("Decile 1" .. "Decile 10").
            - The array of the 11 decile boundaries (0th to 100th percentile).
        """
        # Calculate decile boundaries: 0th, 10th, ..., 100th percentile
        decile_boundaries = np.percentile(df[column], np.arange(0, 101, 10))
        # Create decile labels
        decile_labels = [f"Decile {i + 1}" for i in range(10)]
        # Assign decile labels to existing values; include_lowest keeps the minimum in Decile 1
        df[column + "_group"] = pd.cut(
            df[column], bins=decile_boundaries, labels=decile_labels, include_lowest=True
        )
        return df, decile_labels, decile_boundaries

    @staticmethod
    def classify_decile_newvalues(decile_boundaries, decile_labels, new_values: list) -> list:
        """
        Classify new values into existing deciles based on decile definitions.

        Values outside the range covered by ``decile_boundaries`` are returned as NaN.

        :param decile_boundaries: The decile boundaries, as returned by classify_into_deciles.
        :param decile_labels: The list of decile labels.
        :param new_values: A list of new values to classify.
        :return: The classifications for the new values as a list.
        """
        # Classify new values based on decile definitions
        classifications = pd.cut(
            new_values, bins=decile_boundaries, labels=decile_labels, include_lowest=True
        )
        return classifications.tolist()

    def _save(self, filename) -> None:
        """
        Useful utility function to store this object, which is particularly handy for unit testing

        :param filename: Path of the file to write the pickled object to.
        :return:
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f)