First commit of UvalueEstimations class

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-22 10:06:28 +01:00
parent 537caa410c
commit f5bebd4b78
3 changed files with 102 additions and 3 deletions

View file

@ -0,0 +1,99 @@
import pandas as pd
import numpy as np
from model_data.EpcClean import EpcClean
class UvalueEstimations:
    """Summarise wall U-values (thermal transmittance) across similar properties.

    The input records are held as a DataFrame in ``data``. ``set_walls`` fills
    ``walls`` with a grouped median table and ``walls_decile_data`` with the
    floor-area bins used to build it. ``roofs`` and ``floors`` are placeholders
    that no method of this class populates yet.
    """

    def __init__(self, data):
        """Store ``data`` (anything ``pd.DataFrame`` accepts) and reset all results."""
        self.data = pd.DataFrame(data)
        self.walls = None
        self.walls_decile_data = {}
        self.roofs = None
        self.floors = None

    def set_walls(self, cleaner: "EpcClean"):
        """Build the median wall U-value table and store it on ``self.walls``.

        Only records whose ``walls-description`` contains
        "Average thermal transmittance" are used. They are binned by
        ``total-floor-area``, joined to the cleaned wall descriptions to obtain
        ``thermal_transmittance``, and then grouped to give the median value and
        the number of samples per group.

        Args:
            cleaner: Object whose ``cleaned["walls-description"]`` can be turned
                into a DataFrame with ``original_description`` and
                ``thermal_transmittance`` columns.

        Raises:
            ValueError: If the selected records contain fewer than two distinct
                floor areas (see ``classify_into_deciles``).
        """
        group_columns = [
            "local-authority",
            "property-type",
            "walls-energy-eff",
            "walls-env-eff",
            "number-habitable-rooms",
            "number-heated-rooms",
            "total-floor-area_group",
        ]
        walls_columns = [
            "local-authority", "property-type", "walls-description", "walls-energy-eff", "walls-env-eff",
            "total-floor-area", "number-habitable-rooms", "number-heated-rooms"
        ]

        # na=False: a missing description is simply "not a match"; without it the
        # mask contains NaN and boolean indexing raises.
        has_u_value = self.data["walls-description"].str.contains("Average thermal transmittance", na=False)

        # Explicit copy so the assignments below never write into a view of self.data.
        walls_df = self.data.loc[has_u_value, walls_columns].copy()
        walls_df["total-floor-area"] = walls_df["total-floor-area"].astype(float)
        walls_df, decile_labels, decile_boundaries = self.classify_into_deciles(walls_df, "total-floor-area")

        # Look up the U-value for each description.
        u_values = pd.DataFrame(cleaner.cleaned["walls-description"])[
            ["original_description", "thermal_transmittance"]
        ]
        walls_df = walls_df.merge(
            u_values,
            how="left",
            left_on="walls-description",
            right_on="original_description",
        )

        # Named aggregation yields flat column names directly, so the grouping
        # keys do not have to be repeated to rename the result.
        u_value_summary = walls_df.groupby(group_columns, observed=True).agg(
            median_thermal_transmittance=("thermal_transmittance", "median"),
            n_samples=("thermal_transmittance", "size"),
        ).reset_index()

        self.walls = u_value_summary
        self.walls_decile_data = {
            "decile_labels": decile_labels,
            "decile_boundaries": decile_boundaries
        }

    @staticmethod
    def classify_into_deciles(df: pd.DataFrame, column: str) -> "tuple[pd.DataFrame, list, np.ndarray]":
        """Bin ``df[column]`` by its decile boundaries.

        A categorical column named ``column + "_group"`` is added to ``df``
        (the frame is modified in place and also returned). Missing values are
        ignored when computing the boundaries and receive a missing group.

        When repeated values make several percentile boundaries coincide, the
        duplicates are collapsed, so fewer than ten bins (and labels) are
        produced; the labels are then sequential bin names rather than true
        decile numbers.

        Args:
            df: The input DataFrame.
            column: Name of the numeric column to bin.

        Returns:
            A tuple ``(df, decile_labels, decile_boundaries)`` where
            ``decile_labels`` is the list of bin labels and
            ``decile_boundaries`` is the array of bin edges (one more than the
            number of labels). Both can be passed to
            ``classify_decile_newvalues``.

        Raises:
            ValueError: If the column has fewer than two distinct non-missing
                values, since no interval can be formed.
        """
        values = df[column].dropna()
        if values.nunique() < 2:
            raise ValueError(
                f"Cannot bin column {column!r}: at least two distinct non-missing values are required"
            )

        # np.unique drops coinciding boundaries, which pd.cut would reject.
        decile_boundaries = np.unique(np.percentile(values, np.arange(0, 101, 10)))
        decile_labels = [f"Decile {i + 1}" for i in range(len(decile_boundaries) - 1)]

        # include_lowest keeps the minimum value inside the first bin.
        df[column + "_group"] = pd.cut(df[column], bins=decile_boundaries, labels=decile_labels,
                                       include_lowest=True)
        return df, decile_labels, decile_boundaries

    @staticmethod
    def classify_decile_newvalues(decile_boundaries, decile_labels, new_values: list) -> list:
        """Classify ``new_values`` into previously computed bins.

        Args:
            decile_boundaries: Bin edges as returned by ``classify_into_deciles``.
            decile_labels: Bin labels as returned by ``classify_into_deciles``.
            new_values: Values to classify.

        Returns:
            One entry per value: its bin label, or NaN when the value lies
            outside the boundaries.
        """
        classifications = pd.cut(new_values, bins=decile_boundaries, labels=decile_labels, include_lowest=True)
        return classifications.tolist()

View file

View file

@ -154,9 +154,6 @@ def handler():
# We need to deduce a U-value for "Good" energy efficiency
df = pd.DataFrame(data)
df = df[df["walls-description"].str.contains("Average thermal transmittance")]
mainheating = pd.DataFrame(
[{"address1": p.address1, "postcode": p.postcode, **p.main_heating} for p in input_properties])
hotwater = pd.DataFrame([{"address1": p.address1, **p.hotwater} for p in input_properties])
@ -167,3 +164,6 @@ def handler():
# 'Flat 28, 22 Adelina Grove' 'Solid brick, as built, insulated (assumed)'
# so to do this, filter on the local authority code and property type, where we have U
# values for the wall and take a median!
p = input_properties[6]
df = pd.DataFrame(data)