mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
cleaning columns for stonewater clustering
This commit is contained in:
parent
e9366c72e8
commit
4e85d1380e
1 changed files with 62 additions and 0 deletions
|
|
@ -1668,6 +1668,7 @@ def compile_data_final():
|
|||
'lighting-description': LightingAttributes
|
||||
}
|
||||
for variable_to_clean in cleaned.keys():
|
||||
|
||||
unique_descriptions = property_attributes[variable_to_clean].unique()
|
||||
clean_df = pd.DataFrame(cleaned[variable_to_clean])
|
||||
# Check if we have any
|
||||
|
|
@ -1686,6 +1687,67 @@ def compile_data_final():
|
|||
}
|
||||
descriptions_to_append.append(to_append)
|
||||
|
||||
descriptions_to_append = pd.DataFrame(descriptions_to_append)
|
||||
clean_df = pd.concat([clean_df, descriptions_to_append])
|
||||
|
||||
starting_size = len(property_attributes)
|
||||
property_attributes = property_attributes.merge(
|
||||
clean_df, how="left", left_on=variable_to_clean, right_on="original_description"
|
||||
)
|
||||
if starting_size != property_attributes.shape[0]:
|
||||
raise Exception("something went wrong")
|
||||
property_attributes = property_attributes.drop(columns=["original_description", "clean_description"])
|
||||
# Fill missing values in the columns brought in from clean_df with the placeholder "missing"
|
||||
for k in clean_df.columns:
|
||||
if k in property_attributes.columns:
|
||||
property_attributes[k] = property_attributes[k].fillna("missing")
|
||||
|
||||
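# Group continuous variables, such as thermal transmittance for walls, roofs and floors, into labelled ranges.
# NOTE(review): no range below covers values in (0.7, 0.9], and values above 2.5 have no lookup row, so both end up as "unknown" after the merge - confirm whether a "0.7 - 0.9" bucket was left out by mistake.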
|
||||
ranges = {
|
||||
"< 0.1": (0, 0.1),
|
||||
"0.1 - 0.3": (0.1, 0.3),
|
||||
"0.3 - 0.5": (0.3, 0.5),
|
||||
"0.5 - 0.7": (0.5, 0.7),
|
||||
"0.9 - 1": (0.9, 1),
|
||||
"1 - 1.5": (1, 1.5),
|
||||
"1.5 - 2": (1.5, 2),
|
||||
"2+": (2, 2.5)
|
||||
}
|
||||
|
||||
# Generate the lookup table: one row per value from 0.01 to 2.50 (step 0.01), mapped to the label of the range containing it
|
||||
thermal_transmittance_lookup_table = []
|
||||
for i in range(1, 251):
|
||||
value = i / 100
|
||||
for label, (low, high) in ranges.items():
|
||||
if low < value <= high:
|
||||
thermal_transmittance_lookup_table.append({"from": value, "to": label})
|
||||
break
|
||||
|
||||
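# Convert to a DataFrame so it can be merged onto property_attributes; "from" is cast to str to match the stringified thermal transmittance columns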
|
||||
thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
|
||||
thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
|
||||
|
||||
thermal_transmittance_cols = [
|
||||
c for c in property_attributes.columns if "thermal_transmittance" in c and "unit" not in c
|
||||
]
|
||||
for i, col in enumerate(thermal_transmittance_cols):
|
||||
# Perform the mapping
|
||||
to_col = f"to_{i}"
|
||||
property_attributes[col] = property_attributes[col].astype(str)
|
||||
property_attributes = property_attributes.merge(
|
||||
thermal_transmittance_lookup_table.rename(columns={"to": to_col}),
|
||||
how="left",
|
||||
left_on=col,
|
||||
right_on="from",
|
||||
suffixes=("", f"_{i}")
|
||||
)
|
||||
property_attributes = property_attributes.drop(columns=["from", col])
|
||||
property_attributes[to_col] = property_attributes[to_col].fillna("unknown")
|
||||
|
||||
# Drop the description columns that are the keys in cleaned
|
||||
property_attributes = property_attributes.drop(columns=list(cleaned.keys()))
|
||||
# Perform the mapping
|
||||
|
||||
# CLUSTERING!!
|
||||
|
||||
from sklearn.cluster import KMeans
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue