mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
error analysis - not working though
This commit is contained in:
parent
0a1f728f37
commit
14417c37df
1 changed files with 59 additions and 1 deletions
|
|
@ -15,7 +15,8 @@ class EnergyConsumptionModel:
|
|||
FEATURES = {
|
||||
"heating_kwh": [
|
||||
"lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
|
||||
"heating-cost-current", "total-floor-area", "number-heated-rooms", "number-habitable-rooms",
|
||||
"heating-cost-current", "total-floor-area", "number-heated-rooms",
|
||||
# "number-habitable-rooms",
|
||||
# "mainheat-energy-eff", "mainheat-description", "main-fuel",
|
||||
],
|
||||
"hot_water_kwh": [
|
||||
|
|
@ -214,6 +215,63 @@ class EnergyConsumptionModel:
|
|||
|
||||
return new_data
|
||||
|
||||
def error_analysis(self, target, top_n=10, unique_threshold=0.8):
    """
    Correlate the model's worst residuals with the input columns.

    For the given target, predictions are made on the stored train and
    test feature matrices, the ``top_n`` rows with the largest absolute
    residuals are looked up in ``self.input_data`` (which must therefore
    share index labels with ``self.y_train[target]`` / ``self.y_test[target]``),
    and each model feature is correlated with the residuals of those rows.

    Args:
        target: Key into ``self.models``, ``self.x_train``, ``self.x_test``,
            ``self.y_train`` and ``self.y_test``.
        top_n: Number of worst rows to inspect per split.
        unique_threshold: Text columns whose ratio of distinct values to
            rows (measured on the worst training rows) is at least this
            value are treated as identifier-like and dropped.

    Returns:
        dict with keys ``"train_corr"`` and ``"test_corr"`` (Series of
        correlations indexed by column name) and ``"top_train_data"`` and
        ``"top_test_data"`` (the worst rows, minus the dropped columns).
    """

    def text_columns(frame):
        """Return the names of object- or string-typed columns of *frame*."""
        is_object = pd.api.types.is_object_dtype
        is_string = pd.api.types.is_string_dtype
        return [
            col for col in frame.columns
            if is_object(frame[col].dtype) or is_string(frame[col].dtype)
        ]

    def high_cardinality_columns(frame, threshold):
        """Return text columns whose distinct/rows ratio reaches *threshold*."""
        row_count = len(frame)
        # An empty selection (e.g. top_n=0) would otherwise divide by zero.
        if row_count == 0:
            return []
        return [
            col for col in text_columns(frame)
            if frame[col].nunique() / row_count >= threshold
        ]

    def residual_correlations(rows, residuals, feature_columns):
        """Correlate the model features found in *rows* with *residuals*."""
        if rows.empty:
            return pd.Series(dtype=float)

        # One-hot encode every text column, remembering which dummy columns
        # each source column produced. All levels are kept (no drop_first):
        # on a handful of rows the "first" level is arbitrary, and dropping
        # it can hide a category that is actually present.
        categorical = text_columns(rows)
        pieces = [rows.drop(columns=categorical)]
        expansions = {}
        for col in categorical:
            dummies = pd.get_dummies(rows[col], prefix=col, dtype=float)
            expansions[col] = list(dummies.columns)
            pieces.append(dummies)
        encoded = pd.concat(pieces, axis=1)

        # Align with the model's feature columns. A categorical feature no
        # longer exists under its own name after encoding, so substitute its
        # dummy columns instead of letting reindex fill it with zeros (which
        # always produced a NaN correlation).
        wanted = []
        for col in feature_columns:
            if col not in encoded.columns and col in expansions:
                wanted.extend(expansions[col])
            else:
                wanted.append(col)
        wanted = list(dict.fromkeys(wanted))
        aligned = encoded.reindex(columns=wanted, fill_value=0)

        # corrwith rejects non-numeric data, so keep numeric/bool columns only.
        numeric = aligned.select_dtypes(include=["number", "bool"]).astype(float)
        return numeric.corrwith(residuals.loc[rows.index])

    # Calculate predictions and residuals
    model = self.models[target]
    train_residuals = self.y_train[target] - model.predict(self.x_train[target])
    test_residuals = self.y_test[target] - model.predict(self.x_test[target])

    # Identify top N poorly performing rows by absolute residuals
    top_train_indices = train_residuals.abs().nlargest(top_n).index
    top_test_indices = test_residuals.abs().nlargest(top_n).index

    top_train_data = self.input_data.loc[top_train_indices]
    top_test_data = self.input_data.loc[top_test_indices]

    # Identifier-like columns are chosen on the training rows and removed
    # from both splits so the two frames keep the same columns.
    exclude_cols = high_cardinality_columns(top_train_data, unique_threshold)
    top_train_data = top_train_data.drop(columns=exclude_cols)
    top_test_data = top_test_data.drop(columns=exclude_cols)

    # Correlation analysis with residuals
    train_corr = residual_correlations(
        top_train_data, train_residuals, self.x_train[target].columns
    )
    test_corr = residual_correlations(
        top_test_data, test_residuals, self.x_test[target].columns
    )

    # Return summaries
    return {
        "train_corr": train_corr,
        "test_corr": test_corr,
        "top_train_data": top_train_data,
        "top_test_data": top_test_data,
    }
|
||||
|
||||
|
||||
# Example usage:
|
||||
# Instantiate the model with the constructor's default arguments.
# NOTE(review): if this statement sits at module level it runs on import — confirm that is intended.
model = EnergyConsumptionModel()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue