Scrappy code testing out modelling the effect of thermal transmittance on EPC

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-30 10:03:41 +01:00
parent b922d5a9b7
commit cbfb9a5a93
2 changed files with 39 additions and 2 deletions

View file

@ -237,6 +237,7 @@ def handler():
"built-form",
# "construction-age-band",
"number-habitable-rooms",
"constituency",
]
component_features = [
@ -245,8 +246,43 @@ def handler():
]
model_data = df[[response] + component_features + base_features]
model_data = model_data.reset_index()
model_data = model_data.reset_index(drop=True)
model_data["idx"] = model_data.index.copy()
# Append on u-value estimates
model_data = model_data.merge(
pd.DataFrame(cleaner.cleaned["walls-description"])[["original_description", "thermal_transmittance"]],
how="left",
left_on="walls-description",
right_on="original_description"
)
# Take just entries with U-values
model_data = model_data[~pd.isnull(model_data["thermal_transmittance"])]
# We need to split the data into a train and test set for model build
import statsmodels.api as sm
# Predictors: the base features plus the merged-in thermal transmittance estimate
X = model_data[base_features + ["thermal_transmittance"]]
Y = model_data[response]
# Add a constant (intercept) column to the independent variables
X1 = sm.add_constant(X)
# make regression model
model = sm.OLS(Y, X1)
# fit model and print results
results = model.fit()
print(results.summary())
model_data[["thermal_transmittance", response]].corr()
summary = model_data.groupby(["property-type", "built-form"], observed=True)[
["thermal_transmittance", response]
].corr()
summary = (
model_data
.groupby(component_features + base_features)

View file

@ -14,4 +14,5 @@ pyproj
pint
geopandas
mip
seaborn
seaborn
statsmodels