From cbfb9a5a93d867ba8b1422e31c987e41b7b053cb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 30 Jun 2023 10:03:41 +0100 Subject: [PATCH] scrappy code testing out modelling effect of thermal transmittance on EPC --- model_data/app.py | 38 ++++++++++++++++++++++++++++++++++++- model_data/requirements.txt | 3 ++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/model_data/app.py b/model_data/app.py index a19635e3..4ebad3d2 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -237,6 +237,7 @@ def handler(): "built-form", # "construction-age-band", "number-habitable-rooms", + "constituency", ] component_features = [ @@ -245,8 +246,43 @@ def handler(): ] model_data = df[[response] + component_features + base_features] - model_data = model_data.reset_index() + model_data = model_data.reset_index(drop=True) model_data["idx"] = model_data.index.copy() + + # Append on u-value estimates + model_data = model_data.merge( + pd.DataFrame(cleaner.cleaned["walls-description"])[["original_description", "thermal_transmittance"]], + how="left", + left_on="walls-description", + right_on="original_description" + ) + # Take just entries with U-values + model_data = model_data[~pd.isnull(model_data["thermal_transmittance"])] + + # We need to split the data into a train and test set for model build + + import statsmodels.api as sm + + # Assuming 'df' is your DataFrame + X = model_data[base_features + ["thermal_transmittance"]] + Y = model_data[response] + + # Add a constant to the independent value + X1 = sm.add_constant(X) + + # make regression model + model = sm.OLS(Y, X1) + + # fit model and print results + results = model.fit() + print(results.summary()) + + model_data[["thermal_transmittance", response]].corr() + + summary = model_data.groupby(["property-type", "built-form"], observed=True)[ + ["thermal_transmittance", response] + ].corr() + summary = ( model_data .groupby(component_features + base_features) diff --git a/model_data/requirements.txt 
b/model_data/requirements.txt index c4fbd083..609cb528 100644 --- a/model_data/requirements.txt +++ b/model_data/requirements.txt @@ -14,4 +14,5 @@ pyproj pint geopandas mip -seaborn \ No newline at end of file +seaborn +statsmodels \ No newline at end of file