diff --git a/model_data/app.py b/model_data/app.py
index 235ebfc1..0a6055f1 100644
--- a/model_data/app.py
+++ b/model_data/app.py
@@ -77,12 +77,12 @@ def handler():
     # We pull properties from local authorities, by property type. This will allow us to build
     # a dataset of up to 10k properties per local authority/property type combination
     data = []
-    for la in tqdm(local_authorities):
+    for c in tqdm(constituencies):
         for pt in property_types:
             data.extend(
                 pagenated_epc_download(
                     client=epc_client,
-                    params={"local-authority": la, "property-type": pt},
+                    params={"constituency": c, "property-type": pt},
                     page_size=5000,
                     n_pages=10,
                 )
@@ -240,11 +240,17 @@ def handler():
         # "construction-age-band",
         "number-habitable-rooms",
         "constituency",
+        "number-heated-rooms",
     ]
 
     component_features = [
         "walls-description",
         "floor-description",
+        "lighting-description",
+        "windows-description",
+        "roof-description",
+        "mainheat-description",
+        "main-fuel"
     ]
 
     model_data = df[[response] + component_features + base_features]
@@ -253,18 +259,37 @@ def handler():
 
     # Append on u-value estimates
     model_data = model_data.merge(
-        pd.DataFrame(cleaner.cleaned["walls-description"])[["original_description", "thermal_transmittance"]],
+        pd.DataFrame(cleaner.cleaned["walls-description"])[["original_description", "thermal_transmittance"]].rename(
+            columns={"thermal_transmittance": "walls_u_value", }
+        ),
         how="left",
         left_on="walls-description",
         right_on="original_description"
+    ) \
+        .drop(columns=["original_description"]) \
+        .merge(
+        pd.DataFrame(cleaner.cleaned["floor-description"])[["original_description", "thermal_transmittance"]].rename(
+            columns={"thermal_transmittance": "floor_u_value", }
+        ),
+        how="left",
+        left_on="floor-description",
+        right_on="original_description"
     )
     # Take just entries with U-values
-    model_data = model_data[~pd.isnull(model_data["thermal_transmittance"])]
-    model_data = model_data[base_features + ["thermal_transmittance", response]]
+    model_data = model_data[
+        ~pd.isnull(model_data["walls_u_value"]) &
+        ~pd.isnull(model_data["floor_u_value"])
+        ]
+    model_data = model_data[
+        base_features + [c for c in component_features if c not in [
+            "walls-description", "floor-description"]] + ["walls_u_value", "floor_u_value", response]
+        ]
 
     # We need to split the data into a train and test set for model build
     categorical_cols = [
-        "property-type", "built-form", "number-habitable-rooms", "constituency",
+        "property-type", "built-form", "number-habitable-rooms", "constituency", "number-heated-rooms",
+        "lighting-description", "windows-description", "roof-description", "mainheat-description", "main-fuel",
+
     ]
 
     # If these categorical variables are not of type 'category', convert them
@@ -325,7 +350,7 @@ def handler():
 
     import numpy as np
     from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, \
-        median_absolute_error
+        median_absolute_error, mean_absolute_percentage_error
 
     def calculate_regression_metrics(y_true, y_pred, n=20):
         """
@@ -340,11 +365,14 @@ def handler():
         """
         metrics = {}
 
+        metrics['MAPE'] = mean_absolute_percentage_error(y_true, y_pred)
         metrics['Mean Squared Error'] = mean_squared_error(y_true, y_pred)
         metrics['Mean Absolute Error'] = mean_absolute_error(y_true, y_pred)
         metrics['R2 Score'] = r2_score(y_true, y_pred)
         metrics['Explained Variance Score'] = explained_variance_score(y_true, y_pred)
         metrics['Median Absolute Error'] = median_absolute_error(y_true, y_pred)
+        metrics['Mean True Value'] = y_true.mean()
+        metrics['Mean Predicted Value'] = y_pred.mean()
 
         errors = pd.DataFrame()
         errors['Fit'] = y_true
@@ -358,8 +386,44 @@ def handler():
 
     fit_error, worst_errors = calculate_regression_metrics(y_true=Y, y_pred=results.fittedvalues)
 
-    worst_x = model_data[model_data.index.isin(worst_errors.index)]
+    model_data['fit'] = results.fittedvalues
     # The worst errors over index heavily for flats
+    worst_x = model_data[model_data.index.isin(worst_errors.index)]
+
+    # Notes
+    # TODO: We might want to look at adding in the u-value estimates for the properties that do not have them
+    # so that we have more data.
+    # TODO: Add in the u-values for roofs rather than the description
+    # TODO: Add in the actual property features for walls, floors, roof, not just the u-value
+    # TODO: Think about how we use sap vs rdsap - should we add a feature in the model for transaction-type?
+    #
+    # property type looks okay - we're definitely low on the number of bungalows
+    # number-habitable-rooms & number-heated-rooms are unpopulated so pretty useless atm
+    # **** constituency should be looked at - potentially modelled individually as some constituencies
+    # perform much worse than others despite enough data.
+    # **** Lighting is a bit of a mess - needs to be looked at. Most properties are of the same type
+    # and a few of the categories just have barely any data and poor scores
+    # **** windows-description - again, most of the properties are of the same type; need more samples
+    # for the smaller groups
+    # **** Turn roof into U-value
+    # **** mainheat is a bad one - community scheme seems to actually be quite a lot of properties, it's ok for
+    #      MAPE though.
+
+    grouped_error = []
+    groupby = ["mainheat-description"]
+    for group, data in model_data.groupby(groupby, observed=True):
+        group_fit_error, _ = calculate_regression_metrics(y_true=data[response].astype(float), y_pred=data["fit"])
+        # plot_regression(pd.DataFrame({"fit": data["fit"].values, "actual": data[response].astype(float).values}))
+        grouped_error.append(
+            {
+                **dict(zip(groupby, group)),
+                "n_samples": data.shape[0],
+                **group_fit_error,
+            }
+        )
+
+    grouped_error = pd.DataFrame(grouped_error)
+    grouped_error = grouped_error.sort_values("R2 Score", ascending=True)
 
     fit_df = pd.DataFrame(
         {
diff --git a/model_data/downloader.py b/model_data/downloader.py
index 7dcc84ba..5355367b 100644
--- a/model_data/downloader.py
+++ b/model_data/downloader.py
@@ -15,9 +15,9 @@ def pagenated_epc_download(client, params, page_size, n_pages, verbose=0, slowdo
         # Note: We can only make 10k queries for a single set of search queries.
         # It might make sense to download data via zip for machine learning since we don't need this
         # data to be perfectly up to date
-        if search_resp is None:
+        if not search_resp:
             break
-            
+
         n_completed += 1
 
         results.extend(search_resp["rows"])
diff --git a/model_data/requirements.txt b/model_data/requirements.txt
index d1dfdd73..126c63ed 100644
--- a/model_data/requirements.txt
+++ b/model_data/requirements.txt
@@ -1,4 +1,4 @@
-epc-api-python
+epc-api-python==1.0.2
 python-dotenv
 tqdm
 pandas