diff --git a/model_data/analysis/SapModel.py b/model_data/analysis/SapModel.py
index d18429dc..8b0013c3 100644
--- a/model_data/analysis/SapModel.py
+++ b/model_data/analysis/SapModel.py
@@ -3,6 +3,8 @@ import pandas as pd
 import statsmodels.api as sm
 import matplotlib.pyplot as plt
 import pickle
+from typing import Any, Dict, Tuple
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, \
     median_absolute_error, mean_absolute_percentage_error
 
@@ -10,7 +12,7 @@ with open("all_data.pkl", "rb") as f:
     all_data = pickle.load(f)
 
 
-class SalModel:
+class SapModel:
     # We want to estimate for making improvements on different property components
     RESPONSE = "environment-impact-current"
     # We could potentially  build models by constituency to avoid having too many
@@ -64,81 +66,71 @@ class SalModel:
         "windows-description",
         "glazed-type",
         "glazed-area",
-        "mainheat-description",
-
+        "construction-age-band",
     ]
 
-    def __init__(self, data, cleaner):
+    def __init__(self, data, cleaner, test_size=0.2, random_state=None):
         self.df = pd.DataFrame(data)
         self.cleaner = cleaner
+        self.random_state = random_state if random_state is not None else 42  # None falls back to a fixed seed of 42
+        self.test_size = 0.2 if test_size is None else test_size  # None falls back to 0.2
 
         self.model_data = None
         self.train_x = None
         self.train_y = None
+        self.test_x = None  # populated by make_training_test
+        self.test_y = None  # populated by make_training_test
         self.results = None
         self.model_data = None
         self.fit_error = None
         self.worst = {"errors": pd.DataFrame(), "x": pd.DataFrame()}
+        self.fit_df = None  # fitted vs actual values of the training set, built by fit_model
 
-    def _append_cleaned_data(self, model_data):
+    def run(self, plot=False):
         """
-        We need to estimate the u-value impact for:
-        1) Walls
-        2) Roof
-        3) Floors
-        We append this data on
-
-        Additionally, we append on the extracted proportion of low energy lighting, which
-        is moreliably extracted that using the low-energy-lighting column
+        Run create_dataset and fit_model in order, then plot if ``plot``; errors are printed, not raised.
         """
+        try:
+            self.create_dataset()
+            self.fit_model()
+            if plot:
+                self.plot_regression(self.fit_df)
+        except Exception as e:  # best-effort pipeline: report the failure instead of propagating it
+            print("An error occurred during execution.")
+            print(f"{type(e).__name__}: {e}")  # str(e) alone hides the exception class (e.g. KeyError)
 
-        wall_u_values = pd.DataFrame(self.cleaner.cleaned["walls-description"])[
-            ["original_description", "thermal_transmittance"]].rename(
-            columns={"thermal_transmittance": "walls_u_value"}
+    def _merge_with_u_values(
+        self, model_data: pd.DataFrame, description: str, thermal_transmittance: str
+    ) -> pd.DataFrame:
+        """Left-join the cleaned lookup column ``thermal_transmittance`` onto ``model_data`` as ``{description}_u_value``."""
+        lookup = pd.DataFrame(self.cleaner.cleaned[f"{description}-description"])
+        u_values = lookup[["original_description", thermal_transmittance]].rename(
+            columns={thermal_transmittance: f"{description}_u_value"}
         )
 
-        floor_u_values = pd.DataFrame(self.cleaner.cleaned["floor-description"])[
-            ["original_description", "thermal_transmittance"]].rename(
-            columns={"thermal_transmittance": "floor_u_value"}
-        )
+        merged = model_data.merge(
+            u_values,
+            how="left",
+            left_on=f"{description}-description",
+            right_on="original_description",
+        )
 
-        roof_u_values = pd.DataFrame(self.cleaner.cleaned["roof-description"])[
-            ["original_description", "thermal_transmittance"]].rename(
-            columns={"thermal_transmittance": "roof_u_value", }
-        )
+        return merged.drop(columns=["original_description"])
 
+    def _append_cleaned_data(self, model_data: pd.DataFrame) -> pd.DataFrame:
+        """Append the walls/floor/roof u-values and the low-energy lighting proportion to ``model_data``."""
+        for component in ("walls", "floor", "roof"):
+            model_data = self._merge_with_u_values(model_data, component, "thermal_transmittance")
+        # Lighting is merged on its own below: it carries a proportion, so the u-value helper does not apply
         lighting_proportions = pd.DataFrame(self.cleaner.cleaned["lighting-description"])[
             ["original_description", "low_energy_proportion"]]
 
         model_data = model_data.merge(
-            wall_u_values,
-            how="left",
-            left_on="walls-description",
-            right_on="original_description"
-        ).drop(
-            columns=["original_description"]
-        ).merge(
-            floor_u_values,
-            how="left",
-            left_on="floor-description",
-            right_on="original_description"
-        ).drop(
-            columns=["original_description"]
-        ).merge(
-            roof_u_values,
-            how="left",
-            left_on="roof-description",
-            right_on="original_description"
-        ).drop(
-            columns=["original_description"]
-        ).merge(
             lighting_proportions,
             how="left",
             left_on="lighting-description",
             right_on="original_description"
-        ).drop(
-            columns=["original_description"]
-        )
+        ).drop(columns=["original_description"])
 
         return model_data
 
@@ -195,33 +187,59 @@ class SalModel:
             model_data[col] = model_data[col].astype('category')
 
         # Convert response
-        self.model_data[self.RESPONSE] = self.model_data[self.RESPONSE].astype(float)
+        model_data[self.RESPONSE] = model_data[self.RESPONSE].astype(float)
 
         self.model_data = model_data
 
-    def make_training_test(self):
+    def make_training_test(self, x):
+        """Split ``x`` into train/test features and response and store the four parts on the instance."""
         # Split into training and test
-        # Dummy data
-        pass
+        features = x.drop(columns=self.RESPONSE)
+        response = x[self.RESPONSE]
+        self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(
+            features, response, test_size=self.test_size, random_state=self.random_state
+        )
 
     def fit_model(self):
+        """Dummy-encode the model data, split it and fit an OLS regression on the training set."""
+        # Drop a "fit" column left by an earlier call so it cannot leak in as a predictor
+        model_data = self.model_data.drop(columns=["fit"], errors="ignore")
+        x = pd.get_dummies(model_data, columns=self.CATEGORICAL_COLS, drop_first=True)
+
+        # Convert booleans to integer and object columns to float
+        for col in x.columns:
+            if x[col].dtype == bool:
+                x[col] = x[col].astype(int)
+            if x[col].dtype == object:
+                x[col] = x[col].astype(float)
+
+        # Create the training and test sets for each run
+        self.make_training_test(x)
+
         # Add a constant to the independent value
-        x1 = sm.add_constant(self.X)
+        train_x = sm.add_constant(self.train_x)
 
         # make regression model
-        model = sm.OLS(self.Y, x1)
+        model = sm.OLS(self.train_y, train_x)
 
         # fit model and print results
         self.results = model.fit()
 
         self.fit_error, self.worst["errors"] = self.calculate_regression_metrics(
-            y_true=self.Y, y_pred=self.results.fittedvalues
+            y_true=self.train_y, y_pred=self.results.fittedvalues
         )
 
         self.model_data['fit'] = self.results.fittedvalues
         # The worst errors over index heavily for flats
         self.worst["x"] = self.model_data[self.model_data.index.isin(self.worst["errors"].index)]
 
+        self.fit_df = pd.DataFrame(
+            {
+                "fit": self.results.fittedvalues,
+                "actual": self.train_y
+            }
+        ).sort_values("actual", ascending=True)
+
     @staticmethod
     def plot_regression(df):
         # Extract the "fit" and "actual" columns from the dataframe
@@ -280,7 +298,7 @@ class SalModel:
         return metrics, worst_errors
 
 
-self = SalModel(
+self = SapModel(  # NOTE(review): module-level instance named `self` — presumably a debugging aid; confirm
     data=all_data["data"],
     cleaner=all_data["cleaner"]
 )
diff --git a/model_data/app.py b/model_data/app.py
index 2fcf48a9..586337db 100644
--- a/model_data/app.py
+++ b/model_data/app.py
@@ -243,21 +243,6 @@ def handler():
 
     # If these categorical variables are not of type 'category', convert them
 
-    # Dummy out the categorical variables
-    training_data = pd.get_dummies(model_data, columns=categorical_cols, drop_first=True)
-
-    # Convert booleans to integer
-    for col in training_data.columns:
-        if training_data[col].dtype == bool:
-            training_data[col] = training_data[col].astype(int)
-
-        if training_data[col].dtype == object:
-            training_data[col] = training_data[col].astype(float)
-
-    # Assuming 'df' is your DataFrame
-    X = training_data.drop(columns=response)
-    Y = training_data[response]
-
     print(results.summary())
 
     import matplotlib.pyplot as plt
@@ -281,15 +266,6 @@ def handler():
     grouped_error = pd.DataFrame(grouped_error)
     grouped_error = grouped_error.sort_values("R2 Score", ascending=True)
 
-    fit_df = pd.DataFrame(
-        {
-            "fit": results.fittedvalues,
-            "actual": Y
-        }
-    )
-
-    # Sort on magnitude of actual
-    fit_df = fit_df.sort_values("actual", ascending=True)
     plot_regression(fit_df)
 
     model_data[["thermal_transmittance", response]].corr()