diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index 4ad854c1..92d5511b 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -100,8 +100,8 @@ class EPCDataProcessor:
 
         # FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
         self.violation_mode = violation_mode
-        if run_mode not in ["training", "newdata"]:
-            raise ValueError("Run mode must be either training or newdata")
+        if run_mode not in ["training", "newdata", "kwhdata"]:
+            raise ValueError("Run mode must be either training, newdata or kwhdata")
         self.run_mode = run_mode if not violation_mode else "newdata"
 
     def prepare_data(self, filepath: Path | str | None = None) -> None:
@@ -110,7 +110,9 @@ class EPCDataProcessor:
         Ignore step is used to highlight which steps are not needed in newdata
         """
 
-        ignore_step = True if self.run_mode == "newdata" else False
+        ignore_step = True if self.run_mode in ["newdata"] else False
+        if self.run_mode == "kwhdata":
+            self.rename_kwhdata_columns()
 
         if filepath is not None:
             self.load_data(
@@ -126,18 +128,21 @@ class EPCDataProcessor:
         self.remap_build_form()
         self.cast_data_column_values_to_lower()
         self.standardise_construction_age_band(ignore_step=ignore_step)
-        self.clean_missing_rooms(ignore_step=ignore_step)
+        if self.run_mode != "kwhdata":
+            self.clean_missing_rooms(ignore_step=ignore_step)
         self.recast_df_columns(
             column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
         )
         self.clean_multi_glaze_proportion(ignore_step=ignore_step)
         self.clean_photo_supply()
-        self.retain_multiple_epc_properties(
-            epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"],
-            ignore_step=ignore_step,
-        )
+        if self.run_mode != "kwhdata":
+            self.retain_multiple_epc_properties(
+                epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"],
+                ignore_step=ignore_step,
+            )
 
-        self.fill_na_fields()
+        if self.run_mode != "kwhdata":
+            self.fill_na_fields()
 
         self.sort_data_by_uprn_lodgement_date(ignore_step=ignore_step)
 
@@ -148,8 +153,9 @@ class EPCDataProcessor:
 
         self.fill_invalid_constituency_fields(ignore_step=ignore_step)
 
-        self.make_cleaning_averages(ignore_step=ignore_step)
-        self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
+        if self.run_mode != "kwhdata":
+            self.make_cleaning_averages(ignore_step=ignore_step)
+            self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
 
         # TODO: check if this has impact on training dataset
         # cleaned_data = self.apply_averages_cleaning(
@@ -160,21 +166,33 @@ class EPCDataProcessor:
         # )
 
         # When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper
+
         cleaning_averages = self.cleaning_averages.copy()
         if self.run_mode == "newdata":
             cleaning_averages.columns = cleaning_averages.columns.str.upper()
 
-        cleaned_data = self.apply_averages_cleaning(
-            data_to_clean=self.data,
-            cleaning_data=cleaning_averages,
-            cols_to_merge_on=COLUMNS_TO_MERGE_ON,
-        )
+        if self.run_mode == "kwhdata":
+            cleaned_data = self.data
+        else:
+            cleaned_data = self.apply_averages_cleaning(
+                data_to_clean=self.data,
+                cleaning_data=cleaning_averages,
+                cols_to_merge_on=COLUMNS_TO_MERGE_ON,
+            )
 
         self.data = self.data if cleaned_data is None else cleaned_data
 
-        self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
+        if self.run_mode != "kwhdata":
+            self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
+
         self.cast_data_columns_to_lower()
 
+    def rename_kwhdata_columns(self) -> None:
+        """Rename kWh data columns to the EPC API style: upper case, hyphens replaced by underscores."""
+        upper_cased = self.data.columns.str.upper()
+        # regex=False: "-" is a literal here; do not rely on the pandas-version-dependent default.
+        self.data.columns = upper_cased.str.replace("-", "_", regex=False)
+
     def cast_data_columns_to_lower(self):
         """
         Convert all columns names to lower