Minor tweaks to the Property class so it works with the router, and fix a bad merge

2026-07-27 23:35:01 +00:00 · 2024-01-16 17:33:02 +00:00 · 2024-01-16 17:33:02 +00:00 · 51e85e7516
commit 51e85e7516
parent cc8b6801b2
2 changed files with 148 additions and 209 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -7,7 +7,8 @@ import pandas as pd

 from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.Dataset import TrainingDataset
-from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
+from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \
+    BUILT_FORM_REMAP
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
@ -18,7 +19,6 @@ from recommendations.recommendation_utils import (
    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
 )

-
 ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
 DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)

@ -49,8 +49,9 @@ class Property(Definitions):
    lighting = None

    spatial = None
+    base_difference_record = None

-    def __init__(self, id, postcode, address, epc_record, data=None):
+    def __init__(self, id, postcode, address, epc_record):

        self.epc_record = epc_record

@ -58,7 +59,7 @@ class Property(Definitions):

        self.address = address
        self.postcode = postcode
-        self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
+        self.data = {k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()}
        self.old_data = epc_record.get("old_data")
        self.property_dimensions = None

@ -135,7 +136,7 @@ class Property(Definitions):
        print("NEED TO CHANGE THE DASH TO LOWER CASE")
        fixed_data_col_names = [x.lower().replace("_", "-") for x in fixed_data_col_names]

-        fixed_data = {k.replace("-", "_"):v for k,v in self.data.items() if k in fixed_data_col_names}
+        fixed_data = {k.replace("-", "_"): v for k, v in self.data.items() if k in fixed_data_col_names}

        difference_record.append_fixed_data(fixed_data)

@ -143,16 +144,16 @@ class Property(Definitions):

        # TODO: adjust the base difference record with the previously calculated u values + features
        # estimated_perimeter is different to the perimeter in the epc record
-        
+
        # self.base_difference_record.df

    def adjust_difference_record_with_recommendations(self, property_recommendations):
        """
        This method will adjust the difference record, based on the recommendations made for the property
-        :param recommendations: dictionary of recommendations for the property
-        :return:
+        :param property_recommendations: dictionary of recommendations for the property
        """

+        self.recommendations_scoring_data = []
        for recommendations_by_type in property_recommendations:
            for i, rec in enumerate(recommendations_by_type):
                scoring_dict = self.create_recommendation_scoring_data(
@ -161,7 +162,7 @@ class Property(Definitions):
                scoring_dict['id'] = "+".join([str(self.id), str(rec["recommendation_id"])])

                self.recommendations_scoring_data.append(scoring_dict)
-    
+
    def create_recommendation_scoring_data(self, recommendation: dict):

        recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()
@ -180,41 +181,98 @@ class Property(Definitions):
            recommendation_record["walls_insulation_thickness_ending"] = "above average"
            recommendation_record["walls_energy_eff_ending"] = "Good"
        else:
-            wind_turbine_count = int(wind_turbine_count)
+            if recommendation_record["walls_thermal_transmittance_ending"] is None:
+                raise ValueError("We should not have a None value for the u value")

-        self.wind_turbine = {
-            "wind_turbine": wind_turbine_count,
-        }
+            if recommendation_record["walls_insulation_thickness_ending"] is None:
+                recommendation_record["walls_insulation_thickness_ending"] = "none"

-    def set_count_variables(self):
+        # Update the record to indicate the floor is now insulated
+        if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation",
+                                      "exposed_floor_insulation"]:
+            if len(recommendation["parts"]) > 1:
+                raise NotImplementedError("Have more than 1 floor insulation part - handle this case")

-        """
-        For EPC fields that are just counts, we'll set them here
-        These are fields that are integers but may contain additional values such as "" so we can't do a direct
-        conversion straight to an integer
-        :return:
-        """
+            recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
+            # We don't really see above average for this in the training data
+            recommendation_record["floor_insulation_thickness_ending"] = "average"
+            recommendation_record["floor_energy_eff_ending"] = "Good"
+        else:
+            if recommendation_record["floor_thermal_transmittance_ending"] is None:
+                raise ValueError("We should not have a None value for the u value")

-        fields = {
-            "number_of_open_fireplaces": "number-open-fireplaces",
-            "number_of_extensions": "extension-count",
-            "number_of_storeys": "flat-storey-count",
-            "number_of_rooms": "number-habitable-rooms",
-        }
+            if recommendation_record["floor_insulation_thickness_ending"] is None:
+                recommendation_record["floor_insulation_thickness_ending"] = "none"

-        null_attributes = ["number_of_storeys", "number_of_rooms"]
+        if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
+            recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]

-        for attribute, epc_field in fields.items():
-            value = self.data["extension-count"]
-            if value == "" or value in self.DATA_ANOMALY_MATCHES:
-                if attribute in null_attributes:
-                    value = None
-                else:
-                    value = 0
+            parts = recommendation["parts"]
+            if len(parts) != 1:
+                raise ValueError("More than one part for roof insulation - investiage me")
+
+            # This is based on the values we have in the training data
+            valid_numeric_values = [
+                12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
+            ]
+
+            proposed_depth = int(parts[0]["depth"])
+            if proposed_depth not in valid_numeric_values:
+                # Take the nearest value for scoring
+                proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
+
+            recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
+            recommendation_record["roof_energy_eff_ending"] = "Very Good"
+        else:
+            # No roof upgrade recommended: the u-value must already be set; only a missing thickness is defaulted
+            if recommendation_record["roof_thermal_transmittance_ending"] is None:
+                raise ValueError("We should not have a None value for the u value")
+
+            if recommendation_record["roof_insulation_thickness_ending"] is None:
+                recommendation_record["roof_insulation_thickness_ending"] = "none"
+
+        if recommendation["type"] == "mechanical_ventilation":
+            recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
+
+        if recommendation["type"] == "sealing_open_fireplace":
+            recommendation_record["number_open_fireplaces_ending"] = 0
+
+        if recommendation["type"] == "low_energy_lighting":
+            recommendation_record["low_energy_lighting_ending"] = 100
+            recommendation_record["lighting_energy_eff_starting"] = "Very Good"
+
+        if recommendation["type"] == "windows_glazing":
+            recommendation_record["multi_glaze_proportion_ending"] = 100
+            recommendation_record["windows_energy_eff_ending"] = "Average"
+
+            is_secondary_glazing = recommendation["is_secondary_glazing"]
+
+            if recommendation_record["glazing_type_ending"] == "multiple":
+                pass
+            elif recommendation_record["glazing_type_ending"] == "single":
+                recommendation_record["glazing_type_ending"] = "secondary" if is_secondary_glazing else "double"
+            elif recommendation_record["glazing_type_ending"] == "double":
+                recommendation_record["glazing_type_ending"] = "multiple" if is_secondary_glazing else "double"
+            elif recommendation_record["glazing_type_ending"] == "secondary":
+                recommendation_record["glazing_type_ending"] = "secondary" if is_secondary_glazing else "multiple"
+            elif recommendation_record["glazing_type_ending"] in ["triple", "high performance"]:
+                recommendation_record["glazing_type_ending"] = "multiple"
            else:
-                value = int(value)
+                raise ValueError("Invalid glazing type - implement me")

-            setattr(self, attribute, value)
+        if recommendation["type"] == "solar_pv":
+            recommendation_record["photo_supply_ending"] = recommendation["photo_supply"]
+
+        if recommendation["type"] not in [
+            "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
+            "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
+            "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
+            "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
+            "windows_glazing", "solar_pv"
+        ]:
+            raise NotImplementedError("Implement me")
+
+        return recommendation_record

    def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
        """
@ -473,7 +531,7 @@ class Property(Definitions):
    def set_floor_level(self):
        self.floor_level = (
            FLOOR_LEVEL_MAP[self.data["floor-level"]] if
-            self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None 
+            self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None
            else None
        )

@ -545,126 +603,6 @@ class Property(Definitions):

        return component_data

-    def get_model_data(self):
-        """
-        This method extracts cleaned data from the property object, which is used in our machine learning models
-
-        This will use many of the cleaned properties, extracted from the epc data, or methods in DataProcessor.
-
-        For future iterations of this, we probably want to implement a singular method in DataProcessor, which can
-        be used in the etl code and in here
-
-        :return: dictionary of model data to be scored in the model
-        """
-
-        drop_cols = ["original_description", "clean_description"]
-        insulation_drop_cols = ["thermal_transmittance_unit", "is_assumed", "is_valid"]
-        insulation_rename_cols = ["thermal_transmittance", "insulation_thickness"]
-
-        walls = self._extract_component(self.walls, insulation_rename_cols, insulation_drop_cols + drop_cols, "walls")
-        roof = self._extract_component(self.roof, insulation_rename_cols, insulation_drop_cols + drop_cols, "roof")
-        floor = self._extract_component(self.floor, insulation_rename_cols, insulation_drop_cols + drop_cols, "floor")
-
-        windows = self._extract_component(self.windows, [], drop_cols + ["no_data"])
-        fuel = self._extract_component(self.main_fuel, ["tariff_type"], drop_cols + ["tariff_type"], "main-fuel")
-        main_heating = self._extract_component(self.main_heating, [], drop_cols + ["has_assumed"])
-        main_heating_controls = self._extract_component(self.main_heating_controls, [], drop_cols)
-        hotwater = self._extract_component(self.hotwater, ["tariff_type"], drop_cols + ['assumed'], "hotwater")
-
-        # We'll need to clean second heating
-        second_heating = self.data["secondheat-description"]
-
-        epc_raw_columns = POTENTIAL_COLUMNS + EFFICIENCY_FEATURES + [
-            'TRANSACTION_TYPE',
-            'ENERGY_TARIFF',
-            'PROPERTY_TYPE',
-            'UPRN',
-            'NUMBER_OPEN_FIREPLACES',
-            'MULTI_GLAZE_PROPORTION',
-            'MECHANICAL_VENTILATION',
-            'PHOTO_SUPPLY',
-            'LOW_ENERGY_LIGHTING',
-            'SOLAR_WATER_HEATING_FLAG',
-            'GLAZED_TYPE',
-            'CONSTITUENCY',
-            'NUMBER_HEATED_ROOMS',
-            'EXTENSION_COUNT',
-        ]
-        epc_raw_data = {
-            k: self.data[k.lower().replace("_", "-")] for k in epc_raw_columns
-        }
-
-        built_form_cleaning_map = {
-            "Flat": "Mid-Terrace",
-            "House": "Semi-Detached",
-            "Bungalow": "Detached",
-            "Maisonette": "Mid-Terrace"
-        }
-
-        built_form = self.data["built-form"]
-        if built_form in self.DATA_ANOMALY_MATCHES:
-            # TODO: If built form isn't captured, we use the most common value for that property type - we shall
-            #       improve this methodology
-            built_form = built_form_cleaning_map.get(self.data["property-type"])
-            if not built_form:
-                raise NotImplementedError("Not handled this property type when cleaning built form")
-
-        property_data = {
-            **walls,
-            **roof,
-            **floor,
-            **fuel,
-            **main_heating,
-            **main_heating_controls,
-            **hotwater,
-            **windows,
-            "SECONDHEAT_DESCRIPTION": second_heating,
-            "DAYS_TO": EPCDataProcessor.calculate_days_to(self.data["lodgement-date"]),
-            "SAP": float(self.data["current-energy-efficiency"]),
-            "CARBON": float(self.data["co2-emissions-current"]),
-            "HEAT_DEMAND": float(self.data["energy-consumption-current"]),
-            "estimated_perimeter": self.perimeter,
-            "CONSTRUCTION_AGE_BAND": self.construction_age_band,
-            "FLOOR_HEIGHT": self.floor_height,
-            "NUMBER_HABITABLE_ROOMS": self.number_of_rooms,
-            "TOTAL_FLOOR_AREA": self.floor_area,
-            "FIXED_LIGHTING_OUTLETS_COUNT": self.number_lighting_outlets,
-            **epc_raw_data,
-            "BUILT_FORM": built_form,
-            "POSTCODE": self.data["postcode"],
-        }
-
-        return property_data
-
-    def set_number_lighting_outlets(self, cleaned_property_data):
-        """
-        Extracts and cleans the estimated number of lighting outlets
-        :return:
-        """
-
-        if self.data["fixed-lighting-outlets-count"] in [None, ""]:
-
-            # We check old EPCs and the full SAP EPC
-
-            lighting_data = []
-
-            if len(self.old_data):
-                lighting_data.extend([
-                    int(x["fixed-lighting-outlets-count"]) for x in self.old_data if
-                    x["fixed-lighting-outlets-count"] != ""
-                ])
-
-            if len(self.full_sap_epc):
-                if self.full_sap_epc["fixed-lighting-outlets-count"] != "":
-                    lighting_data.append(int(self.full_sap_epc["fixed-lighting-outlets-count"]))
-
-            if lighting_data:
-                self.number_lighting_outlets = round(np.median(lighting_data))
-            else:
-                self.number_lighting_outlets = round(cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0])
-        else:
-            self.number_lighting_outlets = float(self.data["fixed-lighting-outlets-count"])
-
    def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
        """
        Stores these values for usage later
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@ -34,7 +34,8 @@ class BaseDataset:
    #         raise ValueError(f"Pipeline type {pipeline_type} not found")

    #     return self.pipeline_steps[pipeline_type]
-        
+
+
 class TrainingDataset(BaseDataset):
    """
    A collection of EPCDifferenceRecords can be combined into a TrainingDataset.
@ -45,7 +46,7 @@ class TrainingDataset(BaseDataset):
        # self.pipeline_steps = self.pipeline_factory("training")
        self.datasets = datasets
        self.df = pd.DataFrame([dataset.difference_record for dataset in datasets])
-        
+
        self._feature_generation()
        self._drop_features()
        self._clean_efficiency_variables()
@ -91,7 +92,7 @@ class TrainingDataset(BaseDataset):
        if row["has_dwelling_above"]:
            if row["roof_thermal_transmittance"] != 0:
                raise ValueError("Should have 0 u-value for roof")
-            
+
            if row["roof_thermal_transmittance_ending"] != 0:
                raise ValueError("Should have 0 u-value for roof")

@ -105,15 +106,16 @@ class TrainingDataset(BaseDataset):
            is_pitched=row["is_pitched"],
            is_at_rafters=row["is_at_rafters"],
            age_band=england_wales_age_band_lookup[row["construction_age_band"]]
-        )   
-    
+        )
+
    @staticmethod
    def _lambda_function_to_generate_wall_uvalue(row, is_end=False):
        """
        Using the apply method, use the get_wall_u_value method to generate the u-value
        """
        description_col_name = "walls_clean_description" if not is_end else "walls_clean_description_ending"
-        thermal_transistance_col_name = "walls_thermal_transmittance" if not is_end else "walls_thermal_transmittance_ending"
+        thermal_transistance_col_name = "walls_thermal_transmittance" if not is_end else \
+            "walls_thermal_transmittance_ending"

        if pd.isnull(row[thermal_transistance_col_name]):
            output = get_wall_u_value(
@ -126,7 +128,7 @@ class TrainingDataset(BaseDataset):
            output = row[thermal_transistance_col_name]

        return output
-    
+
    @staticmethod
    def _lambda_function_to_generate_floor_uvalue(row, is_end=False):
        """
@ -146,20 +148,19 @@ class TrainingDataset(BaseDataset):
            uvalue = row[floor_thermal_col_name]

        if pd.isnull(uvalue):
-
            insulation_col_name = "floor_insulation_thickness" if not is_end else "floor_insulation_thickness_ending"
            floor_area_col_name = "estimated_perimeter_starting" if not is_end else "estimated_perimeter_ending"
            perimeter_col_name = "total_floor_area_starting" if not is_end else "total_floor_area_ending"

            uvalue = get_floor_u_value(
-                    floor_type=row["floor_type"],
-                    perimeter=row[floor_area_col_name],
-                    area=row[perimeter_col_name],
-                    insulation_thickness=row[insulation_col_name],
-                    wall_type=row["wall_type"],
-                    age_band=england_wales_age_band_lookup[row["construction_age_band"]]
-                )
-        
+                floor_type=row["floor_type"],
+                perimeter=row[floor_area_col_name],
+                area=row[perimeter_col_name],
+                insulation_thickness=row[insulation_col_name],
+                wall_type=row["wall_type"],
+                age_band=england_wales_age_band_lookup[row["construction_age_band"]]
+            )
+
        return uvalue

    def _generate_u_values_from_features(self):
@ -181,13 +182,15 @@ class TrainingDataset(BaseDataset):
        )

        walls_starting_uvalue = self.df['walls_thermal_transmittance'].fillna(walls_starting_uvalue)
-        walls_starting_equals_ending_flag = self.df['walls_clean_description'] == self.df["walls_clean_description_ending"] 
-        walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[walls_starting_equals_ending_flag]
-     
+        walls_starting_equals_ending_flag = self.df['walls_clean_description'] == self.df[
+            "walls_clean_description_ending"]
+        walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[
+            walls_starting_equals_ending_flag]
+
        # ~~~~~~~~~~~~~~~~~~
        # Roof
        # ~~~~~~~~~~~~~~~~~~
-            
+
        roof_starting_uvalue = self.df.apply(
            lambda row: self._lambda_function_to_generate_roof_uvalue(row),
            axis=1
@ -200,7 +203,6 @@ class TrainingDataset(BaseDataset):
        roof_starting_uvalue = self.df['roof_thermal_transmittance'].fillna(roof_starting_uvalue)
        roof_ending_uvalue = self.df['roof_thermal_transmittance_ending'].fillna(roof_ending_uvalue)

-             
        # ~~~~~~~~~~~~~~~~~~
        # Floor
        # ~~~~~~~~~~~~~~~~~~
@ -211,7 +213,8 @@ class TrainingDataset(BaseDataset):
        )

        self.df['estimated_perimeter_starting'] = self.df.apply(
-            lambda row: estimate_perimeter(row["total_floor_area_starting"]/ row['estimated_number_of_floors'], row["number_habitable_rooms"]/ row['estimated_number_of_floors']),
+            lambda row: estimate_perimeter(row["total_floor_area_starting"] / row['estimated_number_of_floors'],
+                                           row["number_habitable_rooms"] / row['estimated_number_of_floors']),
            axis=1
        )
        self.df['estimated_perimeter_ending'] = self.df.apply(
@ -221,18 +224,18 @@ class TrainingDataset(BaseDataset):
        self.df["floor_type"] = self.df["is_suspended"].replace({True: "suspended", False: "solid"})
        self.df["wall_type"] = self.df.apply(
            lambda row: get_wall_type(
-                is_cavity_wall=row["is_cavity_wall"], 
-                is_solid_brick=row["is_solid_brick"], 
-                is_timber_frame=row["is_timber_frame"], 
-                is_granite_or_whinstone=row["is_granite_or_whinstone"], 
-                is_cob=row["is_cob"], 
+                is_cavity_wall=row["is_cavity_wall"],
+                is_solid_brick=row["is_solid_brick"],
+                is_timber_frame=row["is_timber_frame"],
+                is_granite_or_whinstone=row["is_granite_or_whinstone"],
+                is_cob=row["is_cob"],
                is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
                is_system_built=row["is_system_built"],
                is_park_home=row["is_park_home"]
-                ),
+            ),
            axis=1
        )
-        
+
        floor_starting_uvalue = self.df.apply(
            lambda row: self._lambda_function_to_generate_floor_uvalue(row),
            axis=1
@ -246,19 +249,21 @@ class TrainingDataset(BaseDataset):
        floor_ending_uvalue = self.df['floor_thermal_transmittance_ending'].fillna(floor_ending_uvalue)

        for component in ["walls", "roof", "floor"]:
-            self.df[f"{component}_thermal_transmittance"] = self.df[f"{component}_thermal_transmittance"].fillna(eval(f"{component}_starting_uvalue"))
-            self.df[f"{component}_thermal_transmittance_ending"] = self.df[f"{component}_thermal_transmittance_ending"].fillna(eval(f"{component}_ending_uvalue"))
+            self.df[f"{component}_thermal_transmittance"] = self.df[f"{component}_thermal_transmittance"].fillna(
+                eval(f"{component}_starting_uvalue"))
+            self.df[f"{component}_thermal_transmittance_ending"] = self.df[
+                f"{component}_thermal_transmittance_ending"].fillna(eval(f"{component}_ending_uvalue"))

-        self.df = self.df.drop(columns=["floor_type", "wall_type", "walls_clean_description", "walls_clean_description_ending", 'estimated_number_of_floors'])
+        self.df = self.df.drop(
+            columns=["floor_type", "wall_type", "walls_clean_description", "walls_clean_description_ending",
+                     'estimated_number_of_floors'])

-    
    def _adjust_assumed_values_in_wall_descriptions(self):
        """
        Strip out assumed values for all wall descriptions
        """
        for col in ["walls_clean_description", "walls_clean_description_ending"]:
-            self.df[col] = self.df[col].str.replace("(assumed)", "").str.rstrip()
-
+            self.df[col] = self.df[col].str.replace("(assumed)", "", regex=False).str.rstrip()

    def _drop_inconsistent_properties(self, expanded_df: pd.DataFrame, component: str):
        """
@ -292,9 +297,8 @@ class TrainingDataset(BaseDataset):
                (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) &
                (expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"])
                ]
-            
+
        return expanded_df
-        

    def _expand_description_to_features(self, cleaned_lookup: dict):
        """
@ -306,7 +310,7 @@ class TrainingDataset(BaseDataset):
        # remove this record, as it indicates that the quality of the EPC conducted in the first instance
        # is low
        # We also replace descriptions with their cleaned variants
-        """ 
+        """

        cols_to_drop = {
            "walls": [
@ -361,9 +365,9 @@ class TrainingDataset(BaseDataset):
        }

        components_to_expand = cols_to_drop.keys()
-        
+
        for component in components_to_expand:
-            
+
            # TODO: change cleaned dataframe to have underscores instead of dashes     
            if component == "main-fuel":
                cleaned_key = "main-fuel"
@ -377,7 +381,7 @@ class TrainingDataset(BaseDataset):
                original_cols = [f"{component}_description_starting", f"{component}_description_ending"]

            cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key])
-            
+
            expanded_df = self.df.merge(
                cleaned_lookup_df_for_key,
                how="left",
@ -393,7 +397,7 @@ class TrainingDataset(BaseDataset):

            # Drop properties where key material types have changed
            expanded_df = self._drop_inconsistent_properties(expanded_df, component)
-            
+
            # Drop original cols and cols to drop
            expanded_df = expanded_df.drop(columns=cols_to_drop[component] + original_cols)

@ -411,11 +415,10 @@ class TrainingDataset(BaseDataset):
                }
            )
            self.df = expanded_df
-            
+
        # We don't need any lighting specific cleaning, we just drop the original description as we use
        # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
        self.df = self.df.drop(columns=["lighting_description_starting", "lighting_description_ending"])
-    

    def _clean_missing_values(self, ignore_cols=None):
        missings = pd.isnull(self.df).sum()
@ -433,7 +436,6 @@ class TrainingDataset(BaseDataset):
            else:
                self.df[col] = self.df[col].fillna("Unknown")

-
    def _null_validation(self, information: str):
        print(f"Null validation after {information}")
        if pd.isnull(self.df).sum().sum():
@ -445,7 +447,6 @@ class TrainingDataset(BaseDataset):
        """
        self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"])

-
    def _feature_generation(self):
        """
        Generate features for modelling
@ -469,16 +470,15 @@ class TrainingDataset(BaseDataset):
        missings = missings[missings >= 1]

        if len(missings) == 0:
-            return 
+            return

-        # Make sure they are all efficiency columns
+        # Make sure they are all efficiency columns
        if any(~missings.index.str.contains("energy_eff")):
            raise ValueError("Non efficiency columns are missing")

        for m in missings.index:
            self.df[m] = self.df[m].fillna("NO_RATING")

-
    @staticmethod
    def _calculate_days_to(lodgement_date):

@ -495,7 +495,7 @@ class TrainingDataset(BaseDataset):
    #     if not isinstance(other, TrainingDataset):
    #         raise TypeError("Addition can only be performed with another instance of TrainingDataset")
    #     return TrainingDataset(self.datasets + other.datasets)
-        
+
    # def __radd__(self, other):
    #     """
    #     Required for sum() to work
@ -505,6 +505,7 @@ class TrainingDataset(BaseDataset):
    #     else:
    #         return self.__add__(other)

+
 class NewDataset(BaseDataset):
    """
    A collection of EPCDifferenceRecords can be combined into a ScoringDataset.
@ -518,7 +519,7 @@ class NewDataset(BaseDataset):
        if not isinstance(other, NewDataset):
            raise TypeError("Addition can only be performed with another instance of ScoringDataset")
        return NewDataset(self.datasets + other.datasets)
-        
+
    def __radd__(self, other):
        """
        Required for sum() to work
@ -526,4 +527,4 @@ class NewDataset(BaseDataset):
        if isinstance(other, int):
            return self
        else:
-            return self.__add__(other)
+            return self.__add__(other)