From 51e85e75162a21cb776a39e7db861a96c0262668 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jan 2024 17:33:02 +0000 Subject: [PATCH] minor tweaks to property class to work with router and fix dodgy merge --- backend/Property.py | 252 +++++++++++++++++--------------------------- etl/epc/Dataset.py | 105 +++++++++--------- 2 files changed, 148 insertions(+), 209 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index c784f6f2..7db64773 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -7,7 +7,8 @@ import pandas as pd from etl.epc.DataProcessor import EPCDataProcessor from etl.epc.Dataset import TrainingDataset -from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP +from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \ + BUILT_FORM_REMAP from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from etl.solar.SolarPhotoSupply import SolarPhotoSupply from utils.logger import setup_logger @@ -18,7 +19,6 @@ from recommendations.recommendation_utils import ( estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows ) - ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev') DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None) @@ -49,8 +49,9 @@ class Property(Definitions): lighting = None spatial = None + base_difference_record = None - def __init__(self, id, postcode, address, epc_record, data=None): + def __init__(self, id, postcode, address, epc_record): self.epc_record = epc_record @@ -58,7 +59,7 @@ class Property(Definitions): self.address = address self.postcode = postcode - self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()} + self.data = {k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()} self.old_data = epc_record.get("old_data") self.property_dimensions = None @@ -135,7 +136,7 @@ class Property(Definitions): print("NEED TO CHANGE THE DASH TO LOWER CASE") fixed_data_col_names = [x.lower().replace("_", "-") for x in fixed_data_col_names] - fixed_data = {k.replace("-", "_"):v for k,v in self.data.items() if k in fixed_data_col_names} + fixed_data = {k.replace("-", "_"): v for k, v in self.data.items() if k in fixed_data_col_names} difference_record.append_fixed_data(fixed_data) @@ -143,16 +144,16 @@ class Property(Definitions): # TODO: adjust the base difference record with the previously calculated u values + features # estimated_perimeter is different to the perimeter in the epc record - + # self.base_difference_record.df def adjust_difference_record_with_recommendations(self, property_recommendations): """ This method will adjust the difference record, based on the recommendations made for the property - :param recommendations: dictionary of recommendations for the property - :return: + :param property_recommendations: dictionary of recommendations for the property """ + self.recommendations_scoring_data = [] for recommendations_by_type in property_recommendations: for i, rec in enumerate(recommendations_by_type): scoring_dict = self.create_recommendation_scoring_data( @@ -161,7 +162,7 @@ class Property(Definitions): scoring_dict['id'] = "+".join([str(self.id), str(rec["recommendation_id"])]) self.recommendations_scoring_data.append(scoring_dict) - + def create_recommendation_scoring_data(self, recommendation: dict): recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy() @@ -180,41 +181,98 @@ class Property(Definitions): recommendation_record["walls_insulation_thickness_ending"] = "above average" recommendation_record["walls_energy_eff_ending"] = "Good" else: - wind_turbine_count = int(wind_turbine_count) + if recommendation_record["walls_thermal_transmittance_ending"] is None: + raise ValueError("We should not have a None value for the u value") - self.wind_turbine = { - "wind_turbine": wind_turbine_count, - } + if recommendation_record["walls_insulation_thickness_ending"] is None: + recommendation_record["walls_insulation_thickness_ending"] = "none" - def set_count_variables(self): + # Update description to indicate it's insulate + if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", + "exposed_floor_insulation"]: + if len(recommendation["parts"]) > 1: + raise NotImplementedError("Have more than 1 floor insulation part - handle this case") - """ - For EPC fields that are just counts, we'll set them here - These are fields that are integers but may contain additional values such as "" so we can't do a direct - conversion straight to an integer - :return: - """ + recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"] + # We don't really see above average for this in the training data + recommendation_record["floor_insulation_thickness_ending"] = "average" + recommendation_record["floor_energy_eff_ending"] = "Good" + else: + if recommendation_record["floor_thermal_transmittance_ending"] is None: + raise ValueError("We should not have a None value for the u value") - fields = { - "number_of_open_fireplaces": "number-open-fireplaces", - "number_of_extensions": "extension-count", - "number_of_storeys": "flat-storey-count", - "number_of_rooms": "number-habitable-rooms", - } + if recommendation_record["floor_insulation_thickness_ending"] is None: + recommendation_record["floor_insulation_thickness_ending"] = "none" - null_attributes = ["number_of_storeys", "number_of_rooms"] + if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]: + recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"] - for attribute, epc_field in fields.items(): - value = self.data["extension-count"] - if value == "" or value in self.DATA_ANOMALY_MATCHES: - if attribute in null_attributes: - value = None - else: - value = 0 + parts = recommendation["parts"] + if len(parts) != 1: + raise ValueError("More than one part for roof insulation - investiage me") + + # This is based on the values we have in the training data + valid_numeric_values = [ + 12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400 + ] + + proposed_depth = int(parts[0]["depth"]) + if proposed_depth not in valid_numeric_values: + # Take the nearest value for scoring + proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth)) + + recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth) + recommendation_record["roof_energy_eff_ending"] = "Very Good" + else: + # Fill missing roof u-values - this fill is not based on recommended upgrades + if recommendation_record["roof_thermal_transmittance_ending"] is None: + raise ValueError("We should not have a None value for the u value") + + if recommendation_record["roof_insulation_thickness_ending"] is None: + recommendation_record["roof_insulation_thickness_ending"] = "none" + + if recommendation["type"] == "mechanical_ventilation": + recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only' + + if recommendation["type"] == "sealing_open_fireplace": + recommendation_record["number_open_fireplaces_ending"] = 0 + + if recommendation["type"] == "low_energy_lighting": + recommendation_record["low_energy_lighting_ending"] = 100 + recommendation_record["lighting_energy_eff_starting"] = "Very Good" + + if recommendation["type"] == "windows_glazing": + recommendation_record["multi_glaze_proportion_ending"] = 100 + recommendation_record["windows_energy_eff_ending"] = "Average" + + is_secondary_glazing = recommendation["is_secondary_glazing"] + + if recommendation_record["glazing_type_ending"] == "multiple": + pass + elif recommendation_record["glazing_type_ending"] == "single": + recommendation_record["glazing_type_ending"] = "secondary" if is_secondary_glazing else "double" + elif recommendation_record["glazing_type_ending"] == "double": + recommendation_record["glazing_type_ending"] = "multiple" if is_secondary_glazing else "double" + elif recommendation_record["glazing_type_ending"] == "secondary": + recommendation_record["glazing_type_ending"] = "secondary" if is_secondary_glazing else "multiple" + elif recommendation_record["glazing_type_ending"] in ["triple", "high performance"]: + recommendation_record["glazing_type_ending"] = "multiple" else: - value = int(value) + raise ValueError("Invalid glazing type - implement me") - setattr(self, attribute, value) + if recommendation["type"] == "solar_pv": + recommendation_record["photo_supply_ending"] = recommendation["photo_supply"] + + if recommendation["type"] not in [ + "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting", + "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", + "loft_insulation", "room_roof_insulation", "flat_roof_insulation", + "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", + "windows_glazing", "solar_pv" + ]: + raise NotImplementedError("Implement me") + + return recommendation_record def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds): """ @@ -473,7 +531,7 @@ class Property(Definitions): def set_floor_level(self): self.floor_level = ( FLOOR_LEVEL_MAP[self.data["floor-level"]] if - self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None + self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None else None ) @@ -545,126 +603,6 @@ class Property(Definitions): return component_data - def get_model_data(self): - """ - This method extracts cleaned data from the property object, which is used in our machine learning models - - This will use many of the cleaned properties, extracted from the epc data, or methods in DataProcessor. - - For future iterations of this, we probably want to implement a singular method in DataProcessor, which can - be used in the etl code and in here - - :return: dictionary of model data to be scored in the model - """ - - drop_cols = ["original_description", "clean_description"] - insulation_drop_cols = ["thermal_transmittance_unit", "is_assumed", "is_valid"] - insulation_rename_cols = ["thermal_transmittance", "insulation_thickness"] - - walls = self._extract_component(self.walls, insulation_rename_cols, insulation_drop_cols + drop_cols, "walls") - roof = self._extract_component(self.roof, insulation_rename_cols, insulation_drop_cols + drop_cols, "roof") - floor = self._extract_component(self.floor, insulation_rename_cols, insulation_drop_cols + drop_cols, "floor") - - windows = self._extract_component(self.windows, [], drop_cols + ["no_data"]) - fuel = self._extract_component(self.main_fuel, ["tariff_type"], drop_cols + ["tariff_type"], "main-fuel") - main_heating = self._extract_component(self.main_heating, [], drop_cols + ["has_assumed"]) - main_heating_controls = self._extract_component(self.main_heating_controls, [], drop_cols) - hotwater = self._extract_component(self.hotwater, ["tariff_type"], drop_cols + ['assumed'], "hotwater") - - # We'll need to clean second heating - second_heating = self.data["secondheat-description"] - - epc_raw_columns = POTENTIAL_COLUMNS + EFFICIENCY_FEATURES + [ - 'TRANSACTION_TYPE', - 'ENERGY_TARIFF', - 'PROPERTY_TYPE', - 'UPRN', - 'NUMBER_OPEN_FIREPLACES', - 'MULTI_GLAZE_PROPORTION', - 'MECHANICAL_VENTILATION', - 'PHOTO_SUPPLY', - 'LOW_ENERGY_LIGHTING', - 'SOLAR_WATER_HEATING_FLAG', - 'GLAZED_TYPE', - 'CONSTITUENCY', - 'NUMBER_HEATED_ROOMS', - 'EXTENSION_COUNT', - ] - epc_raw_data = { - k: self.data[k.lower().replace("_", "-")] for k in epc_raw_columns - } - - built_form_cleaning_map = { - "Flat": "Mid-Terrace", - "House": "Semi-Detached", - "Bungalow": "Detached", - "Maisonette": "Mid-Terrace" - } - - built_form = self.data["built-form"] - if built_form in self.DATA_ANOMALY_MATCHES: - # TODO: If built form isn't captured, we use the most common value for that property type - we shall - # improve this methodology - built_form = built_form_cleaning_map.get(self.data["property-type"]) - if not built_form: - raise NotImplementedError("Not handled this property type when cleaning built form") - - property_data = { - **walls, - **roof, - **floor, - **fuel, - **main_heating, - **main_heating_controls, - **hotwater, - **windows, - "SECONDHEAT_DESCRIPTION": second_heating, - "DAYS_TO": EPCDataProcessor.calculate_days_to(self.data["lodgement-date"]), - "SAP": float(self.data["current-energy-efficiency"]), - "CARBON": float(self.data["co2-emissions-current"]), - "HEAT_DEMAND": float(self.data["energy-consumption-current"]), - "estimated_perimeter": self.perimeter, - "CONSTRUCTION_AGE_BAND": self.construction_age_band, - "FLOOR_HEIGHT": self.floor_height, - "NUMBER_HABITABLE_ROOMS": self.number_of_rooms, - "TOTAL_FLOOR_AREA": self.floor_area, - "FIXED_LIGHTING_OUTLETS_COUNT": self.number_lighting_outlets, - **epc_raw_data, - "BUILT_FORM": built_form, - "POSTCODE": self.data["postcode"], - } - - return property_data - - def set_number_lighting_outlets(self, cleaned_property_data): - """ - Extracts and cleans the estimated number of lighting outlets - :return: - """ - - if self.data["fixed-lighting-outlets-count"] in [None, ""]: - - # We check old EPCs and the full SAP EPC - - lighting_data = [] - - if len(self.old_data): - lighting_data.extend([ - int(x["fixed-lighting-outlets-count"]) for x in self.old_data if - x["fixed-lighting-outlets-count"] != "" - ]) - - if len(self.full_sap_epc): - if self.full_sap_epc["fixed-lighting-outlets-count"] != "": - lighting_data.append(int(self.full_sap_epc["fixed-lighting-outlets-count"])) - - if lighting_data: - self.number_lighting_outlets = round(np.median(lighting_data)) - else: - self.number_lighting_outlets = round(cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]) - else: - self.number_lighting_outlets = float(self.data["fixed-lighting-outlets-count"]) - def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy): """ Stores these values for usage later diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 4ca98fc6..82272be1 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -34,7 +34,8 @@ class BaseDataset: # raise ValueError(f"Pipeline type {pipeline_type} not found") # return self.pipeline_steps[pipeline_type] - + + class TrainingDataset(BaseDataset): """ A collection of EPCDifferenceRecords can be combined into a TrainingDataset. @@ -45,7 +46,7 @@ class TrainingDataset(BaseDataset): # self.pipeline_steps = self.pipeline_factory("training") self.datasets = datasets self.df = pd.DataFrame([dataset.difference_record for dataset in datasets]) - + self._feature_generation() self._drop_features() self._clean_efficiency_variables() @@ -91,7 +92,7 @@ class TrainingDataset(BaseDataset): if row["has_dwelling_above"]: if row["roof_thermal_transmittance"] != 0: raise ValueError("Should have 0 u-value for roof") - + if row["roof_thermal_transmittance_ending"] != 0: raise ValueError("Should have 0 u-value for roof") @@ -105,15 +106,16 @@ class TrainingDataset(BaseDataset): is_pitched=row["is_pitched"], is_at_rafters=row["is_at_rafters"], age_band=england_wales_age_band_lookup[row["construction_age_band"]] - ) - + ) + @staticmethod def _lambda_function_to_generate_wall_uvalue(row, is_end=False): """ Using the apply method, use the get_wall_u_value method to generate the u-value """ description_col_name = "walls_clean_description" if not is_end else "walls_clean_description_ending" - thermal_transistance_col_name = "walls_thermal_transmittance" if not is_end else "walls_thermal_transmittance_ending" + thermal_transistance_col_name = "walls_thermal_transmittance" if not is_end else \ + "walls_thermal_transmittance_ending" if pd.isnull(row[thermal_transistance_col_name]): output = get_wall_u_value( @@ -126,7 +128,7 @@ class TrainingDataset(BaseDataset): output = row[thermal_transistance_col_name] return output - + @staticmethod def _lambda_function_to_generate_floor_uvalue(row, is_end=False): """ @@ -146,20 +148,19 @@ class TrainingDataset(BaseDataset): uvalue = row[floor_thermal_col_name] if pd.isnull(uvalue): - insulation_col_name = "floor_insulation_thickness" if not is_end else "floor_insulation_thickness_ending" floor_area_col_name = "estimated_perimeter_starting" if not is_end else "estimated_perimeter_ending" perimeter_col_name = "total_floor_area_starting" if not is_end else "total_floor_area_ending" uvalue = get_floor_u_value( - floor_type=row["floor_type"], - perimeter=row[floor_area_col_name], - area=row[perimeter_col_name], - insulation_thickness=row[insulation_col_name], - wall_type=row["wall_type"], - age_band=england_wales_age_band_lookup[row["construction_age_band"]] - ) - + floor_type=row["floor_type"], + perimeter=row[floor_area_col_name], + area=row[perimeter_col_name], + insulation_thickness=row[insulation_col_name], + wall_type=row["wall_type"], + age_band=england_wales_age_band_lookup[row["construction_age_band"]] + ) + return uvalue def _generate_u_values_from_features(self): @@ -181,13 +182,15 @@ class TrainingDataset(BaseDataset): ) walls_starting_uvalue = self.df['walls_thermal_transmittance'].fillna(walls_starting_uvalue) - walls_starting_equals_ending_flag = self.df['walls_clean_description'] == self.df["walls_clean_description_ending"] - walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[walls_starting_equals_ending_flag] - + walls_starting_equals_ending_flag = self.df['walls_clean_description'] == self.df[ + "walls_clean_description_ending"] + walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[ + walls_starting_equals_ending_flag] + # ~~~~~~~~~~~~~~~~~~ # Roof # ~~~~~~~~~~~~~~~~~~ - + roof_starting_uvalue = self.df.apply( lambda row: self._lambda_function_to_generate_roof_uvalue(row), axis=1 @@ -200,7 +203,6 @@ class TrainingDataset(BaseDataset): roof_starting_uvalue = self.df['roof_thermal_transmittance'].fillna(roof_starting_uvalue) roof_ending_uvalue = self.df['roof_thermal_transmittance_ending'].fillna(roof_ending_uvalue) - # ~~~~~~~~~~~~~~~~~~ # Floor # ~~~~~~~~~~~~~~~~~~ @@ -211,7 +213,8 @@ class TrainingDataset(BaseDataset): ) self.df['estimated_perimeter_starting'] = self.df.apply( - lambda row: estimate_perimeter(row["total_floor_area_starting"]/ row['estimated_number_of_floors'], row["number_habitable_rooms"]/ row['estimated_number_of_floors']), + lambda row: estimate_perimeter(row["total_floor_area_starting"] / row['estimated_number_of_floors'], + row["number_habitable_rooms"] / row['estimated_number_of_floors']), axis=1 ) self.df['estimated_perimeter_ending'] = self.df.apply( @@ -221,18 +224,18 @@ class TrainingDataset(BaseDataset): self.df["floor_type"] = self.df["is_suspended"].replace({True: "suspended", False: "solid"}) self.df["wall_type"] = self.df.apply( lambda row: get_wall_type( - is_cavity_wall=row["is_cavity_wall"], - is_solid_brick=row["is_solid_brick"], - is_timber_frame=row["is_timber_frame"], - is_granite_or_whinstone=row["is_granite_or_whinstone"], - is_cob=row["is_cob"], + is_cavity_wall=row["is_cavity_wall"], + is_solid_brick=row["is_solid_brick"], + is_timber_frame=row["is_timber_frame"], + is_granite_or_whinstone=row["is_granite_or_whinstone"], + is_cob=row["is_cob"], is_sandstone_or_limestone=row["is_sandstone_or_limestone"], is_system_built=row["is_system_built"], is_park_home=row["is_park_home"] - ), + ), axis=1 ) - + floor_starting_uvalue = self.df.apply( lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1 @@ -246,19 +249,21 @@ class TrainingDataset(BaseDataset): floor_ending_uvalue = self.df['floor_thermal_transmittance_ending'].fillna(floor_ending_uvalue) for component in ["walls", "roof", "floor"]: - self.df[f"{component}_thermal_transmittance"] = self.df[f"{component}_thermal_transmittance"].fillna(eval(f"{component}_starting_uvalue")) - self.df[f"{component}_thermal_transmittance_ending"] = self.df[f"{component}_thermal_transmittance_ending"].fillna(eval(f"{component}_ending_uvalue")) + self.df[f"{component}_thermal_transmittance"] = self.df[f"{component}_thermal_transmittance"].fillna( + eval(f"{component}_starting_uvalue")) + self.df[f"{component}_thermal_transmittance_ending"] = self.df[ + f"{component}_thermal_transmittance_ending"].fillna(eval(f"{component}_ending_uvalue")) - self.df = self.df.drop(columns=["floor_type", "wall_type", "walls_clean_description", "walls_clean_description_ending", 'estimated_number_of_floors']) + self.df = self.df.drop( + columns=["floor_type", "wall_type", "walls_clean_description", "walls_clean_description_ending", + 'estimated_number_of_floors']) - def _adjust_assumed_values_in_wall_descriptions(self): """ Strip out assumed values for all wall descriptions """ for col in ["walls_clean_description", "walls_clean_description_ending"]: - self.df[col] = self.df[col].str.replace("(assumed)", "").str.rstrip() - + self.df[col] = self.df[col].str.replace("(assumed)", "", regex=False).str.rstrip() def _drop_inconsistent_properties(self, expanded_df: pd.DataFrame, component: str): """ @@ -292,9 +297,8 @@ class TrainingDataset(BaseDataset): (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) & (expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"]) ] - + return expanded_df - def _expand_description_to_features(self, cleaned_lookup: dict): """ @@ -306,7 +310,7 @@ class TrainingDataset(BaseDataset): # remove this record, as it indicates that the quality of the EPC conducted in the first instance # is low # We also replace descriptions with their cleaned variants - """ + """ cols_to_drop = { "walls": [ @@ -361,9 +365,9 @@ class TrainingDataset(BaseDataset): } components_to_expand = cols_to_drop.keys() - + for component in components_to_expand: - + # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": cleaned_key = "main-fuel" @@ -377,7 +381,7 @@ class TrainingDataset(BaseDataset): original_cols = [f"{component}_description_starting", f"{component}_description_ending"] cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key]) - + expanded_df = self.df.merge( cleaned_lookup_df_for_key, how="left", @@ -393,7 +397,7 @@ class TrainingDataset(BaseDataset): # Drop properties where key material types have changed expanded_df = self._drop_inconsistent_properties(expanded_df, component) - + # Drop original cols and cols to drop expanded_df = expanded_df.drop(columns=cols_to_drop[component] + original_cols) @@ -411,11 +415,10 @@ class TrainingDataset(BaseDataset): } ) self.df = expanded_df - + # We don't need any lighting specific cleaning, we just drop the original description as we use # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING self.df = self.df.drop(columns=["lighting_description_starting", "lighting_description_ending"]) - def _clean_missing_values(self, ignore_cols=None): missings = pd.isnull(self.df).sum() @@ -433,7 +436,6 @@ class TrainingDataset(BaseDataset): else: self.df[col] = self.df[col].fillna("Unknown") - def _null_validation(self, information: str): print(f"Null validation after {information}") if pd.isnull(self.df).sum().sum(): @@ -445,7 +447,6 @@ class TrainingDataset(BaseDataset): """ self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"]) - def _feature_generation(self): """ Generate features for modelling @@ -469,16 +470,15 @@ class TrainingDataset(BaseDataset): missings = missings[missings >= 1] if len(missings) == 0: - return + return - # Make sure they are all efficiency columns + # Make sure they are all efficiency columns if any(~missings.index.str.contains("energy_eff")): raise ValueError("Non efficiency columns are missing") for m in missings.index: self.df[m] = self.df[m].fillna("NO_RATING") - @staticmethod def _calculate_days_to(lodgement_date): @@ -495,7 +495,7 @@ class TrainingDataset(BaseDataset): # if not isinstance(other, TrainingDataset): # raise TypeError("Addition can only be performed with another instance of TrainingDataset") # return TrainingDataset(self.datasets + other.datasets) - + # def __radd__(self, other): # """ # Required for sum() to work @@ -505,6 +505,7 @@ class TrainingDataset(BaseDataset): # else: # return self.__add__(other) + class NewDataset(BaseDataset): """ A collection of EPCDifferenceRecords can be combined into a ScoringDataset. @@ -518,7 +519,7 @@ class NewDataset(BaseDataset): if not isinstance(other, NewDataset): raise TypeError("Addition can only be performed with another instance of ScoringDataset") return NewDataset(self.datasets + other.datasets) - + def __radd__(self, other): """ Required for sum() to work @@ -526,4 +527,4 @@ class NewDataset(BaseDataset): if isinstance(other, int): return self else: - return self.__add__(other) \ No newline at end of file + return self.__add__(other)