diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 575a99ae..4aa14a4d 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -277,22 +277,6 @@ class EPCRecord: return - def _drop_features(self): - """ - Drop features that are not needed for modelling - """ - self.df = self.df.drop( - columns=["lodgement_date_starting", "lodgement_date_ending"] - ) - - def _feature_generation(self): - """ - Generate features for modelling - """ - self.df["days_to_lodgement_date"] = self._calculate_days_to( - self.prepared_epc["lodgement_date"] - ) - @staticmethod def _calculate_days_to(lodgement_date): if isinstance(lodgement_date, str): @@ -387,44 +371,6 @@ class EPCRecord: same_index = df.apply(pd.Series.duplicated).any() self.prepared_epc_delta_metadata = df[same_index[~same_index].index] - def _expand_description_to_features(self): - pass - - def _expand_description_to_uvalues(self): - # TODO: can be loop over all the descriptions, or done in one - pass - - # def _process_and_prune(self, cleaned_lookup: dict): - # """ - # This method will merge on the cleaned lookup table and ensure that the building fabric in the - # starting and ending EPC is consistent, so ensure that we are performing our modelling on the cleanest - # possible dataset. - # """ - # for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]: - # if component == "main-fuel": - # component = component.replace("-", "_") - # cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description" - # left_on_starting = ( - # f"{component}_starting" if component == "main-fuel" else f"{component}_description_starting" - # ) - - # left_on_ending = ( - # f"{component}_ending" if component == "main-fuel" else f"{component}_description_ending" - # ) - - # self.df2 = self.df.merge( - # pd.DataFrame(cleaned_lookup[cleaned_key]), - # how="left", - # left_on=left_on_starting, - # right_on="original_description", - # ).merge( - # pd.DataFrame(cleaned_lookup[cleaned_key]), - # how="left", - # left_on=left_on_ending, - # right_on="original_description", - # suffixes=("", "_ending") - # ) - def _clean_records_using_epc_records(self): """ This method will clean the records