From eeeea467cfae9436f2e4285729dab1d3a6c9e691 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Thu, 14 Dec 2023 21:05:07 +0000 Subject: [PATCH] fixed col names --- etl/epc/Dataset.py | 6 +++--- etl/epc/Record.py | 6 +++--- etl/epc/property_change_app.py | 39 +++++++++++++++++----------------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index e364d0f0..c2ed5538 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -74,15 +74,15 @@ class TrainingDataset(BaseDataset): """ Drop features that are not needed for modelling """ - self.df = self.df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"]) + self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"]) def _feature_generation(self): """ Generate features for modelling """ - self.df["DAYS_TO_STARTING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_STARTING"]) - self.df["DAYS_TO_ENDING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_ENDING"]) + self.df["days_to_starting"] = self._calculate_days_to(self.df["lodgement_date_starting"]) + self.df["days_to_ending"] = self._calculate_days_to(self.df["lodgement_date_ending"]) def _clean_efficiency_variables(self): diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 4f136839..b43168a7 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -259,12 +259,12 @@ class EPCDifferenceRecord: carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE) component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES - ending_record = self.record2.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_ENDING") - starting_record = self.record1.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_STARTING") + ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending") + starting_record = self.record1.get(component_variables + 
["lodgement_date"], return_asdict=True, key_suffix="_starting") # TODO: Take the earliest potentials self.difference_record = { - "uprn": self.record1.get("UPRN"), + "uprn": self.record1.get("uprn"), "rdsap_change": rdsap_change, "heat_demand_change": heat_demand_change, "carbon_change": carbon_change, diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py index ee60ecb3..85118642 100644 --- a/etl/epc/property_change_app.py +++ b/etl/epc/property_change_app.py @@ -552,37 +552,38 @@ def app(): epc_records = [EPCRecord(uprn, **x) for x in variable_data.to_dict(orient='records')] + # TODO: Make this part of a strategy pattern, as we can generate different training datasets property_model_data, all_equal_rows = compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_model_data, all_equal_rows) - for idx in range(0, len(epc_records) - 1): + # for idx in range(0, len(epc_records) - 1): - if idx >= len(epc_records) - 1: - break + # if idx >= len(epc_records) - 1: + # break - earliest_record: EPCRecord = epc_records[idx] - latest_record: EPCRecord = epc_records[idx + 1] + # earliest_record: EPCRecord = epc_records[idx] + # latest_record: EPCRecord = epc_records[idx + 1] - # Auto sort the records so that the record with highest RDSAP score is always record1 - difference_record: EPCDifferenceRecord = latest_record - earliest_record + # # Auto sort the records so that the record with highest RDSAP score is always record1 + # difference_record: EPCDifferenceRecord = latest_record - earliest_record - # TODO: Pull out RDSAP_CHANGE to a variable - if difference_record.get("RDSAP_CHANGE") == 0: - continue + # # TODO: Pull out RDSAP_CHANGE to a variable + # if difference_record.get("RDSAP_CHANGE") == 0: + # continue - all_equal = difference_record.compare_fields_in_records( - fields=CORE_COMPONENT_FEATURES - ) + # all_equal = difference_record.compare_fields_in_records( + # fields=CORE_COMPONENT_FEATURES + # ) - if all_equal: - # Keep track 
of this for the moment so we can analyse - all_equal_rows.append({"uprn": uprn, "directory_name": directory.name}) - continue + # if all_equal: + # # Keep track of this for the moment so we can analyse + # all_equal_rows.append({"uprn": uprn, "directory_name": directory.name}) + # continue - difference_record.append_fixed_data(fixed_data) + # difference_record.append_fixed_data(fixed_data) - property_model_data.append(difference_record) + # property_model_data.append(difference_record) # property_model_data.append(difference_record.difference_record)