From eeeea467cfae9436f2e4285729dab1d3a6c9e691 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong@Michaels-MacBook-Pro.local>
Date: Thu, 14 Dec 2023 21:05:07 +0000
Subject: [PATCH] Lowercase EPC column names; comment out inline EPC comparison loop

---
 etl/epc/Dataset.py             |  6 +++---
 etl/epc/Record.py              |  6 +++---
 etl/epc/property_change_app.py | 39 +++++++++++++++++-----------------
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index e364d0f0..c2ed5538 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -74,15 +74,15 @@ class TrainingDataset(BaseDataset):
         """
         Drop features that are not needed for modelling
         """
-        self.df = self.df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
+        self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"])
 
 
     def _feature_generation(self):
         """
         Generate features for modelling
         """
-        self.df["DAYS_TO_STARTING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_STARTING"])
-        self.df["DAYS_TO_ENDING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_ENDING"])
+        self.df["days_to_starting"] = self._calculate_days_to(self.df["lodgement_date_starting"])
+        self.df["days_to_ending"] = self._calculate_days_to(self.df["lodgement_date_ending"])
 
     def _clean_efficiency_variables(self):
 
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index 4f136839..b43168a7 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -259,12 +259,12 @@ class EPCDifferenceRecord:
         carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE)
 
         component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
-        ending_record = self.record2.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_ENDING")
-        starting_record = self.record1.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_STARTING")
+        ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending")
+        starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_starting")
 
         # TODO: Take the earliest potentials
         self.difference_record = {
-            "uprn": self.record1.get("UPRN"),
+            "uprn": self.record1.get("uprn"),
             "rdsap_change": rdsap_change,
             "heat_demand_change": heat_demand_change,
             "carbon_change": carbon_change,
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index ee60ecb3..85118642 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -552,37 +552,38 @@ def app():
 
             epc_records = [EPCRecord(uprn, **x) for x in variable_data.to_dict(orient='records')]
 
+            # TODO: Make this part of a strategy pattern, as we can generate different training datasets
             property_model_data, all_equal_rows = compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_model_data, all_equal_rows)
 
 
 
-            for idx in range(0, len(epc_records) - 1):
+            # for idx in range(0, len(epc_records) - 1):
                 
-                if idx >= len(epc_records) - 1:
-                    break
+            #     if idx >= len(epc_records) - 1:
+            #         break
 
-                earliest_record: EPCRecord = epc_records[idx]
-                latest_record: EPCRecord = epc_records[idx + 1]
+            #     earliest_record: EPCRecord = epc_records[idx]
+            #     latest_record: EPCRecord = epc_records[idx + 1]
 
-                # Auto sort the records so that the record with highest RDSAP score is always record1
-                difference_record: EPCDifferenceRecord = latest_record - earliest_record
+            #     # Auto sort the records so that the record with highest RDSAP score is always record1
+            #     difference_record: EPCDifferenceRecord = latest_record - earliest_record
 
-                # TODO: Pull out RDSAP_CHANGE to a variable
-                if difference_record.get("RDSAP_CHANGE") == 0:
-                    continue
+            #     # TODO: Pull out RDSAP_CHANGE to a variable
+            #     if difference_record.get("RDSAP_CHANGE") == 0:
+            #         continue
                 
-                all_equal = difference_record.compare_fields_in_records(
-                    fields=CORE_COMPONENT_FEATURES
-                    )
+            #     all_equal = difference_record.compare_fields_in_records(
+            #         fields=CORE_COMPONENT_FEATURES
+            #         )
                 
-                if all_equal:
-                    # Keep track of this for the moment so we can analyse
-                    all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
-                    continue
+            #     if all_equal:
+            #         # Keep track of this for the moment so we can analyse
+            #         all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
+            #         continue
 
-                difference_record.append_fixed_data(fixed_data)
+            #     difference_record.append_fixed_data(fixed_data)
 
-                property_model_data.append(difference_record)
+            #     property_model_data.append(difference_record)
 
                 # property_model_data.append(difference_record.difference_record)