fixed col names

2026-07-27 23:35:01 +00:00 · 2023-12-14 21:05:07 +00:00 · 2023-12-14 21:05:07 +00:00 · eeeea467cf
commit eeeea467cf
parent 1ce9a4521f
3 changed files with 26 additions and 25 deletions
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -74,15 +74,15 @@ class TrainingDataset(BaseDataset):
        """
        Drop features that are not needed for modelling
        """
-        self.df = self.df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
+        self.df = self.df.drop(columns=["lodgement_date_starting", "lodgement_date_ending"])


    def _feature_generation(self):
        """
        Generate features for modelling
        """
-        self.df["DAYS_TO_STARTING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_STARTING"])
-        self.df["DAYS_TO_ENDING"] = self._calculate_days_to(self.df["LODGEMENT_DATE_ENDING"])
+        self.df["days_to_starting"] = self._calculate_days_to(self.df["lodgement_date_starting"])
+        self.df["days_to_ending"] = self._calculate_days_to(self.df["lodgement_date_ending"])

    def _clean_efficiency_variables(self):

--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -259,12 +259,12 @@ class EPCDifferenceRecord:
        carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE)

        component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
-        ending_record = self.record2.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_ENDING")
-        starting_record = self.record1.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="_STARTING")
+        ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending")
+        starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_starting")

        # TODO: Take the earliest potentials
        self.difference_record = {
-            "uprn": self.record1.get("UPRN"),
+            "uprn": self.record1.get("uprn"),
            "rdsap_change": rdsap_change,
            "heat_demand_change": heat_demand_change,
            "carbon_change": carbon_change,
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -552,37 +552,38 @@ def app():

            epc_records = [EPCRecord(uprn, **x) for x in variable_data.to_dict(orient='records')]

+            # TODO: Make this part of a strategy pattern, as we can generate different training datasets
            property_model_data, all_equal_rows = compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_model_data, all_equal_rows)



-            for idx in range(0, len(epc_records) - 1):
+            # for idx in range(0, len(epc_records) - 1):
                
-                if idx >= len(epc_records) - 1:
-                    break
+            #     if idx >= len(epc_records) - 1:
+            #         break

-                earliest_record: EPCRecord = epc_records[idx]
-                latest_record: EPCRecord = epc_records[idx + 1]
+            #     earliest_record: EPCRecord = epc_records[idx]
+            #     latest_record: EPCRecord = epc_records[idx + 1]

-                # Auto sort the records so that the record with highest RDSAP score is always record1
-                difference_record: EPCDifferenceRecord = latest_record - earliest_record
+            #     # Auto sort the records so that the record with highest RDSAP score is always record1
+            #     difference_record: EPCDifferenceRecord = latest_record - earliest_record

-                # TODO: Pull out RDSAP_CHANGE to a variable
-                if difference_record.get("RDSAP_CHANGE") == 0:
-                    continue
+            #     # TODO: Pull out RDSAP_CHANGE to a variable
+            #     if difference_record.get("RDSAP_CHANGE") == 0:
+            #         continue
                
-                all_equal = difference_record.compare_fields_in_records(
-                    fields=CORE_COMPONENT_FEATURES
-                    )
+            #     all_equal = difference_record.compare_fields_in_records(
+            #         fields=CORE_COMPONENT_FEATURES
+            #         )
                
-                if all_equal:
-                    # Keep track of this for the moment so we can analyse
-                    all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
-                    continue
+            #     if all_equal:
+            #         # Keep track of this for the moment so we can analyse
+            #         all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
+            #         continue

-                difference_record.append_fixed_data(fixed_data)
+            #     difference_record.append_fixed_data(fixed_data)

-                property_model_data.append(difference_record)
+            #     property_model_data.append(difference_record)

                # property_model_data.append(difference_record.difference_record)