adding more to the difference record

2026-08-02 21:08:24 +00:00 · 2023-12-05 16:07:47 +00:00 · 2023-12-05 16:07:47 +00:00 · c6fe7ca5f3
commit c6fe7ca5f3
parent 99d1e9b790
1 changed files with 156 additions and 8 deletions
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@ -399,7 +399,75 @@ def compare_records(earliest_record: pd.Series, latest_record: pd.Series, column

 from dataclasses import dataclass
 from etl.epc.ValidationConfiguration import EPCRecordValidationConfiguration
-from typing import Union
+from typing import Union, List
+
+
+class EPCDifferenceRecord:
+    """
+    Base class for the difference between two EPC records
+    """
+
+    def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False):
+        """
+        This method will initialise the EPCDifferenceRecord
+        Default usage expects record1 to have the higher RDSAP score
+        """
+        self.record1 = record1
+        self.record2 = record2
+        self.difference_record = {}
+
+        if auto_sort and (self.record1 <= self.record2):
+            self.record1, self.record2 = self.record2, self.record1
+
+        self._calculate_difference_record()
+        self._validate_difference_record()
+
+    def _calculate_difference_record(self):
+        """
+        This method will calculate the difference between the two records
+        """
+        self.difference_record = {}
+        for key in self.record1.__dict__.keys():
+            if key == "LODGEMENT_DATE":
+                continue
+            self.difference_record[key] = self.record1.get(key)
+            # - self.record2.get(key)
+
+    def _validate_difference_record(self):
+        """
+        This method will validate the difference record
+        """
+        # for key, value in self.difference_record.items():
+        #     if key == "LODGEMENT_DATE":
+        #         continue
+        #     if isinstance(value, str):
+        #         continue
+        #     if value < 0:
+        #         raise ValueError(f"Difference record has negative value for {key}")
+        pass
+            
+    def compare_fields_in_records(self, fields: List[str]):
+        """
+        This method will compare the records, for specific fields
+        """
+        
+        all_equal = True
+        for field in fields:
+            if self.record1.get(field) != self.record2.get(field):
+                return False
+    
+        if all_equal:
+            return True
+            
+    def get(self, key: str):
+        """
+        This method will return the value of the key
+        """
+        return self.difference_record[key]
+
+
+
+
@dataclass
 class EPCRecord:
    """
@ -447,9 +515,10 @@ class EPCRecord:

    def __post_init__(self):
        # We can have validation and cleaning steps for each of the fields
-        self.WALLS_DESCRIPTION = 'check'
-
-        self._field_validation()
+        # self.WALLS_DESCRIPTION = 'check'
+        # Could also have cleaning of records if needed
+        # self._field_validation()
+        pass

    def _field_validation(self):
        """
@ -500,9 +569,61 @@ class EPCRecord:
        
        if validation_config['range'] is not None:
            if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]:
-                raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
-            
+                raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")    
    
+    def __sub__(self, other):
+        """
+        This method will return the difference between two EPC records
+        """
+        if not isinstance(other, EPCRecord):
+            raise ValueError("Can only subtract EPCRecord from EPCRecord")
+        
+        difference_record = EPCDifferenceRecord(record1=self, record2=other, auto_sort=True)
+        
+        return difference_record
+    
+    def __gt__(self, other):
+        """
+        This method will return True if the EPC record is greater than the other
+        """
+        if not isinstance(other, EPCRecord):
+            raise ValueError("Can only compare EPCRecord to EPCRecord")
+        
+        return self.__dict__[RDSAP_RESPONSE] > other.__dict__[RDSAP_RESPONSE]
+    
+    def __ge__(self, other):
+        """
+        This method will return True if the EPC record is greater than or equal to the other
+        """
+        if not isinstance(other, EPCRecord):
+            raise ValueError("Can only compare EPCRecord to EPCRecord")
+        
+        return self.__dict__[RDSAP_RESPONSE] >= other.__dict__[RDSAP_RESPONSE]
+        
+    def __lt__(self, other):
+        """
+        This method will return True if the EPC record is less than the other
+        """
+        if not isinstance(other, EPCRecord):
+            raise ValueError("Can only compare EPCRecord to EPCRecord")
+        
+        return self.__dict__[RDSAP_RESPONSE] < other.__dict__[RDSAP_RESPONSE]
+    
+    def __le__(self, other):
+        """
+        This method will return True if the EPC record is less than or equal to the other
+        """
+        if not isinstance(other, EPCRecord):
+            raise ValueError("Can only compare EPCRecord to EPCRecord")
+        
+        return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
+        
+    def get(self, key: str):
+        """
+        This method will return the value of the key
+        """
+        return self.__dict__[key]
+        


    # def __init__(self, num) -> None:
@ -573,7 +694,34 @@ def app():
            # e.g. first vs second, second vs third and also first vs third
            property_model_data = []

-            temp = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
+            epc_records = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
+
+            for idx in range(0, len(epc_records) - 1):
+                
+                if idx >= len(epc_records) - 1:
+                    break
+
+                earliest_record: EPCRecord = epc_records[idx]
+                latest_record: EPCRecord = epc_records[idx + 1]
+
+                # Auto sort the records so that the record with highest RDSAP score is always record1
+                difference_record: EPCDifferenceRecord = latest_record - earliest_record
+
+                # TODO: Pull out RDSAP_CHANGE to a variable
+                if difference_record.get("RDSAP_CHANGE") == 0:
+                    continue
+                
+                all_equal = difference_record.compare_fields_in_records(
+                    fields=CORE_COMPONENT_FEATURES
+                    )
+                
+                if all_equal:
+                    # Keep track of this for the moment so we can analyse
+                    all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
+                    continue
+
+                property_model_data.append(difference_record.difference_record)
+
            for idx in range(0, property_data.shape[0] - 1):
                
                if idx >= property_data.shape[0] - 1:
@ -631,7 +779,7 @@ def app():
                    # Keep track of this for the moment so we can analyse
                    all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
                    continue
-
+                asdasd
                features = pd.concat([starting_record, ending_record])

                property_model_data.append(