adding more to the difference record

This commit is contained in:
Michael Duong 2023-12-05 16:07:47 +00:00
parent 99d1e9b790
commit c6fe7ca5f3

View file

@ -399,7 +399,75 @@ def compare_records(earliest_record: pd.Series, latest_record: pd.Series, column
from dataclasses import dataclass
from etl.epc.ValidationConfiguration import EPCRecordValidationConfiguration
from typing import Union
from typing import Union, List
class EPCDifferenceRecord:
    """
    Base class for the difference between two EPC records.

    Attributes:
        record1: First record of the pair. When ``auto_sort`` is used this is
            the record that does not compare ``<=`` to the other one.
        record2: Second record of the pair.
        difference_record: Mapping of field name to value, populated by
            ``_calculate_difference_record``.
    """

    # The annotations below are quoted because EPCRecord is defined further
    # down the module; a bare name would be evaluated when this class body
    # runs and raise NameError.
    def __init__(self, record1: "EPCRecord", record2: "EPCRecord", auto_sort: bool = False):
        """
        Initialise the EPCDifferenceRecord.

        Default usage is for record1 to have the higher RDSAP score.

        Args:
            record1: First record of the pair.
            record2: Second record of the pair.
            auto_sort: When True, the two records are swapped whenever
                ``record1 <= record2``. Records that compare equal are
                therefore swapped as well.
        """
        self.record1 = record1
        self.record2 = record2
        self.difference_record = {}
        if auto_sort and self.record1 <= self.record2:
            self.record1, self.record2 = self.record2, self.record1
        self._calculate_difference_record()
        self._validate_difference_record()

    def _calculate_difference_record(self) -> None:
        """
        Populate ``difference_record`` from the two records.

        Every instance attribute of ``record1`` except ``LODGEMENT_DATE`` is
        copied across using ``record1.get``.
        """
        # TODO: the value stored is currently record1's value only; the
        # subtraction of record2's value has not been implemented yet.
        self.difference_record = {
            key: self.record1.get(key)
            for key in self.record1.__dict__
            if key != "LODGEMENT_DATE"
        }

    def _validate_difference_record(self) -> None:
        """
        Validate the difference record.

        Currently a no-op.
        """
        # TODO: once real differences are calculated, raise ValueError for any
        # non-string value that is negative (LODGEMENT_DATE excluded).

    def compare_fields_in_records(self, fields: List[str]) -> bool:
        """
        Compare the two records on specific fields.

        Args:
            fields: Names of the fields to compare.

        Returns:
            True when every listed field has the same value in both records
            (also True for an empty list), otherwise False.
        """
        return not any(
            self.record1.get(field) != self.record2.get(field)
            for field in fields
        )

    def get(self, key: str):
        """
        Return the value stored in the difference record for ``key``.

        Raises:
            KeyError: If ``key`` is not present in the difference record.
        """
        return self.difference_record[key]
@dataclass
class EPCRecord:
"""
@ -447,9 +515,10 @@ class EPCRecord:
def __post_init__(self):
# Hook that runs after the record has been initialised.
# NOTE(review): the statements below appear both active and commented out;
# this looks like the before/after sides of a diff hunk shown together.
# Confirm which version is intended before relying on this method.
# We can have validation and cleaning steps for each of the fields
self.WALLS_DESCRIPTION = 'check'
self._field_validation()
# self.WALLS_DESCRIPTION = 'check'
# Could also have cleaning of records if needed
# self._field_validation()
pass
def _field_validation(self):
"""
@ -500,9 +569,61 @@ class EPCRecord:
if validation_config['range'] is not None:
if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]:
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
def __sub__(self, other):
    """
    Build the difference record for ``self - other``.

    The pair is handed to EPCDifferenceRecord with ``auto_sort=True``, so the
    resulting record may hold the two operands in swapped order.

    Raises:
        ValueError: If ``other`` is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        return EPCDifferenceRecord(record1=self, record2=other, auto_sort=True)
    raise ValueError("Can only subtract EPCRecord from EPCRecord")
def __gt__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is strictly greater
    than the other record's.

    Raises:
        ValueError: If ``other`` is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_value = self.__dict__[RDSAP_RESPONSE]
        other_value = other.__dict__[RDSAP_RESPONSE]
        return own_value > other_value
    raise ValueError("Can only compare EPCRecord to EPCRecord")
def __ge__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is greater than or
    equal to the other record's.

    Raises:
        ValueError: If ``other`` is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_value = self.__dict__[RDSAP_RESPONSE]
        other_value = other.__dict__[RDSAP_RESPONSE]
        return own_value >= other_value
    raise ValueError("Can only compare EPCRecord to EPCRecord")
def __lt__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is strictly less than
    the other record's.

    Raises:
        ValueError: If ``other`` is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_value = self.__dict__[RDSAP_RESPONSE]
        other_value = other.__dict__[RDSAP_RESPONSE]
        return own_value < other_value
    raise ValueError("Can only compare EPCRecord to EPCRecord")
def __le__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is less than or equal
    to the other record's.

    Raises:
        ValueError: If ``other`` is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_value = self.__dict__[RDSAP_RESPONSE]
        other_value = other.__dict__[RDSAP_RESPONSE]
        return own_value <= other_value
    raise ValueError("Can only compare EPCRecord to EPCRecord")
def get(self, key: str):
    """
    Look up ``key`` among this record's instance attributes and return its
    value.

    Raises:
        KeyError: If the record has no attribute stored under ``key``.
    """
    attributes = self.__dict__
    return attributes[key]
# def __init__(self, num) -> None:
@ -573,7 +694,34 @@ def app():
# e.g. first vs second, second vs third and also first vs third
property_model_data = []
temp = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
epc_records = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
for idx in range(0, len(epc_records) - 1):
if idx >= len(epc_records) - 1:
break
earliest_record: EPCRecord = epc_records[idx]
latest_record: EPCRecord = epc_records[idx + 1]
# Auto sort the records so that the record with highest RDSAP score is always record1
difference_record: EPCDifferenceRecord = latest_record - earliest_record
# TODO: Pull out RDSAP_CHANGE to a variable
if difference_record.get("RDSAP_CHANGE") == 0:
continue
all_equal = difference_record.compare_fields_in_records(
fields=CORE_COMPONENT_FEATURES
)
if all_equal:
# Keep track of this for the moment so we can analyse
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
continue
property_model_data.append(difference_record.difference_record)
for idx in range(0, property_data.shape[0] - 1):
if idx >= property_data.shape[0] - 1:
@ -631,7 +779,7 @@ def app():
# Keep track of this for the moment so we can analyse
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
continue
asdasd
features = pd.concat([starting_record, ending_record])
property_model_data.append(