mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding more to the difference record
This commit is contained in:
parent
99d1e9b790
commit
c6fe7ca5f3
1 changed file with 156 additions and 8 deletions
|
|
@ -399,7 +399,75 @@ def compare_records(earliest_record: pd.Series, latest_record: pd.Series, column
|
|||
|
||||
from dataclasses import dataclass
|
||||
from etl.epc.ValidationConfiguration import EPCRecordValidationConfiguration
|
||||
from typing import Union
|
||||
from typing import Union, List
|
||||
|
||||
|
||||
class EPCDifferenceRecord:
    """
    Base class for the difference between two EPC records

    Attributes:
        record1: first record of the pair (after the optional auto sort)
        record2: second record of the pair (after the optional auto sort)
        difference_record: dict keyed by record1's attribute names, LODGEMENT_DATE excluded
    """

    # NOTE: the annotations are string forward references because EPCRecord is defined
    # further down the module; a bare name would raise NameError when this def is executed.
    def __init__(self, record1: "EPCRecord", record2: "EPCRecord", auto_sort: bool = False):
        """
        This method will initialise the EPCDifferenceRecord

        Default usage is for record1 to be the record with the higher RDSAP score.

        :param record1: first record of the pair
        :param record2: second record of the pair
        :param auto_sort: when True, the two records are swapped if record1 <= record2
            (using the records' own comparison), so a tie also results in a swap
        """
        self.record1 = record1
        self.record2 = record2

        if auto_sort and (self.record1 <= self.record2):
            self.record1, self.record2 = self.record2, self.record1

        self._calculate_difference_record()
        self._validate_difference_record()

    def _calculate_difference_record(self):
        """
        This method will calculate the difference between the two records

        NOTE: at the moment every value is taken from record1 as-is; subtracting the matching
        record2 value is not implemented yet.
        """
        self.difference_record = {
            key: self.record1.get(key)
            for key in self.record1.__dict__
            if key != "LODGEMENT_DATE"
        }

    def _validate_difference_record(self):
        """
        This method will validate the difference record

        Currently a no-op.
        """
        # TODO: the drafted check is still disabled. It would skip LODGEMENT_DATE and string
        # values, and raise
        # ValueError(f"Difference record has negative value for {key}") for any value < 0.

    def compare_fields_in_records(self, fields: List[str]):
        """
        This method will compare the records, for specific fields

        :param fields: names of the fields to look up on both records
        :return: True when no listed field differs between record1 and record2
            (also True for an empty list), otherwise False
        """
        # `!=` is kept (rather than `==`) so the per-field comparison is unchanged
        return not any(
            self.record1.get(field) != self.record2.get(field)
            for field in fields
        )

    def get(self, key: str):
        """
        This method will return the value of the key

        :param key: a key of the difference record
        :raises KeyError: when the key is not part of the difference record
        """
        return self.difference_record[key]
|
||||
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class EPCRecord:
|
||||
"""
|
||||
|
|
@ -447,9 +515,10 @@ class EPCRecord:
|
|||
|
||||
def __post_init__(self):
|
||||
# We can have validation and cleaning steps for each of the fields
|
||||
self.WALLS_DESCRIPTION = 'check'
|
||||
|
||||
self._field_validation()
|
||||
# self.WALLS_DESCRIPTION = 'check'
|
||||
# Could also have cleaning of records if needed
|
||||
# self._field_validation()
|
||||
pass
|
||||
|
||||
def _field_validation(self):
|
||||
"""
|
||||
|
|
@ -500,9 +569,61 @@ class EPCRecord:
|
|||
|
||||
if validation_config['range'] is not None:
|
||||
if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]:
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
|
||||
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
|
||||
|
||||
def __sub__(self, other):
    """
    Build the EPCDifferenceRecord for this record and another EPC record

    The pair is passed on with auto_sort=True, so EPCDifferenceRecord may swap the two
    records. A ValueError is raised when other is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        return EPCDifferenceRecord(record1=self, record2=other, auto_sort=True)

    raise ValueError("Can only subtract EPCRecord from EPCRecord")
|
||||
|
||||
def __gt__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is strictly greater than the other's

    A ValueError is raised when other is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_score = self.__dict__[RDSAP_RESPONSE]
        other_score = other.__dict__[RDSAP_RESPONSE]
        return own_score > other_score

    raise ValueError("Can only compare EPCRecord to EPCRecord")
|
||||
|
||||
def __ge__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is greater than or equal to the other's

    A ValueError is raised when other is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_score = self.__dict__[RDSAP_RESPONSE]
        other_score = other.__dict__[RDSAP_RESPONSE]
        return own_score >= other_score

    raise ValueError("Can only compare EPCRecord to EPCRecord")
|
||||
|
||||
def __lt__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is strictly less than the other's

    A ValueError is raised when other is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_score = self.__dict__[RDSAP_RESPONSE]
        other_score = other.__dict__[RDSAP_RESPONSE]
        return own_score < other_score

    raise ValueError("Can only compare EPCRecord to EPCRecord")
|
||||
|
||||
def __le__(self, other):
    """
    Return True if this record's RDSAP_RESPONSE value is less than or equal to the other's

    A ValueError is raised when other is not an EPCRecord.
    """
    if isinstance(other, EPCRecord):
        own_score = self.__dict__[RDSAP_RESPONSE]
        other_score = other.__dict__[RDSAP_RESPONSE]
        return own_score <= other_score

    raise ValueError("Can only compare EPCRecord to EPCRecord")
|
||||
|
||||
def get(self, key: str):
    """
    Look up one attribute of the record by its name

    The value is read from the instance __dict__; a KeyError is raised when the name is
    not present there.
    """
    attributes = self.__dict__
    return attributes[key]
|
||||
|
||||
|
||||
|
||||
# def __init__(self, num) -> None:
|
||||
|
|
@ -573,7 +694,34 @@ def app():
|
|||
# e.g. first vs second, second vs third and also first vs third
|
||||
property_model_data = []
|
||||
|
||||
temp = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
|
||||
epc_records = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
|
||||
|
||||
for idx in range(0, len(epc_records) - 1):
|
||||
|
||||
if idx >= len(epc_records) - 1:
|
||||
break
|
||||
|
||||
earliest_record: EPCRecord = epc_records[idx]
|
||||
latest_record: EPCRecord = epc_records[idx + 1]
|
||||
|
||||
# Auto sort the records so that the record with highest RDSAP score is always record1
|
||||
difference_record: EPCDifferenceRecord = latest_record - earliest_record
|
||||
|
||||
# TODO: Pull out RDSAP_CHANGE to a variable
|
||||
if difference_record.get("RDSAP_CHANGE") == 0:
|
||||
continue
|
||||
|
||||
all_equal = difference_record.compare_fields_in_records(
|
||||
fields=CORE_COMPONENT_FEATURES
|
||||
)
|
||||
|
||||
if all_equal:
|
||||
# Keep track of this for the moment so we can analyse
|
||||
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
|
||||
continue
|
||||
|
||||
property_model_data.append(difference_record.difference_record)
|
||||
|
||||
for idx in range(0, property_data.shape[0] - 1):
|
||||
|
||||
if idx >= property_data.shape[0] - 1:
|
||||
|
|
@ -631,7 +779,7 @@ def app():
|
|||
# Keep track of this for the moment so we can analyse
|
||||
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
|
||||
continue
|
||||
|
||||
asdasd
|
||||
features = pd.concat([starting_record, ending_record])
|
||||
|
||||
property_model_data.append(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue