diff --git a/etl/epc/ValidationConfiguration.py b/etl/epc/ValidationConfiguration.py
index c2487706..5d9c910d 100644
--- a/etl/epc/ValidationConfiguration.py
+++ b/etl/epc/ValidationConfiguration.py
@@ -9,7 +9,7 @@ def validate_walls_description(value):
 EPCRecordValidationConfiguration = {
     "WALLS_DESCRIPTION": {
         "type": "string",
-        "acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"]
+        "acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"],
         "function": validate_walls_description
     },
     "FLOOR_DESCRIPTION": {
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index da425b3a..ecc79ba3 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -401,78 +401,12 @@ from dataclasses import dataclass
 from etl.epc.ValidationConfiguration import EPCRecordValidationConfiguration
 from typing import Union, List
 
-
-class EPCDifferenceRecord:
-    """
-    Base class for the difference between two EPC records
-    """
-
-    def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False):
-        """
-        This method will initialise the EPCDifferenceRecord
-        Defaults usage is with record1 to have the higher RDSAP score
-        """
-        self.record1 = record1
-        self.record2 = record2
-        self.difference_record = {}
-
-        if auto_sort and (self.record1 <= self.record2):
-            self.record1, self.record2 = self.record2, self.record1
-
-        self._calculate_difference_record()
-        self._validate_difference_record()
-
-    def _calculate_difference_record(self):
-        """
-        This method will calculate the difference between the two records
-        """
-        self.difference_record = {}
-        for key in self.record1.__dict__.keys():
-            if key == "LODGEMENT_DATE":
-                continue
-            self.difference_record[key] = self.record1.get(key)
-            # - self.record2.get(key)
-
-    def _validate_difference_record(self):
-        """
-        This method will validate the difference record
-        """
-        # for key, value in self.difference_record.items():
-        #     if key == "LODGEMENT_DATE":
-        #         continue
-        #     if isinstance(value, str):
-        #         continue
-        #     if value < 0:
-        #         raise ValueError(f"Difference record has negative value for {key}")
-        pass
-
-    def compare_fields_in_records(self, fields: List[str]):
-        """
-        This method will compare the records, for specific fields
-        """
-
-        all_equal = True
-        for field in fields:
-            if self.record1.get(field) != self.record2.get(field):
-                return False
-
-        if all_equal:
-            return True
-
-    def get(self, key: str):
-        """
-        This method will return the value of the key
-        """
-        return self.difference_record[key]
-
-
-
-
 @dataclass
 class EPCRecord:
     """
     Base class for a EPC record
     """
+    UPRN: str
     WALLS_DESCRIPTION: str
     FLOOR_DESCRIPTION: str
     LIGHTING_DESCRIPTION: str
@@ -618,20 +552,112 @@ class EPCRecord:
 
         return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
 
+    def get(self, key: Union[str, List[str]], return_asdict: bool = False, key_suffix: str = None):
+        """
+        Return the value(s) stored under key (None when missing), optionally as a dict with suffixed keys
+        """
+        if return_asdict:
+            output_dict = {x: self.__dict__.get(x) for x in ([key] if isinstance(key, str) else key)}
+            if key_suffix is not None:
+                output_dict = {f"{x}_{key_suffix}": y for x, y in output_dict.items()}
+            return output_dict
+
+        if isinstance(key, list):
+            return [self.__dict__.get(x) for x in key]
+        elif isinstance(key, str):
+            return self.__dict__.get(key)
+
+
+class EPCDifferenceRecord:
+    """
+    Base class for the difference between two EPC records
+    """
+
+    def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False):
+        """
+        This method will initialise the EPCDifferenceRecord
+        Default usage is with record2 to have the higher RDSAP score
+        """
+        self.record1 = record1
+        self.record2 = record2
+        self.difference_record = {}
+
+        if auto_sort and (self.record2 <= self.record1):
+            self.record1, self.record2 = self.record2, self.record1
+
+        self._construct_difference_record()
+        self._validate_difference_record()
+
+    def _construct_difference_record(self):
+        """
+        This method will construct the difference record between the two records
+        """
+
+        rdsap_change = self.record2.get(RDSAP_RESPONSE) - self.record1.get(RDSAP_RESPONSE)
+        heat_demand_change = self.record2.get(HEAT_DEMAND_RESPONSE) - self.record1.get(HEAT_DEMAND_RESPONSE)
+        carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE)
+
+        component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
+        ending_record = self.record2.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="ENDING")
+        starting_record = self.record1.get(component_variables + ["LODGEMENT_DATE"], return_asdict=True, key_suffix="STARTING")
+
+        # TODO: Do we want to take the earliest potentials or max potentials?
+        self.difference_record = {
+            "UPRN": self.record1.get("UPRN"),
+            "RDSAP_CHANGE": rdsap_change,
+            "HEAT_DEMAND_CHANGE": heat_demand_change,
+            "CARBON_CHANGE": carbon_change,
+            "SAP_STARTING": self.record1.get(RDSAP_RESPONSE),
+            "SAP_ENDING": self.record2.get(RDSAP_RESPONSE),
+            "HEAT_DEMAND_STARTING": self.record1.get(HEAT_DEMAND_RESPONSE),
+            "HEAT_DEMAND_ENDING": self.record2.get(HEAT_DEMAND_RESPONSE),
+            "CARBON_STARTING": self.record1.get(CARBON_RESPONSE),
+            "CARBON_ENDING": self.record2.get(CARBON_RESPONSE),
+            "POTENTIAL_ENERGY_EFFICIENCY": max(self.record1.get("POTENTIAL_ENERGY_EFFICIENCY"), self.record2.get("POTENTIAL_ENERGY_EFFICIENCY")),
+            "ENVIRONMENT_IMPACT_POTENTIAL": max(self.record1.get("ENVIRONMENT_IMPACT_POTENTIAL"), self.record2.get("ENVIRONMENT_IMPACT_POTENTIAL")),
+            "ENERGY_CONSUMPTION_POTENTIAL": max(self.record1.get("ENERGY_CONSUMPTION_POTENTIAL"), self.record2.get("ENERGY_CONSUMPTION_POTENTIAL")),
+            "CO2_EMISSIONS_POTENTIAL": max(self.record1.get("CO2_EMISSIONS_POTENTIAL"), self.record2.get("CO2_EMISSIONS_POTENTIAL")),
+            **ending_record,
+            **starting_record
+        }
+
+    def _validate_difference_record(self):
+        """
+        This method will validate the difference record
+        """
+        # for key, value in self.difference_record.items():
+        #     if key == "LODGEMENT_DATE":
+        #         continue
+        #     if isinstance(value, str):
+        #         continue
+        #     if value < 0:
+        #         raise ValueError(f"Difference record has negative value for {key}")
+        pass
+
+    def compare_fields_in_records(self, fields: List[str]):
+        """
+        This method will compare the records, for specific fields
+        """
+
+        all_equal = True
+        for field in fields:
+            if self.record1.get(field) != self.record2.get(field):
+                return False
+
+        if all_equal:
+            return True
+
     def get(self, key: str):
         """
         This method will return the value of the key
         """
-        return self.__dict__[key]
-
+        return self.difference_record.get(key)
 
-
-    # def __init__(self, num) -> None:
-    #     self.num = num
-
-    # def __sub__(self, other):
-    #     return self.num - other.num
-
+    def append_fixed_data(self, fixed_data: dict):
+        """
+        This method will append fixed data to the difference record
+        """
+        self.difference_record.update(fixed_data)
 
 def app():
     # Get all the files in the directory
@@ -694,7 +720,7 @@ def app():
             # e.g. first vs second, second vs third and also first vs third
             property_model_data = []
 
-            epc_records = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
+            epc_records = [EPCRecord(uprn, **x) for x in variable_data.to_dict(orient='records')]
 
             for idx in range(0, len(epc_records) - 1):
 
@@ -720,6 +746,8 @@ def app():
                     all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
                     continue
 
+                difference_record.append_fixed_data(fixed_data)
+
                 property_model_data.append(difference_record.difference_record)
 
             for idx in range(0, property_data.shape[0] - 1):