mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add validation layer
This commit is contained in:
parent
2845badbc0
commit
d76dc3fc56
2 changed files with 84 additions and 6 deletions
23
etl/epc/ValidationConfiguration.py
Normal file
23
etl/epc/ValidationConfiguration.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
"""
|
||||
Specify the validation rules for each field in the different records.
|
||||
"""
|
||||
|
||||
def validate_walls_description(value):
    """
    Check that a walls description is one of the recognised categories.

    :param value: the walls description to check
    :raises ValueError: if value is not one of the recognised categories
    """
    recognised_descriptions = (
        "Cavity",
        "Solid",
        "System built",
        "Timber frame",
        "Suspended timber",
        "Other",
    )
    if value not in recognised_descriptions:
        raise ValueError("Walls description is not valid")
|
||||
|
||||
# Validation rules for the EPC record, keyed by field name.
# Every rule carries a "type" ("string" or "float"). The other keys are the
# constraints for that type: "acceptable_values" lists the allowed values,
# "range" is a [minimum, maximum] pair with both bounds allowed, and
# "function" is a callable that is handed the field value and raises when
# the value is not valid.
EPCRecordValidationConfiguration = {
    "WALLS_DESCRIPTION": {
        "type": "string",
        "acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"],
        "function": validate_walls_description,
    },
    "FLOOR_DESCRIPTION": {
        "type": "string",
        "acceptable_values": ["Solid", "Suspended", "Other"],
    },
    "ENERGY_CONSUMPTION_CURRENT": {
        "type": "float",
        # NOTE(review): confirm that 0-100 is the intended range for this field.
        "range": [0, 100],
    },
}
|
||||
|
|
@ -398,6 +398,8 @@ def compare_records(earliest_record: pd.Series, latest_record: pd.Series, column
|
|||
return True
|
||||
|
||||
from dataclasses import dataclass
|
||||
from etl.epc.ValidationConfiguration import EPCRecordValidationConfiguration
|
||||
from typing import Union
|
||||
@dataclass
|
||||
class EPCRecord:
|
||||
"""
|
||||
|
|
@ -443,6 +445,62 @@ class EPCRecord:
|
|||
ENERGY_CONSUMPTION_CURRENT: int
|
||||
CO2_EMISSIONS_CURRENT: float
|
||||
|
||||
def __post_init__(self):
|
||||
# We can have validation and cleaning steps for each of the fields
|
||||
self.WALLS_DESCRIPTION = 'check'
|
||||
|
||||
self._field_validation()
|
||||
|
||||
def _field_validation(self):
    """
    This method will validate each of the fields in the EPC record

    Every field named in EPCRecordValidationConfiguration is read from the
    record and passed to the validator matching the "type" of its rule.

    :raises ValueError: if a field does not pass its rule, or if a rule has
        a type other than "string" or "float"
    """
    self.validation_configuration = EPCRecordValidationConfiguration

    for record_key, validation_config in self.validation_configuration.items():
        # Get the variable named record key from self
        field_value = getattr(self, record_key)
        validation_type = validation_config['type']

        if validation_type == "string":
            self._validate_string(record_key, field_value, validation_config)
        elif validation_type == "float":
            # record_key is required by _validate_float for its error messages
            self._validate_float(record_key, field_value, validation_config)
        else:
            raise ValueError(f"Validation type {validation_type} not supported")
|
||||
|
||||
def _validate_string(self, record_key: str, field_value: Union[str, float], validation_config: dict):
|
||||
"""
|
||||
Validate a string field
|
||||
"""
|
||||
|
||||
if 'function' in validation_config:
|
||||
try:
|
||||
validation_config['function'](field_value)
|
||||
except:
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}")
|
||||
|
||||
if validation_config['acceptable_values'] is not None:
|
||||
if field_value not in validation_config['acceptable_values']:
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable values of {validation_config['acceptable_values']}")
|
||||
|
||||
def _validate_float(self, record_key: str, field_value: Union[str, float], validation_config: dict):
|
||||
"""
|
||||
Validate a float field
|
||||
"""
|
||||
|
||||
if 'function' in validation_config:
|
||||
try:
|
||||
validation_config['function'](field_value)
|
||||
except:
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}")
|
||||
|
||||
if validation_config['range'] is not None:
|
||||
if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]:
|
||||
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
|
||||
|
||||
|
||||
|
||||
|
||||
# def __init__(self, num) -> None:
|
||||
# self.num = num
|
||||
|
||||
|
|
@ -450,11 +508,6 @@ class EPCRecord:
|
|||
# return self.num - other.num
|
||||
|
||||
|
||||
test = EPCRecord(10)
|
||||
test2 = EPCRecord(20)
|
||||
test - test2
|
||||
|
||||
|
||||
def app():
|
||||
# Get all the files in the directory
|
||||
|
||||
|
|
@ -484,7 +537,7 @@ def app():
|
|||
|
||||
data_by_urpn = []
|
||||
for uprn, property_data in df.groupby("UPRN", observed=True):
|
||||
|
||||
|
||||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
fixed_data = {}
|
||||
|
||||
|
|
@ -515,6 +568,8 @@ def app():
|
|||
# Note: we look at changes between subsequent EPCS, however we could look at other permutations
|
||||
# e.g. first vs second, second vs third and also first vs third
|
||||
property_model_data = []
|
||||
|
||||
temp = [EPCRecord(**x) for x in variable_data.to_dict(orient='records')]
|
||||
for idx in range(0, property_data.shape[0] - 1):
|
||||
|
||||
if idx >= property_data.shape[0] - 1:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue