From 87a2edcdbaf564cd3fc21dcdb770fe73c5e5a4fb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 14 Jun 2023 11:03:35 +0100 Subject: [PATCH] split out process_parts and added tests, updating WindowAttributes class --- epc_data/app.py | 4 +- epc_data/attributes/MainheatAttributes.py | 26 +++--------- epc_data/attributes/RoofAttributes.py | 2 +- epc_data/attributes/WallAttributes.py | 1 - epc_data/attributes/WindowAttributes.py | 47 +++++++++++----------- epc_data/attributes/attribute_utils.py | 36 ++++++++++++++++- epc_data/tests/test_attribute_utils.py | 48 +++++++++++++++++++++-- 7 files changed, 113 insertions(+), 51 deletions(-) diff --git a/epc_data/app.py b/epc_data/app.py index 2ff9cb9e..78232a7a 100644 --- a/epc_data/app.py +++ b/epc_data/app.py @@ -40,11 +40,11 @@ def handler(): cleaner.clean() # For testing: - from epc_data.attributes.MainheatAttributes import MainHeatAttributes + from epc_data.attributes.WindowAttributes import WindowAttributes descriptions = {x["windows-description"] for x in data} out = [] for description in descriptions: - res = MainHeatAttributes(description).process() + res = WindowAttributes(description).process() out.append( { "original_description": description, diff --git a/epc_data/attributes/MainheatAttributes.py b/epc_data/attributes/MainheatAttributes.py index ae11e7d4..88d4342d 100644 --- a/epc_data/attributes/MainheatAttributes.py +++ b/epc_data/attributes/MainheatAttributes.py @@ -1,5 +1,5 @@ -from epc_data.attributes.attribute_utils import clean_description -from typing import Dict, List, Union +from epc_data.attributes.attribute_utils import clean_description, process_part +from typing import Dict, Union class MainHeatAttributes: @@ -37,33 +37,19 @@ class MainHeatAttributes: part = part.strip() # remove leading/trailing white spaces # Heating Systems - self._process_part(result, part, self.HEAT_SYSTEMS, 'has_') + process_part(result, part, self.HEAT_SYSTEMS, 'has_') # Fuel Types - self._process_part(result, part, self.FUEL_TYPES, 'has_') + process_part(result, part, self.FUEL_TYPES, 'has_') # Distribution Systems - self._process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_') + process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_') # Other attributes - self._process_part(result, part, self.OTHERS, 'has_') + process_part(result, part, self.OTHERS, 'has_') # Check for "underfloor" separately in the entire description if "underfloor" in self.description: result['has_underfloor_heating'] = True return result - - @staticmethod - def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str): - """ - Process a part of the description with a given list of attributes - and update the result dictionary. - """ - part_words = part.split() - for attr in attr_list: - attr_words = attr.split() - if set(attr_words).issubset(set(part_words)): - result[f'{prefix}{attr.replace(" ", "_")}'] = True - - return result diff --git a/epc_data/attributes/RoofAttributes.py b/epc_data/attributes/RoofAttributes.py index 84afdc16..22b4aaec 100644 --- a/epc_data/attributes/RoofAttributes.py +++ b/epc_data/attributes/RoofAttributes.py @@ -1,6 +1,6 @@ import re from typing import Dict, Union -from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance +from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance, process_part class RoofAttributes: diff --git a/epc_data/attributes/WallAttributes.py b/epc_data/attributes/WallAttributes.py index 31099322..f07afb28 100644 --- a/epc_data/attributes/WallAttributes.py +++ b/epc_data/attributes/WallAttributes.py @@ -1,4 +1,3 @@ -import re from typing import Dict, Union from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance diff --git a/epc_data/attributes/WindowAttributes.py b/epc_data/attributes/WindowAttributes.py index 88cc4538..87df696e 100644 --- a/epc_data/attributes/WindowAttributes.py +++ b/epc_data/attributes/WindowAttributes.py @@ -1,5 +1,5 @@ -from typing import Dict, List, Union -from epc_data.attributes.attribute_utils import clean_description +from typing import Dict, Union +from epc_data.attributes.attribute_utils import clean_description, process_part class WindowAttributes: @@ -10,34 +10,35 @@ class WindowAttributes: def __init__(self, description: str): self.description: str = clean_description(description.lower()) - if not description or not any( - rt in self.description for rt in - self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES - ): - raise ValueError('Invalid description') + # In the case of an empty description, we want to return a dictionary with all values set to False + # and indicate there was no data + self.nodata = not description + + if not self.nodata: + if not any( + rt in self.description for rt in + self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES + ): + raise ValueError('Invalid description') def process(self) -> Dict[str, Union[str, bool]]: - result: Dict[str, Union[str, bool]] = {f'has_{wt.replace(" ", "_")}': False for wt in self.WINDOW_TYPES} + result: Dict[str, Union[str, bool]] = { + f'has_{wt.replace(" ", "_")}': False for wt in self.GLAZING_KEYWORDS + } + result.update({f'is_{gc.replace(" ", "_")}': False for gc in self.GLAZING_COVERAGE}) + result.update({f'is_{gt.replace(" ", "_")}': False for gt in self.GLAZING_TYPES}) + result["no_data"] = self.nodata + + if self.nodata: + return result description = self.description.split(',') # Process each part separately for part in description: part = part.strip() # remove leading/trailing white spaces - self._process_part(result, part, self.WINDOW_TYPES, 'has_') - - return result - - @staticmethod - def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str): - """ - Process a part of the description with a given list of attributes - and update the result dictionary. - """ - part_words = part.split() - for attr in attr_list: - attr_words = attr.split() - if set(attr_words).issubset(set(part_words)): - result[f'{prefix}{attr.replace(" ", "_")}'] = True + process_part(result, part, self.GLAZING_KEYWORDS, 'has_') + process_part(result, part, self.GLAZING_COVERAGE, 'is_') + process_part(result, part, self.GLAZING_TYPES, 'is_') return result diff --git a/epc_data/attributes/attribute_utils.py b/epc_data/attributes/attribute_utils.py index 57866115..0312e7c1 100644 --- a/epc_data/attributes/attribute_utils.py +++ b/epc_data/attributes/attribute_utils.py @@ -1,5 +1,5 @@ import re -from typing import Tuple, Union, Dict +from typing import Tuple, Union, Dict, List THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)" THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR) @@ -59,3 +59,37 @@ def clean_description(description: str) -> str: for char in special_chars: description = description.replace(char, " ") return description + + +def process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str): + """ + Process a part of the description with a given list of attributes + and update the result dictionary. + """ + + if not isinstance(result, dict): + raise TypeError('Expected a dictionary for result') + if not isinstance(part, str): + raise TypeError('Expected a string for part') + if not isinstance(attr_list, list) or not all(isinstance(i, str) for i in attr_list): + raise TypeError('Expected a list of strings for attr_list') + if not isinstance(prefix, str): + raise TypeError('Expected a string for prefix') + + if not result: + raise ValueError("Result dictionary cannot be empty") + + if not prefix: + raise ValueError("Prefix cannot be empty") + + part_words = part.split() + for attr in attr_list: + attr_words = attr.split() + if set(attr_words).issubset(set(part_words)): + result[f'{prefix}{attr.replace(" ", "_")}'] = True + + at_least_one_attribute_true = any(result.values()) + if not at_least_one_attribute_true: + raise ValueError("No attribute matches found") + + return result diff --git a/epc_data/tests/test_attribute_utils.py b/epc_data/tests/test_attribute_utils.py index 7280e42b..26cea6f9 100644 --- a/epc_data/tests/test_attribute_utils.py +++ b/epc_data/tests/test_attribute_utils.py @@ -1,9 +1,10 @@ -from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description +import pytest +import epc_data.attributes.attribute_utils as attribute_utils def test_extract_thermal_transmittance(): description = "average thermal transmittance 2.3 w/m-¦k" - assert extract_thermal_transmittance({}, description) == ( + assert attribute_utils.extract_thermal_transmittance({}, description) == ( {'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '') @@ -19,4 +20,45 @@ def test_clean_description(): ] for input_str, expected_output in test_cases: - assert clean_description(input_str) == expected_output + assert attribute_utils.clean_description(input_str) == expected_output + + +# Test for normal operation +def test_process_part_normal_operation(): + result = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False} + part = 'high performance glazing' + attr_list = ['glazing', 'glazed', 'glaze'] + prefix = 'has_' + expected_result = {'has_glazing': True, 'has_glazed': False, 'has_glaze': False} + + assert attribute_utils.process_part(result, part, attr_list, prefix) == expected_result + + +# Test for TypeError exceptions +def test_process_part_type_errors(): + result = 'not a dictionary' + part = 'high performance glazing' + attr_list = ['glazing', 'glazed', 'glaze'] + prefix = 'has_' + with pytest.raises(TypeError): + attribute_utils.process_part(result, part, attr_list, prefix) + + +# Test for ValueError exceptions +def test_process_part_value_errors(): + result = {} + part = 'high performance glazing' + attr_list = ['glazing', 'glazed', 'glaze'] + prefix = 'has_' + with pytest.raises(ValueError): + attribute_utils.process_part(result, part, attr_list, prefix) + + +# Test for no attribute matches found +def test_process_part_no_matches(): + result = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False} + part = 'high performance coating' + attr_list = ['glazing', 'glazed', 'glaze'] + prefix = 'has_' + with pytest.raises(ValueError): + attribute_utils.process_part(result, part, attr_list, prefix)