Split out process_part into attribute_utils and added tests; updated the WindowAttributes class

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-14 11:03:35 +01:00
parent d779cc1b4c
commit 87a2edcdba
7 changed files with 113 additions and 51 deletions

View file

@ -40,11 +40,11 @@ def handler():
cleaner.clean()
# For testing:
from epc_data.attributes.MainheatAttributes import MainHeatAttributes
from epc_data.attributes.WindowAttributes import WindowAttributes
descriptions = {x["windows-description"] for x in data}
out = []
for description in descriptions:
res = MainHeatAttributes(description).process()
res = WindowAttributes(description).process()
out.append(
{
"original_description": description,

View file

@ -1,5 +1,5 @@
from epc_data.attributes.attribute_utils import clean_description
from typing import Dict, List, Union
from epc_data.attributes.attribute_utils import clean_description, process_part
from typing import Dict, Union
class MainHeatAttributes:
@ -37,33 +37,19 @@ class MainHeatAttributes:
part = part.strip() # remove leading/trailing white spaces
# Heating Systems
self._process_part(result, part, self.HEAT_SYSTEMS, 'has_')
process_part(result, part, self.HEAT_SYSTEMS, 'has_')
# Fuel Types
self._process_part(result, part, self.FUEL_TYPES, 'has_')
process_part(result, part, self.FUEL_TYPES, 'has_')
# Distribution Systems
self._process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
# Other attributes
self._process_part(result, part, self.OTHERS, 'has_')
process_part(result, part, self.OTHERS, 'has_')
# Check for "underfloor" separately in the entire description
if "underfloor" in self.description:
result['has_underfloor_heating'] = True
return result
@staticmethod
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
"""
Process a part of the description with a given list of attributes
and update the result dictionary.
"""
part_words = part.split()
for attr in attr_list:
attr_words = attr.split()
if set(attr_words).issubset(set(part_words)):
result[f'{prefix}{attr.replace(" ", "_")}'] = True
return result

View file

@ -1,6 +1,6 @@
import re
from typing import Dict, Union
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance, process_part
class RoofAttributes:

View file

@ -1,4 +1,3 @@
import re
from typing import Dict, Union
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance

View file

@ -1,5 +1,5 @@
from typing import Dict, List, Union
from epc_data.attributes.attribute_utils import clean_description
from typing import Dict, Union
from epc_data.attributes.attribute_utils import clean_description, process_part
class WindowAttributes:
@ -10,34 +10,35 @@ class WindowAttributes:
def __init__(self, description: str):
self.description: str = clean_description(description.lower())
if not description or not any(
rt in self.description for rt in
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
):
raise ValueError('Invalid description')
# In the case of an empty description, we want to return a dictionary with all values set to False
# and indicate there was no data
self.nodata = not description
if not self.nodata:
if not any(
rt in self.description for rt in
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
):
raise ValueError('Invalid description')
def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = {f'has_{wt.replace(" ", "_")}': False for wt in self.WINDOW_TYPES}
result: Dict[str, Union[str, bool]] = {
f'has_{wt.replace(" ", "_")}': False for wt in self.GLAZING_KEYWORDS
}
result.update({f'is_{gc.replace(" ", "_")}': False for gc in self.GLAZING_COVERAGE})
result.update({f'is_{gt.replace(" ", "_")}': False for gt in self.GLAZING_TYPES})
result["no_data"] = self.nodata
if self.nodata:
return result
description = self.description.split(',')
# Process each part separately
for part in description:
part = part.strip() # remove leading/trailing white spaces
self._process_part(result, part, self.WINDOW_TYPES, 'has_')
return result
@staticmethod
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
"""
Process a part of the description with a given list of attributes
and update the result dictionary.
"""
part_words = part.split()
for attr in attr_list:
attr_words = attr.split()
if set(attr_words).issubset(set(part_words)):
result[f'{prefix}{attr.replace(" ", "_")}'] = True
process_part(result, part, self.GLAZING_KEYWORDS, 'has_')
process_part(result, part, self.GLAZING_COVERAGE, 'is_')
process_part(result, part, self.GLAZING_TYPES, 'is_')
return result

View file

@ -1,5 +1,5 @@
import re
from typing import Tuple, Union, Dict
from typing import Tuple, Union, Dict, List
THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR)
@ -59,3 +59,37 @@ def clean_description(description: str) -> str:
for char in special_chars:
description = description.replace(char, " ")
return description
def process_part(
    result: Dict[str, Union[str, bool]],
    part: str,
    attr_list: List[str],
    prefix: str,
) -> Dict[str, Union[str, bool]]:
    """
    Flag every attribute of ``attr_list`` whose words all occur in ``part``.

    ``part`` and each attribute are split on whitespace; an attribute matches
    when all of its words appear among the words of ``part`` (word order and
    adjacency are not considered). For each match the key
    ``f"{prefix}{attribute with spaces replaced by underscores}"`` is set to
    ``True`` in ``result``.

    Args:
        result: Dictionary to update in place; must not be empty.
        part: The piece of a description to inspect.
        attr_list: Attribute names (possibly multi-word) to look for.
        prefix: Non-empty string prepended to each generated key.

    Returns:
        The same ``result`` object that was passed in, after mutation.

    Raises:
        TypeError: If any argument has the wrong type.
        ValueError: If ``result`` or ``prefix`` is empty, or if no value in
            ``result`` is truthy once the part has been processed.
    """
    if not isinstance(result, dict):
        raise TypeError('Expected a dictionary for result')
    if not isinstance(part, str):
        raise TypeError('Expected a string for part')
    if not isinstance(attr_list, list) or not all(isinstance(i, str) for i in attr_list):
        raise TypeError('Expected a list of strings for attr_list')
    if not isinstance(prefix, str):
        raise TypeError('Expected a string for prefix')

    if not result:
        raise ValueError("Result dictionary cannot be empty")
    if not prefix:
        raise ValueError("Prefix cannot be empty")

    # Build the word set once; it does not change between attributes.
    part_words = set(part.split())
    for attr in attr_list:
        if part_words.issuperset(attr.split()):
            result[f'{prefix}{attr.replace(" ", "_")}'] = True

    # NOTE(review): this inspects every value in ``result``, not only the keys
    # touched by this call, so a flag set by an earlier call also satisfies it.
    if not any(result.values()):
        raise ValueError("No attribute matches found")

    return result

View file

@ -1,9 +1,10 @@
from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description
import pytest
import epc_data.attributes.attribute_utils as attribute_utils
def test_extract_thermal_transmittance():
description = "average thermal transmittance 2.3 w/m-¦k"
assert extract_thermal_transmittance({}, description) == (
assert attribute_utils.extract_thermal_transmittance({}, description) == (
{'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '')
@ -19,4 +20,45 @@ def test_clean_description():
]
for input_str, expected_output in test_cases:
assert clean_description(input_str) == expected_output
assert attribute_utils.clean_description(input_str) == expected_output
# Test for normal operation
def test_process_part_normal_operation():
    """Only the attribute whose word appears in the part is switched to True."""
    flags = dict.fromkeys(('has_glazing', 'has_glazed', 'has_glaze'), False)
    outcome = attribute_utils.process_part(
        flags,
        'high performance glazing',
        ['glazing', 'glazed', 'glaze'],
        'has_',
    )
    assert outcome == {'has_glazing': True, 'has_glazed': False, 'has_glaze': False}
# Test for TypeError exceptions
def test_process_part_type_errors():
    """Every argument of process_part is type-checked and rejected with TypeError."""
    part = 'high performance glazing'
    attr_list = ['glazing', 'glazed', 'glaze']
    prefix = 'has_'

    # One invalid argument per call; all other arguments are valid.
    bad_calls = [
        ('not a dictionary', part, attr_list, prefix),       # result is not a dict
        ({'has_glazing': False}, 123, attr_list, prefix),    # part is not a str
        ({'has_glazing': False}, part, 'glazing', prefix),   # attr_list is not a list
        ({'has_glazing': False}, part, ['glazing', 1], prefix),  # non-str element
        ({'has_glazing': False}, part, attr_list, None),     # prefix is not a str
    ]
    for args in bad_calls:
        with pytest.raises(TypeError):
            attribute_utils.process_part(*args)
# Test for ValueError exceptions
def test_process_part_value_errors():
    """Empty ``result`` and empty ``prefix`` are each rejected with their own ValueError."""
    part = 'high performance glazing'
    attr_list = ['glazing', 'glazed', 'glaze']

    # ``match`` pins the specific guard, since process_part raises ValueError
    # for several different reasons.
    with pytest.raises(ValueError, match="Result dictionary cannot be empty"):
        attribute_utils.process_part({}, part, attr_list, 'has_')

    with pytest.raises(ValueError, match="Prefix cannot be empty"):
        attribute_utils.process_part({'has_glazing': False}, part, attr_list, '')
# Test for no attribute matches found
def test_process_part_no_matches():
    """A part containing none of the attribute words leaves all flags False and raises ValueError."""
    flags = dict.fromkeys(('has_glazing', 'has_glazed', 'has_glaze'), False)
    with pytest.raises(ValueError):
        attribute_utils.process_part(
            flags,
            'high performance coating',
            ['glazing', 'glazed', 'glaze'],
            'has_',
        )