mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
split out process_parts and added tests, updating WindowAttributes class
This commit is contained in:
parent
d779cc1b4c
commit
87a2edcdba
7 changed files with 113 additions and 51 deletions
|
|
@ -40,11 +40,11 @@ def handler():
|
|||
cleaner.clean()
|
||||
|
||||
# For testing:
|
||||
from epc_data.attributes.MainheatAttributes import MainHeatAttributes
|
||||
from epc_data.attributes.WindowAttributes import WindowAttributes
|
||||
descriptions = {x["windows-description"] for x in data}
|
||||
out = []
|
||||
for description in descriptions:
|
||||
res = MainHeatAttributes(description).process()
|
||||
res = WindowAttributes(description).process()
|
||||
out.append(
|
||||
{
|
||||
"original_description": description,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from epc_data.attributes.attribute_utils import clean_description
|
||||
from typing import Dict, List, Union
|
||||
from epc_data.attributes.attribute_utils import clean_description, process_part
|
||||
from typing import Dict, Union
|
||||
|
||||
|
||||
class MainHeatAttributes:
|
||||
|
|
@ -37,33 +37,19 @@ class MainHeatAttributes:
|
|||
part = part.strip() # remove leading/trailing white spaces
|
||||
|
||||
# Heating Systems
|
||||
self._process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
||||
process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
||||
|
||||
# Fuel Types
|
||||
self._process_part(result, part, self.FUEL_TYPES, 'has_')
|
||||
process_part(result, part, self.FUEL_TYPES, 'has_')
|
||||
|
||||
# Distribution Systems
|
||||
self._process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
|
||||
process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
|
||||
|
||||
# Other attributes
|
||||
self._process_part(result, part, self.OTHERS, 'has_')
|
||||
process_part(result, part, self.OTHERS, 'has_')
|
||||
|
||||
# Check for "underfloor" separately in the entire description
|
||||
if "underfloor" in self.description:
|
||||
result['has_underfloor_heating'] = True
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
|
||||
"""
|
||||
Process a part of the description with a given list of attributes
|
||||
and update the result dictionary.
|
||||
"""
|
||||
part_words = part.split()
|
||||
for attr in attr_list:
|
||||
attr_words = attr.split()
|
||||
if set(attr_words).issubset(set(part_words)):
|
||||
result[f'{prefix}{attr.replace(" ", "_")}'] = True
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import re
|
||||
from typing import Dict, Union
|
||||
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
||||
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance, process_part
|
||||
|
||||
|
||||
class RoofAttributes:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import re
|
||||
from typing import Dict, Union
|
||||
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Dict, List, Union
|
||||
from epc_data.attributes.attribute_utils import clean_description
|
||||
from typing import Dict, Union
|
||||
from epc_data.attributes.attribute_utils import clean_description, process_part
|
||||
|
||||
|
||||
class WindowAttributes:
|
||||
|
|
@ -10,34 +10,35 @@ class WindowAttributes:
|
|||
def __init__(self, description: str):
|
||||
self.description: str = clean_description(description.lower())
|
||||
|
||||
if not description or not any(
|
||||
rt in self.description for rt in
|
||||
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
|
||||
):
|
||||
raise ValueError('Invalid description')
|
||||
# In the case of an empty description, we want to return a dictionary with all values set to False
|
||||
# and indicate there was no data
|
||||
self.nodata = not description
|
||||
|
||||
if not self.nodata:
|
||||
if not any(
|
||||
rt in self.description for rt in
|
||||
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
|
||||
):
|
||||
raise ValueError('Invalid description')
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
result: Dict[str, Union[str, bool]] = {f'has_{wt.replace(" ", "_")}': False for wt in self.WINDOW_TYPES}
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
f'has_{wt.replace(" ", "_")}': False for wt in self.GLAZING_KEYWORDS
|
||||
}
|
||||
result.update({f'is_{gc.replace(" ", "_")}': False for gc in self.GLAZING_COVERAGE})
|
||||
result.update({f'is_{gt.replace(" ", "_")}': False for gt in self.GLAZING_TYPES})
|
||||
result["no_data"] = self.nodata
|
||||
|
||||
if self.nodata:
|
||||
return result
|
||||
|
||||
description = self.description.split(',')
|
||||
|
||||
# Process each part separately
|
||||
for part in description:
|
||||
part = part.strip() # remove leading/trailing white spaces
|
||||
self._process_part(result, part, self.WINDOW_TYPES, 'has_')
|
||||
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
|
||||
"""
|
||||
Process a part of the description with a given list of attributes
|
||||
and update the result dictionary.
|
||||
"""
|
||||
part_words = part.split()
|
||||
for attr in attr_list:
|
||||
attr_words = attr.split()
|
||||
if set(attr_words).issubset(set(part_words)):
|
||||
result[f'{prefix}{attr.replace(" ", "_")}'] = True
|
||||
process_part(result, part, self.GLAZING_KEYWORDS, 'has_')
|
||||
process_part(result, part, self.GLAZING_COVERAGE, 'is_')
|
||||
process_part(result, part, self.GLAZING_TYPES, 'is_')
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import re
|
||||
from typing import Tuple, Union, Dict
|
||||
from typing import Tuple, Union, Dict, List
|
||||
|
||||
THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
|
||||
THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR)
|
||||
|
|
@ -59,3 +59,37 @@ def clean_description(description: str) -> str:
|
|||
for char in special_chars:
|
||||
description = description.replace(char, " ")
|
||||
return description
|
||||
|
||||
|
||||
def process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
    """
    Flag every attribute of ``attr_list`` whose words all occur in ``part``.

    An attribute matches when each of its whitespace-separated words is found
    among the whitespace-separated words of ``part``; word order and adjacency
    are ignored. For each match, the key ``prefix + attr`` (with spaces in
    ``attr`` replaced by underscores) is set to ``True`` in ``result``.

    ``result`` is updated in place and is also returned.

    :param result: non-empty dictionary of flags to update
    :param part: one fragment of a description
    :param attr_list: attribute names to look for in ``part``
    :param prefix: non-empty string prepended to each generated key
    :return: the same ``result`` dictionary, after the update
    :raises TypeError: if ``result`` is not a dict, ``part`` or ``prefix`` is
        not a str, or ``attr_list`` is not a list of str
    :raises ValueError: if ``result`` or ``prefix`` is empty, or if no value
        in ``result`` is truthy once matching is done
    """
    if not isinstance(result, dict):
        raise TypeError('Expected a dictionary for result')
    if not isinstance(part, str):
        raise TypeError('Expected a string for part')
    if not isinstance(attr_list, list) or not all(isinstance(i, str) for i in attr_list):
        raise TypeError('Expected a list of strings for attr_list')
    if not isinstance(prefix, str):
        raise TypeError('Expected a string for prefix')

    if not result:
        raise ValueError("Result dictionary cannot be empty")

    if not prefix:
        raise ValueError("Prefix cannot be empty")

    # The words of the part do not change between attributes, so build the
    # lookup set once instead of on every loop iteration.
    part_words = set(part.split())
    for attr in attr_list:
        if part_words.issuperset(attr.split()):
            result[f'{prefix}{attr.replace(" ", "_")}'] = True

    # This looks at every value in the dictionary, not only the keys this
    # call could have set, so a truthy value already present in ``result``
    # also counts as a match.
    if not any(result.values()):
        raise ValueError("No attribute matches found")

    return result
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description
|
||||
import pytest
|
||||
import epc_data.attributes.attribute_utils as attribute_utils
|
||||
|
||||
|
||||
def test_extract_thermal_transmittance():
|
||||
description = "average thermal transmittance 2.3 w/m-¦k"
|
||||
assert extract_thermal_transmittance({}, description) == (
|
||||
assert attribute_utils.extract_thermal_transmittance({}, description) == (
|
||||
{'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '')
|
||||
|
||||
|
||||
|
|
@ -19,4 +20,45 @@ def test_clean_description():
|
|||
]
|
||||
|
||||
for input_str, expected_output in test_cases:
|
||||
assert clean_description(input_str) == expected_output
|
||||
assert attribute_utils.clean_description(input_str) == expected_output
|
||||
|
||||
|
||||
# Test for normal operation
|
||||
def test_process_part_normal_operation():
    """Only the attribute whose word occurs in the part is switched on."""
    attributes = ['glazing', 'glazed', 'glaze']
    flags = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False}

    outcome = attribute_utils.process_part(
        flags, 'high performance glazing', attributes, 'has_'
    )

    assert outcome == {'has_glazing': True, 'has_glazed': False, 'has_glaze': False}
|
||||
|
||||
|
||||
# Test for TypeError exceptions
|
||||
def test_process_part_type_errors():
    """Every wrongly typed argument must be rejected with a TypeError."""
    part = 'high performance glazing'
    attr_list = ['glazing', 'glazed', 'glaze']
    prefix = 'has_'

    def fresh_result():
        # A new dictionary per call so one case cannot influence another.
        return {'has_glazing': False, 'has_glazed': False, 'has_glaze': False}

    bad_calls = [
        ('not a dictionary', part, attr_list, prefix),     # result is not a dict
        (fresh_result(), 123, attr_list, prefix),          # part is not a str
        (fresh_result(), part, 'glazing', prefix),         # attr_list is not a list
        (fresh_result(), part, ['glazing', 1], prefix),    # attr_list holds a non-str
        (fresh_result(), part, attr_list, None),           # prefix is not a str
    ]
    for args in bad_calls:
        with pytest.raises(TypeError):
            attribute_utils.process_part(*args)
|
||||
|
||||
|
||||
# Test for ValueError exceptions
|
||||
def test_process_part_value_errors():
    """An empty result dictionary or an empty prefix must raise ValueError."""
    part = 'high performance glazing'
    attr_list = ['glazing', 'glazed', 'glaze']

    # The messages are pinned so the "no matches" ValueError cannot make
    # these checks pass by accident.
    with pytest.raises(ValueError, match="Result dictionary cannot be empty"):
        attribute_utils.process_part({}, part, attr_list, 'has_')

    with pytest.raises(ValueError, match="Prefix cannot be empty"):
        attribute_utils.process_part({'has_glazing': False}, part, attr_list, '')
|
||||
|
||||
|
||||
# Test for no attribute matches found
|
||||
def test_process_part_no_matches():
    """A part containing none of the attributes must raise ValueError."""
    attributes = ['glazing', 'glazed', 'glaze']
    flags = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False}

    with pytest.raises(ValueError):
        attribute_utils.process_part(
            flags, 'high performance coating', attributes, 'has_'
        )
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue