mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Split out process_part and added tests; updated the WindowAttributes class
This commit is contained in:
parent
d779cc1b4c
commit
87a2edcdba
7 changed files with 113 additions and 51 deletions
|
|
@ -40,11 +40,11 @@ def handler():
|
||||||
cleaner.clean()
|
cleaner.clean()
|
||||||
|
|
||||||
# For testing:
|
# For testing:
|
||||||
from epc_data.attributes.MainheatAttributes import MainHeatAttributes
|
from epc_data.attributes.WindowAttributes import WindowAttributes
|
||||||
descriptions = {x["windows-description"] for x in data}
|
descriptions = {x["windows-description"] for x in data}
|
||||||
out = []
|
out = []
|
||||||
for description in descriptions:
|
for description in descriptions:
|
||||||
res = MainHeatAttributes(description).process()
|
res = WindowAttributes(description).process()
|
||||||
out.append(
|
out.append(
|
||||||
{
|
{
|
||||||
"original_description": description,
|
"original_description": description,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from epc_data.attributes.attribute_utils import clean_description
|
from epc_data.attributes.attribute_utils import clean_description, process_part
|
||||||
from typing import Dict, List, Union
|
from typing import Dict, Union
|
||||||
|
|
||||||
|
|
||||||
class MainHeatAttributes:
|
class MainHeatAttributes:
|
||||||
|
|
@ -37,33 +37,19 @@ class MainHeatAttributes:
|
||||||
part = part.strip() # remove leading/trailing white spaces
|
part = part.strip() # remove leading/trailing white spaces
|
||||||
|
|
||||||
# Heating Systems
|
# Heating Systems
|
||||||
self._process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
process_part(result, part, self.HEAT_SYSTEMS, 'has_')
|
||||||
|
|
||||||
# Fuel Types
|
# Fuel Types
|
||||||
self._process_part(result, part, self.FUEL_TYPES, 'has_')
|
process_part(result, part, self.FUEL_TYPES, 'has_')
|
||||||
|
|
||||||
# Distribution Systems
|
# Distribution Systems
|
||||||
self._process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
|
process_part(result, part, self.DISTRIBUTION_SYSTEMS, 'has_')
|
||||||
|
|
||||||
# Other attributes
|
# Other attributes
|
||||||
self._process_part(result, part, self.OTHERS, 'has_')
|
process_part(result, part, self.OTHERS, 'has_')
|
||||||
|
|
||||||
# Check for "underfloor" separately in the entire description
|
# Check for "underfloor" separately in the entire description
|
||||||
if "underfloor" in self.description:
|
if "underfloor" in self.description:
|
||||||
result['has_underfloor_heating'] = True
|
result['has_underfloor_heating'] = True
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
|
|
||||||
"""
|
|
||||||
Process a part of the description with a given list of attributes
|
|
||||||
and update the result dictionary.
|
|
||||||
"""
|
|
||||||
part_words = part.split()
|
|
||||||
for attr in attr_list:
|
|
||||||
attr_words = attr.split()
|
|
||||||
if set(attr_words).issubset(set(part_words)):
|
|
||||||
result[f'{prefix}{attr.replace(" ", "_")}'] = True
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Union
|
from typing import Dict, Union
|
||||||
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance, process_part
|
||||||
|
|
||||||
|
|
||||||
class RoofAttributes:
|
class RoofAttributes:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
import re
|
|
||||||
from typing import Dict, Union
|
from typing import Dict, Union
|
||||||
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
from epc_data.attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from typing import Dict, List, Union
|
from typing import Dict, Union
|
||||||
from epc_data.attributes.attribute_utils import clean_description
|
from epc_data.attributes.attribute_utils import clean_description, process_part
|
||||||
|
|
||||||
|
|
||||||
class WindowAttributes:
|
class WindowAttributes:
|
||||||
|
|
@ -10,34 +10,35 @@ class WindowAttributes:
|
||||||
def __init__(self, description: str):
|
def __init__(self, description: str):
|
||||||
self.description: str = clean_description(description.lower())
|
self.description: str = clean_description(description.lower())
|
||||||
|
|
||||||
if not description or not any(
|
# In the case of an empty description, we want to return a dictionary with all values set to False
|
||||||
rt in self.description for rt in
|
# and indicate there was no data
|
||||||
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
|
self.nodata = not description
|
||||||
):
|
|
||||||
raise ValueError('Invalid description')
|
if not self.nodata:
|
||||||
|
if not any(
|
||||||
|
rt in self.description for rt in
|
||||||
|
self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
|
||||||
|
):
|
||||||
|
raise ValueError('Invalid description')
|
||||||
|
|
||||||
def process(self) -> Dict[str, Union[str, bool]]:
|
def process(self) -> Dict[str, Union[str, bool]]:
|
||||||
result: Dict[str, Union[str, bool]] = {f'has_{wt.replace(" ", "_")}': False for wt in self.WINDOW_TYPES}
|
result: Dict[str, Union[str, bool]] = {
|
||||||
|
f'has_{wt.replace(" ", "_")}': False for wt in self.GLAZING_KEYWORDS
|
||||||
|
}
|
||||||
|
result.update({f'is_{gc.replace(" ", "_")}': False for gc in self.GLAZING_COVERAGE})
|
||||||
|
result.update({f'is_{gt.replace(" ", "_")}': False for gt in self.GLAZING_TYPES})
|
||||||
|
result["no_data"] = self.nodata
|
||||||
|
|
||||||
|
if self.nodata:
|
||||||
|
return result
|
||||||
|
|
||||||
description = self.description.split(',')
|
description = self.description.split(',')
|
||||||
|
|
||||||
# Process each part separately
|
# Process each part separately
|
||||||
for part in description:
|
for part in description:
|
||||||
part = part.strip() # remove leading/trailing white spaces
|
part = part.strip() # remove leading/trailing white spaces
|
||||||
self._process_part(result, part, self.WINDOW_TYPES, 'has_')
|
process_part(result, part, self.GLAZING_KEYWORDS, 'has_')
|
||||||
|
process_part(result, part, self.GLAZING_COVERAGE, 'is_')
|
||||||
return result
|
process_part(result, part, self.GLAZING_TYPES, 'is_')
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
|
|
||||||
"""
|
|
||||||
Process a part of the description with a given list of attributes
|
|
||||||
and update the result dictionary.
|
|
||||||
"""
|
|
||||||
part_words = part.split()
|
|
||||||
for attr in attr_list:
|
|
||||||
attr_words = attr.split()
|
|
||||||
if set(attr_words).issubset(set(part_words)):
|
|
||||||
result[f'{prefix}{attr.replace(" ", "_")}'] = True
|
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import re
|
import re
|
||||||
from typing import Tuple, Union, Dict
|
from typing import Tuple, Union, Dict, List
|
||||||
|
|
||||||
THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
|
THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
|
||||||
THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR)
|
THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR)
|
||||||
|
|
@ -59,3 +59,37 @@ def clean_description(description: str) -> str:
|
||||||
for char in special_chars:
|
for char in special_chars:
|
||||||
description = description.replace(char, " ")
|
description = description.replace(char, " ")
|
||||||
return description
|
return description
|
||||||
|
|
||||||
|
|
||||||
|
def process_part(
    result: Dict[str, Union[str, bool]],
    part: str,
    attr_list: List[str],
    prefix: str,
) -> Dict[str, Union[str, bool]]:
    """
    Flag every attribute in ``attr_list`` whose words all occur in ``part``.

    Matching is word based and order independent: an attribute matches when
    each of its whitespace-separated words is also a whitespace-separated
    word of ``part``. For every match the key
    ``prefix + attr.replace(" ", "_")`` is set to ``True`` in ``result``.
    ``result`` is updated in place and also returned.

    Args:
        result: Non-empty dictionary of flags to update.
        part: One fragment of a description.
        attr_list: Attribute phrases to look for. Blank or whitespace-only
            entries are ignored.
        prefix: Non-empty string prepended to each generated key.

    Returns:
        The same ``result`` dictionary that was passed in.

    Raises:
        TypeError: If ``result`` is not a dict, ``part`` or ``prefix`` is not
            a str, or ``attr_list`` is not a list of str.
        ValueError: If ``result`` or ``prefix`` is empty, or if no value in
            ``result`` is truthy once matching has finished.
    """
    if not isinstance(result, dict):
        raise TypeError('Expected a dictionary for result')
    if not isinstance(part, str):
        raise TypeError('Expected a string for part')
    if not isinstance(attr_list, list) or not all(isinstance(i, str) for i in attr_list):
        raise TypeError('Expected a list of strings for attr_list')
    if not isinstance(prefix, str):
        raise TypeError('Expected a string for prefix')

    if not result:
        raise ValueError("Result dictionary cannot be empty")

    if not prefix:
        raise ValueError("Prefix cannot be empty")

    # The words of the part do not change between attributes, so build the
    # set once instead of once per attribute.
    part_words = set(part.split())

    for attr in attr_list:
        attr_words = attr.split()
        # A blank attribute has no words, and the empty set is a subset of
        # every set. Without this guard it would "match" every part and write
        # a meaningless bare-prefix key into the result.
        if not attr_words:
            continue
        if part_words.issuperset(attr_words):
            result[f'{prefix}{attr.replace(" ", "_")}'] = True

    # This inspects the whole dictionary, not only the keys touched by this
    # call, so a truthy value that was already present also satisfies it.
    if not any(result.values()):
        raise ValueError("No attribute matches found")

    return result
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,10 @@
|
||||||
from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description
|
import pytest
|
||||||
|
import epc_data.attributes.attribute_utils as attribute_utils
|
||||||
|
|
||||||
|
|
||||||
def test_extract_thermal_transmittance():
|
def test_extract_thermal_transmittance():
|
||||||
description = "average thermal transmittance 2.3 w/m-¦k"
|
description = "average thermal transmittance 2.3 w/m-¦k"
|
||||||
assert extract_thermal_transmittance({}, description) == (
|
assert attribute_utils.extract_thermal_transmittance({}, description) == (
|
||||||
{'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '')
|
{'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -19,4 +20,45 @@ def test_clean_description():
|
||||||
]
|
]
|
||||||
|
|
||||||
for input_str, expected_output in test_cases:
|
for input_str, expected_output in test_cases:
|
||||||
assert clean_description(input_str) == expected_output
|
assert attribute_utils.clean_description(input_str) == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
# Test for normal operation
|
||||||
|
def test_process_part_normal_operation():
    """Only the attribute whose word occurs in the part is flipped to True."""
    keywords = ['glazing', 'glazed', 'glaze']
    flags = {f'has_{keyword}': False for keyword in keywords}

    outcome = attribute_utils.process_part(flags, 'high performance glazing', keywords, 'has_')

    assert outcome == {'has_glazing': True, 'has_glazed': False, 'has_glaze': False}
|
||||||
|
|
||||||
|
|
||||||
|
# Test for TypeError exceptions
|
||||||
|
def test_process_part_type_errors():
    """A ``result`` argument that is not a dictionary is rejected with TypeError."""
    call_args = (
        'not a dictionary',
        'high performance glazing',
        ['glazing', 'glazed', 'glaze'],
        'has_',
    )
    with pytest.raises(TypeError):
        attribute_utils.process_part(*call_args)
|
||||||
|
|
||||||
|
|
||||||
|
# Test for ValueError exceptions
|
||||||
|
def test_process_part_value_errors():
    """An empty ``result`` dictionary is rejected with ValueError."""
    call_kwargs = {
        'result': {},
        'part': 'high performance glazing',
        'attr_list': ['glazing', 'glazed', 'glaze'],
        'prefix': 'has_',
    }
    with pytest.raises(ValueError):
        attribute_utils.process_part(**call_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Test for no attribute matches found
|
||||||
|
def test_process_part_no_matches():
    """A part containing none of the attribute words raises ValueError."""
    keywords = ['glazing', 'glazed', 'glaze']
    all_false = dict.fromkeys(('has_glazing', 'has_glazed', 'has_glaze'), False)

    with pytest.raises(ValueError):
        attribute_utils.process_part(all_false, 'high performance coating', keywords, 'has_')
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue