NOTE(review): this patch was reconstructed from a whitespace-collapsed paste.
Indentation and blank context lines are best guesses, and the blob hashes on
the `index` lines predate the fixes made to the added lines. Regenerate it
with `git diff` from a working tree before applying.

diff --git a/epc_data/app.py b/epc_data/app.py
index 6ad677ea..2ff9cb9e 100644
--- a/epc_data/app.py
+++ b/epc_data/app.py
@@ -41,7 +41,8 @@ def handler():
 
     # For testing:
     from epc_data.attributes.MainheatAttributes import MainHeatAttributes
-    descriptions = {x["mainheat-description"] for x in data}
+    from epc_data.attributes.WindowAttributes import WindowAttributes
+    descriptions = {x["windows-description"] for x in data}
     out = []
     for description in descriptions:
-        res = MainHeatAttributes(description).process()
+        res = WindowAttributes(description).process()
diff --git a/epc_data/attributes/MainheatAttributes.py b/epc_data/attributes/MainheatAttributes.py
index 1e84debf..ae11e7d4 100644
--- a/epc_data/attributes/MainheatAttributes.py
+++ b/epc_data/attributes/MainheatAttributes.py
@@ -1,3 +1,4 @@
+from epc_data.attributes.attribute_utils import clean_description
 from typing import Dict, List, Union
 
 
@@ -13,7 +14,7 @@ class MainHeatAttributes:
     OTHERS = ["assumed", "electricaire", "assumed for most rooms"]
 
     def __init__(self, description: str):
-        self.description: str = self._clean_description(description.lower())
+        self.description: str = clean_description(description.lower())
 
         # Remove special characters
         if not description or not any(
@@ -22,16 +23,6 @@ class MainHeatAttributes:
         ):
             raise ValueError('Invalid description')
 
-    @staticmethod
-    def _clean_description(description: str) -> str:
-        """
-        Clean the description by replacing any special characters with a space.
-        """
-        special_chars = [":", ";", "*", "@", "?", "!", "(", ")"]
-        for char in special_chars:
-            description = description.replace(char, " ")
-        return description
-
     def process(self) -> Dict[str, Union[str, bool]]:
         result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS}
 
diff --git a/epc_data/attributes/WindowAttributes.py b/epc_data/attributes/WindowAttributes.py
new file mode 100644
index 00000000..19c71e77
--- /dev/null
+++ b/epc_data/attributes/WindowAttributes.py
@@ -0,0 +1,59 @@
+from typing import Dict, List, Union
+
+from epc_data.attributes.attribute_utils import clean_description
+
+
+class WindowAttributes:
+    """Extract glazing-type flags from a windows description."""
+
+    GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]
+    GLAZING_COVERAGE = ["fully", "mostly", "partial", "some"]
+    GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance"]
+
+    def __init__(self, description: str):
+        """
+        Normalise the description and check it mentions a known glazing term.
+
+        :raises ValueError: if the description is empty or contains none of
+            the glazing keywords, coverage terms or glazing types.
+        """
+        # Validate before calling .lower() so None/"" raise ValueError
+        # rather than AttributeError.
+        if not description:
+            raise ValueError('Invalid description')
+
+        self.description: str = clean_description(description.lower())
+
+        if not any(
+                rt in self.description for rt in
+                self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
+        ):
+            raise ValueError('Invalid description')
+
+    def process(self) -> Dict[str, Union[str, bool]]:
+        """
+        Return one ``has_<glazing type>`` boolean per entry in GLAZING_TYPES.
+        """
+        result: Dict[str, Union[str, bool]] = {f'has_{wt.replace(" ", "_")}': False for wt in self.GLAZING_TYPES}
+
+        # Process each comma-separated part separately
+        for part in self.description.split(','):
+            self._process_part(result, part.strip(), self.GLAZING_TYPES, 'has_')
+
+        return result
+
+    @staticmethod
+    def _process_part(
+            result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str
+    ) -> Dict[str, Union[str, bool]]:
+        """
+        Process a part of the description with a given list of attributes
+        and update the result dictionary.
+        """
+        part_words = set(part.split())
+        for attr in attr_list:
+            # Every word of a multi-word attribute must appear in the part.
+            if set(attr.split()).issubset(part_words):
+                result[f'{prefix}{attr.replace(" ", "_")}'] = True
+
+        return result
diff --git a/epc_data/attributes/attribute_utils.py b/epc_data/attributes/attribute_utils.py
index 321b4131..57866115 100644
--- a/epc_data/attributes/attribute_utils.py
+++ b/epc_data/attributes/attribute_utils.py
@@ -49,3 +49,13 @@ def extract_component_types(result: dict, description: str, list_of_components:
             description = description.replace(component, "")
 
     return result, description
+
+
+def clean_description(description: str) -> str:
+    """
+    Clean the description by replacing any special characters with a space.
+    """
+    special_chars = [":", ";", "*", "@", "?", "!", "(", ")"]
+    for char in special_chars:
+        description = description.replace(char, " ")
+    return description
diff --git a/epc_data/tests/test_attribute_utils.py b/epc_data/tests/test_attribute_utils.py
index 9c37a1ac..7280e42b 100644
--- a/epc_data/tests/test_attribute_utils.py
+++ b/epc_data/tests/test_attribute_utils.py
@@ -1,7 +1,22 @@
-from epc_data.attributes.attribute_utils import extract_thermal_transmittance
+from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description
 
 
 def test_extract_thermal_transmittance():
     description = "average thermal transmittance 2.3 w/m-¦k"
     assert extract_thermal_transmittance({}, description) == (
         {'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}, '')
+
+
+def test_clean_description():
+    test_cases = [
+        ("this:is;a*test", "this is a test"),
+        ("hello@world", "hello world"),
+        ("what?!?", "what   "),
+        ("hello(world)", "hello world "),
+        ("", ""),
+        (":;*@?!", "      "),
+        ("no special chars", "no special chars")
+    ]
+
+    for input_str, expected_output in test_cases:
+        assert clean_description(input_str) == expected_output