mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
broke out clean_description
This commit is contained in:
parent
363c0745fe
commit
d10bed5b6d
5 changed files with 71 additions and 13 deletions
|
|
@ -41,7 +41,7 @@ def handler():
|
|||
|
||||
# For testing:
|
||||
from epc_data.attributes.MainheatAttributes import MainHeatAttributes
|
||||
descriptions = {x["mainheat-description"] for x in data}
|
||||
descriptions = {x["windows-description"] for x in data}
|
||||
out = []
|
||||
for description in descriptions:
|
||||
res = MainHeatAttributes(description).process()
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from epc_data.attributes.attribute_utils import clean_description
|
||||
from typing import Dict, List, Union
|
||||
|
||||
|
||||
|
|
@ -13,7 +14,7 @@ class MainHeatAttributes:
|
|||
OTHERS = ["assumed", "electricaire", "assumed for most rooms"]
|
||||
|
||||
def __init__(self, description: str):
|
||||
self.description: str = self._clean_description(description.lower())
|
||||
self.description: str = clean_description(description.lower())
|
||||
# Remove special characters
|
||||
|
||||
if not description or not any(
|
||||
|
|
@ -22,16 +23,6 @@ class MainHeatAttributes:
|
|||
):
|
||||
raise ValueError('Invalid description')
|
||||
|
||||
@staticmethod
|
||||
def _clean_description(description: str) -> str:
|
||||
"""
|
||||
Clean the description by replacing any special characters with a space.
|
||||
"""
|
||||
special_chars = [":", ";", "*", "@", "?", "!", "(", ")"]
|
||||
for char in special_chars:
|
||||
description = description.replace(char, " ")
|
||||
return description
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
|
||||
result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS}
|
||||
|
|
|
|||
42
epc_data/attributes/WindowAttributes.py
Normal file
42
epc_data/attributes/WindowAttributes.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from typing import Dict, List, Union
|
||||
|
||||
|
||||
class WindowAttributes:
    """
    Turn a free-text window description into boolean ``has_<type>`` flags.

    The description is lower-cased and cleaned of special characters on
    construction; ``process`` then splits it on commas and flags every
    glazing type whose words all appear in one of the parts.
    """

    GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]
    GLAZING_COVERAGE = ["fully", "mostly", "partial", "some"]
    GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance"]

    # process() keys its flags off WINDOW_TYPES, which was never defined and
    # made process() fail with AttributeError.
    # NOTE(review): aliasing it to GLAZING_TYPES is the presumed intent - confirm.
    WINDOW_TYPES = GLAZING_TYPES

    # Characters that _clean_description replaces with a single space.
    _SPECIAL_CHARS = (":", ";", "*", "@", "?", "!", "(", ")")

    def __init__(self, description: str):
        """
        Store the cleaned, lower-cased description.

        Raises:
            ValueError: if the description is empty/None or mentions none of
                the glazing keywords, coverage words or glazing types.
        """
        # Validate before calling .lower() so that None is reported as an
        # invalid description instead of failing with AttributeError.
        if not description:
            raise ValueError('Invalid description')

        self.description: str = self._clean_description(description.lower())

        recognised = self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
        if not any(term in self.description for term in recognised):
            raise ValueError('Invalid description')

    @staticmethod
    def _clean_description(description: str) -> str:
        """
        Clean the description by replacing any special characters with a space.

        __init__ relied on this method but the class did not define it; it
        applies the same character set as the shared ``clean_description``
        helper in ``attribute_utils``.
        """
        for char in WindowAttributes._SPECIAL_CHARS:
            description = description.replace(char, " ")
        return description

    def process(self) -> Dict[str, Union[str, bool]]:
        """
        Build the ``has_<type>`` flag dictionary for this description.

        Every entry of WINDOW_TYPES gets a key (spaces replaced by
        underscores) that starts as False and is set to True when the type is
        found in any comma-separated part of the description.
        """
        result: Dict[str, Union[str, bool]] = {
            f'has_{wt.replace(" ", "_")}': False for wt in self.WINDOW_TYPES
        }

        # Process each comma-separated part separately
        for part in self.description.split(','):
            part = part.strip()  # remove leading/trailing white spaces
            self._process_part(result, part, self.WINDOW_TYPES, 'has_')

        return result

    @staticmethod
    def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
        """
        Process a part of the description with a given list of attributes
        and update the result dictionary.

        An attribute matches when every one of its words occurs as a whole
        word in ``part`` (word order is not checked). The same ``result``
        dictionary is mutated in place and returned.
        """
        # Build the word set once instead of once per attribute.
        part_words = set(part.split())
        for attr in attr_list:
            if set(attr.split()).issubset(part_words):
                result[f'{prefix}{attr.replace(" ", "_")}'] = True

        return result
|
||||
|
|
@ -49,3 +49,13 @@ def extract_component_types(result: dict, description: str, list_of_components:
|
|||
description = description.replace(component, "")
|
||||
|
||||
return result, description
|
||||
|
||||
|
||||
def clean_description(description: str) -> str:
    """
    Return the description with each special character replaced by a space.

    The special characters are : ; * @ ? ! ( and ). Every occurrence becomes
    exactly one space, so runs of special characters yield runs of spaces.
    """
    specials = ":;*@?!()"
    # One translation pass maps every special character to a single space.
    to_space = str.maketrans(specials, " " * len(specials))
    return description.translate(to_space)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,22 @@
|
|||
from epc_data.attributes.attribute_utils import extract_thermal_transmittance
|
||||
from epc_data.attributes.attribute_utils import extract_thermal_transmittance, clean_description
|
||||
|
||||
|
||||
def test_extract_thermal_transmittance():
    """
    Check that a thermal transmittance phrase is extracted as value and unit,
    and that the returned description is the empty string.
    """
    source_text = "average thermal transmittance 2.3 w/m-¦k"
    expected_result = {'thermal_transmittance': 2.3, 'thermal_transmittance_unit': 'w/m-¦k'}
    expected_remainder = ''

    actual = extract_thermal_transmittance({}, source_text)

    assert actual == (expected_result, expected_remainder)
|
||||
|
||||
|
||||
def test_clean_description():
    """
    Check that clean_description swaps every special character for one space.

    Each special character becomes its own space, so a run of N special
    characters must yield N spaces. The run lengths are spelled out with
    ``" " * N`` so the expected whitespace cannot be silently collapsed.
    """
    test_cases = [
        ("this:is;a*test", "this is a test"),
        ("hello@world", "hello world"),
        # "?!?" is three special characters -> three spaces, not one
        ("what?!?", "what" + " " * 3),
        ("hello(world)", "hello world "),
        ("", ""),
        # six special characters -> six spaces, not one
        (":;*@?!", " " * 6),
        ("no special chars", "no special chars"),
    ]

    for input_str, expected_output in test_cases:
        assert clean_description(input_str) == expected_output
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue