mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
155 lines
5.2 KiB
Python
155 lines
5.2 KiB
Python
import re
|
|
import string
|
|
from typing import Tuple, Union, Dict, List
|
|
|
|
# Matches one or more consecutive whitespace characters (spaces, tabs, newlines).
DOUBLE_SPACE_PATTERN = re.compile(r"\s+")

# Pattern text for the "average thermal transmittance <number> <unit>" phrase.
#   group 1: the number, optionally negative; it must contain a decimal point
#   group 2: the literal unit text
# The pattern is written in lowercase, so it only matches lowercased input.
# NOTE(review): the "¦" in the unit presumably mirrors a mis-encoded character
# in the incoming data - confirm against real descriptions before "fixing" it.
THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
THERMAL_TRANSMITTANCE_REGEX = re.compile(THERMAL_TRANSMITTENCE_STR)
|
|
|
|
|
|
def extract_thermal_transmittance(result: dict, description: str) -> Tuple[
    Dict[str, Union[None, str, float]], str
]:
    """
    Pull the average thermal transmittance figure out of a description.

    The keys ``thermal_transmittance`` and ``thermal_transmittance_unit`` are always written
    to ``result``: the parsed float and the unit text when the phrase is present, ``None`` for
    both when it is not.

    :param result: Dictionary that receives the two keys; it is updated in place.
    :param description: Lowercase description string (the pattern itself is lowercase).
    :return: A tuple of the same ``result`` dictionary and the description with every
    occurrence of the thermal transmittance phrase removed (unchanged when nothing matched).
    """
    found = THERMAL_TRANSMITTANCE_REGEX.search(description)

    if found is None:
        result['thermal_transmittance'] = None
        result['thermal_transmittance_unit'] = None
        return result, description

    # Values come from the first occurrence of the phrase.
    value_text, unit_text = found.groups()
    result['thermal_transmittance'] = float(value_text)
    result['thermal_transmittance_unit'] = unit_text

    # Drop the phrase from the text; surrounding whitespace is left as it was.
    return result, THERMAL_TRANSMITTANCE_REGEX.sub("", description)
|
|
|
|
|
|
def extract_component_types(result: dict, description: str, list_of_components: list) -> Tuple[
    Dict[str, Union[None, str, float]], str
]:
    """
    Flag which component types occur in the description and strip them from it.

    For every component a boolean is stored under ``is_<component>`` (spaces in the component
    replaced by underscores). Components are handled in list order, and each one is tested
    against the text left over after the earlier components were removed.

    :param result: Dictionary that receives the flags; it is updated in place.
    :param description: Lowercase description string.
    :param list_of_components: Component names to look for, matched as plain substrings.
    :return: A tuple of the same ``result`` dictionary and the description with all matched
    component names removed.
    """
    remaining = description

    for name in list_of_components:
        present = name in remaining
        result['is_' + name.replace(" ", "_")] = present

        if present:
            # Cut out every occurrence so later components only see what is left.
            remaining = remaining.replace(name, "")

    return result, remaining
|
|
|
|
|
|
def clean_description(description: str) -> str:
    """
    Replace a fixed set of special characters with spaces and collapse whitespace.

    The characters ``: ; * @ ? ! ( )`` each become a space, after which whitespace runs are
    reduced to a single space. Leading and trailing spaces are not trimmed.

    :param description: Text to clean.
    :return: The cleaned text.
    """
    to_blank = str.maketrans({symbol: " " for symbol in ":;*@?!()"})
    return remove_double_spaces(description.translate(to_blank))
|
|
|
|
|
|
def switch_chars(description: str) -> str:
    """
    Swap selected characters in a description for a comma.

    Useful for descriptions like "Gas: mains gas", which becomes "Gas, mains gas".

    :param description: Text to convert.
    :return: The text with every listed character replaced by ``,``.
    """
    # Characters that are turned into a comma; currently only the colon.
    swapped = (":",)
    comma_table = str.maketrans({symbol: "," for symbol in swapped})

    return description.translate(comma_table)
|
|
|
|
|
|
def process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
    """
    Mark which attributes from ``attr_list`` occur in one part of a description.

    An attribute counts as present when every whitespace-separated word of it appears among
    the words of ``part``; word order and adjacency are ignored. For each attribute present,
    ``result[prefix + attribute_with_underscores]`` is set to ``True``.

    :param result: Non-empty dictionary to update in place.
    :param part: The piece of description text to inspect.
    :param attr_list: Attribute phrases to look for.
    :param prefix: Non-empty string put in front of every key that is written.
    :return: The same ``result`` dictionary.
    :raises TypeError: If an argument has the wrong type.
    :raises ValueError: If ``result`` or ``prefix`` is empty, or if no value in ``result`` is
    truthy after matching. That last check looks at every value in the dictionary, including
    entries that were already there before the call.
    """
    # Type checks are reported in this fixed order; the first failure wins.
    attr_list_ok = isinstance(attr_list, list) and all(isinstance(item, str) for item in attr_list)
    type_checks = (
        (isinstance(result, dict), 'Expected a dictionary for result'),
        (isinstance(part, str), 'Expected a string for part'),
        (attr_list_ok, 'Expected a list of strings for attr_list'),
        (isinstance(prefix, str), 'Expected a string for prefix'),
    )
    for passed, message in type_checks:
        if not passed:
            raise TypeError(message)

    if not result:
        raise ValueError("Result dictionary cannot be empty")
    if not prefix:
        raise ValueError("Prefix cannot be empty")

    # The words of the part do not change between attributes, so build the set once.
    available_words = set(part.split())
    for attr in attr_list:
        if set(attr.split()) <= available_words:
            result[prefix + attr.replace(" ", "_")] = True

    if not any(result.values()):
        raise ValueError("No attribute matches found")

    return result
|
|
|
|
|
|
def remove_punctuation(text: str) -> str:
    """
    Delete every ``string.punctuation`` character from the text.

    Punctuation is removed outright (not replaced by a space); whitespace runs are then
    collapsed to a single space and the ends are trimmed.

    :param text: Text to strip of punctuation.
    :return: The text without punctuation, with normalised spacing.
    """
    # Mapping each punctuation character to None makes translate() delete it.
    strip_table = str.maketrans(dict.fromkeys(string.punctuation))

    return remove_double_spaces(text.translate(strip_table)).strip()
|
|
|
|
|
|
def remove_double_spaces(text):
    """
    Collapse every run of whitespace in the text into a single space.

    Any whitespace character counts (tabs and newlines included), and a lone whitespace
    character is also normalised to a plain space. The ends are not trimmed.

    :param text: Text to normalise.
    :return: The text with each whitespace run replaced by one space.
    """
    return DOUBLE_SPACE_PATTERN.sub(" ", text)
|
|
|
|
|
|
def find_keyword(description, keywords, synonyms=None):
    """
    Return the longest keyword that occurs in the description.

    Keywords are tried longest first, so 'time and temperature zone control' is checked
    before 'temperature zone control' when both are candidates. The description is searched
    as given first; if nothing matches, it is searched again with punctuation removed.

    :param description: Text to search; keywords are matched as plain substrings.
    :param keywords: Candidate keywords. Any iterable of strings is accepted and it is left
    untouched (the ordering is done on a copy).
    :param synonyms: Optional mapping from a keyword to the value to return in its place.
    :return: The synonym of the matched keyword if one is defined, otherwise the keyword
    itself; ``None`` when no keyword matches.
    """
    if synonyms is None:
        synonyms = {}

    # sorted() builds a new list, so the caller's keywords keep their original order.
    # Sorting in place here would silently reorder a list the caller may still be using.
    longest_first = sorted(keywords, key=len, reverse=True)

    for keyword in longest_first:
        if keyword in description:
            return synonyms.get(keyword, keyword)

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)

    for keyword in longest_first:
        if keyword in description_without_punct:
            return synonyms.get(keyword, keyword)

    return None
|