Implemented stricter find_keyword and made it a util

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-14 19:12:06 +01:00
parent e1137d3ba7
commit 6a48eea55c
3 changed files with 34 additions and 59 deletions

View file

@ -1,5 +1,5 @@
from typing import Dict, Union
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
class MainFuelAttributes:
@ -55,11 +55,11 @@ class MainFuelAttributes:
def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = {
"fuel_type": self._find_keyword(self.FUEL_KEYWORDS),
"tariff_type": self._find_keyword(self.TARIFF_KEYWORDS),
"fuel_type": find_keyword(self.description, self.FUEL_KEYWORDS),
"tariff_type": find_keyword(self.description, self.TARIFF_KEYWORDS),
"is_community": self.is_community,
"no_individual_heating_or_community_network": self._find_keyword(
self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK
"no_individual_heating_or_community_network": find_keyword(
self.description, self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK
),
}
@ -75,25 +75,3 @@ class MainFuelAttributes:
self.is_unknown = True
return result
def _find_keyword(self, keywords):
    """Return the longest keyword found in ``self.description``, or ``None``.

    Keywords are tried longest first, so a specific phrase such as
    'time and temperature zone control' is checked before a shorter phrase
    it contains ('temperature zone control'). If nothing matches the raw
    description, the search is repeated on the description after it has
    been passed through ``remove_punctuation``.

    Args:
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The first matching keyword in longest-first order, or ``None`` when
        no keyword matches either form of the description.
    """
    description = self.description

    # sorted() returns a new list rather than sorting in place: callers pass
    # attributes such as self.FUEL_KEYWORDS, and a lookup must not reorder
    # the caller's collection as a side effect. It also accepts tuples.
    ordered_keywords = sorted(keywords, key=len, reverse=True)

    for keyword in ordered_keywords:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)
    for keyword in ordered_keywords:
        if keyword in description_without_punct:
            return keyword

    return None

View file

@ -1,5 +1,5 @@
from typing import Dict, Union
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
class MainheatControlAttributes:
@ -86,41 +86,18 @@ class MainheatControlAttributes:
return any(keyword in self.description for keyword in keywords)
def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = {
"thermostatic_control": self._find_keyword(self.THERMOSTATIC_CONTROL_KEYWORDS),
"charging_system": self._find_keyword(self.CHARGING_SYSTEM_KEYWORDS),
"switch_system": self._find_keyword(self.SWITCH_SYSTEM_KEYWORDS),
"no_control": self._find_keyword(self.NO_CONTROL_SYSTEM_KEYWORDS),
"dhw_control": self._find_keyword(self.DHW_CONTROL_KEYWORDS),
"community_heating": self._find_keyword(self.COMMUNITY_HEATING_KEYWORDS),
"thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS),
"charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS),
"switch_system": find_keyword(self.description, self.SWITCH_SYSTEM_KEYWORDS),
"no_control": find_keyword(self.description, self.NO_CONTROL_SYSTEM_KEYWORDS),
"dhw_control": find_keyword(self.description, self.DHW_CONTROL_KEYWORDS),
"community_heating": find_keyword(self.description, self.COMMUNITY_HEATING_KEYWORDS),
"multiple_room_thermostats": any(
phrase in self.description for phrase in self.MULTIPLE_ROOM_THERMOSTATS_PHRASES
),
"auxiliary_systems": self._find_keyword(self.AUXILIARY_SYSTEM_KEYWORDS),
"trvs": self._find_keyword(self.TRVS_KEYWORDS)
"auxiliary_systems": find_keyword(self.description, self.AUXILIARY_SYSTEM_KEYWORDS),
"trvs": find_keyword(self.description, self.TRVS_KEYWORDS)
}
return result
def _find_keyword(self, keywords):
    """Return the longest keyword found in ``self.description``, or ``None``.

    Keywords are tried longest first, so a specific phrase such as
    'time and temperature zone control' is checked before a shorter phrase
    it contains ('temperature zone control'). If nothing matches the raw
    description, the search is repeated on the description after it has
    been passed through ``remove_punctuation``.

    Args:
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The first matching keyword in longest-first order, or ``None`` when
        no keyword matches either form of the description.
    """
    description = self.description

    # Build a sorted copy instead of calling keywords.sort(): callers pass
    # attributes such as self.THERMOSTATIC_CONTROL_KEYWORDS, and a lookup
    # must not reorder the caller's collection. It also accepts tuples.
    ordered_keywords = sorted(keywords, key=len, reverse=True)

    for keyword in ordered_keywords:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)
    for keyword in ordered_keywords:
        if keyword in description_without_punct:
            return keyword

    return None

View file

@ -114,3 +114,23 @@ def remove_punctuation(text: str) -> str:
def remove_double_spaces(text):
cleaned_text = DOUBLE_SPACE_PATTERN.sub(" ", text)
return cleaned_text
def find_keyword(description, keywords):
    """Return the longest keyword that occurs in ``description``, or ``None``.

    Keywords are tried longest first, so a specific phrase such as
    'time and temperature zone control' is checked before a shorter phrase
    it contains ('temperature zone control'). Keywords of equal length keep
    their relative order from ``keywords``. If nothing matches the raw
    description, the search is repeated on the description after it has
    been passed through ``remove_punctuation``.

    Args:
        description: Text to search.
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The first matching keyword in longest-first order, or ``None`` when
        no keyword matches either form of the description.
    """
    # sorted() returns a new list rather than sorting in place: a lookup
    # must not reorder the collection the caller handed in, and this also
    # lets callers pass tuples or other iterables.
    ordered_keywords = sorted(keywords, key=len, reverse=True)

    for keyword in ordered_keywords:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)
    for keyword in ordered_keywords:
        if keyword in description_without_punct:
            return keyword

    return None