mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Implemented stricter find_keywords and made it a util
This commit is contained in:
parent
e1137d3ba7
commit
6a48eea55c
3 changed files with 34 additions and 59 deletions
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Dict, Union
|
||||
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation
|
||||
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
|
||||
|
||||
|
||||
class MainFuelAttributes:
|
||||
|
|
@ -55,11 +55,11 @@ class MainFuelAttributes:
|
|||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
"fuel_type": self._find_keyword(self.FUEL_KEYWORDS),
|
||||
"tariff_type": self._find_keyword(self.TARIFF_KEYWORDS),
|
||||
"fuel_type": find_keyword(self.description, self.FUEL_KEYWORDS),
|
||||
"tariff_type": find_keyword(self.description, self.TARIFF_KEYWORDS),
|
||||
"is_community": self.is_community,
|
||||
"no_individual_heating_or_community_network": self._find_keyword(
|
||||
self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK
|
||||
"no_individual_heating_or_community_network": find_keyword(
|
||||
self.description, self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK
|
||||
),
|
||||
}
|
||||
|
||||
|
|
@ -75,25 +75,3 @@ class MainFuelAttributes:
|
|||
self.is_unknown = True
|
||||
|
||||
return result
|
||||
|
||||
def _find_keyword(self, keywords):
    """Return the first keyword found in ``self.description``, longest first.

    Keywords are tried in descending order of length so that a specific
    phrase such as 'time and temperature zone control' is checked before a
    phrase it contains, such as 'temperature zone control'. The raw
    description is searched first; if nothing matches, the search is
    repeated on the result of ``remove_punctuation(description)``.

    Args:
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The matching keyword, or None when no keyword matches either form
        of the description.
    """
    description = self.description

    # sorted() builds a new list. Sorting in place would reorder the list
    # owned by the caller (e.g. self.FUEL_KEYWORDS) on every call.
    ordered = sorted(keywords, key=len, reverse=True)

    for keyword in ordered:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)

    for keyword in ordered:
        if keyword in description_without_punct:
            return keyword

    return None
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Dict, Union
|
||||
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation
|
||||
from epc_data.attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
|
||||
|
||||
|
||||
class MainheatControlAttributes:
|
||||
|
|
@ -86,41 +86,18 @@ class MainheatControlAttributes:
|
|||
return any(keyword in self.description for keyword in keywords)
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
"thermostatic_control": self._find_keyword(self.THERMOSTATIC_CONTROL_KEYWORDS),
|
||||
"charging_system": self._find_keyword(self.CHARGING_SYSTEM_KEYWORDS),
|
||||
"switch_system": self._find_keyword(self.SWITCH_SYSTEM_KEYWORDS),
|
||||
"no_control": self._find_keyword(self.NO_CONTROL_SYSTEM_KEYWORDS),
|
||||
"dhw_control": self._find_keyword(self.DHW_CONTROL_KEYWORDS),
|
||||
"community_heating": self._find_keyword(self.COMMUNITY_HEATING_KEYWORDS),
|
||||
"thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS),
|
||||
"charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS),
|
||||
"switch_system": find_keyword(self.description, self.SWITCH_SYSTEM_KEYWORDS),
|
||||
"no_control": find_keyword(self.description, self.NO_CONTROL_SYSTEM_KEYWORDS),
|
||||
"dhw_control": find_keyword(self.description, self.DHW_CONTROL_KEYWORDS),
|
||||
"community_heating": find_keyword(self.description, self.COMMUNITY_HEATING_KEYWORDS),
|
||||
"multiple_room_thermostats": any(
|
||||
phrase in self.description for phrase in self.MULTIPLE_ROOM_THERMOSTATS_PHRASES
|
||||
),
|
||||
"auxiliary_systems": self._find_keyword(self.AUXILIARY_SYSTEM_KEYWORDS),
|
||||
"trvs": self._find_keyword(self.TRVS_KEYWORDS)
|
||||
"auxiliary_systems": find_keyword(self.description, self.AUXILIARY_SYSTEM_KEYWORDS),
|
||||
"trvs": find_keyword(self.description, self.TRVS_KEYWORDS)
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
def _find_keyword(self, keywords):
    """Return the first keyword found in ``self.description``, longest first.

    Keywords are tried in descending order of length so that a specific
    phrase such as 'time and temperature zone control' is checked before a
    phrase it contains, such as 'temperature zone control'. The raw
    description is searched first; if nothing matches, the search is
    repeated on the result of ``remove_punctuation(description)``.

    Args:
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The matching keyword, or None when no keyword matches either form
        of the description.
    """
    description = self.description

    # sorted() builds a new list. Sorting in place would reorder the list
    # owned by the caller (e.g. self.THERMOSTATIC_CONTROL_KEYWORDS) on
    # every call.
    ordered = sorted(keywords, key=len, reverse=True)

    for keyword in ordered:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)

    for keyword in ordered:
        if keyword in description_without_punct:
            return keyword

    return None
|
||||
|
||||
|
|
|
|||
|
|
@ -114,3 +114,23 @@ def remove_punctuation(text: str) -> str:
|
|||
def remove_double_spaces(text):
|
||||
cleaned_text = DOUBLE_SPACE_PATTERN.sub(" ", text)
|
||||
return cleaned_text
|
||||
|
||||
|
||||
def find_keyword(description, keywords):
    """Return the first keyword found in ``description``, longest first.

    Keywords are tried in descending order of length so that a specific
    phrase such as 'time and temperature zone control' is checked before a
    phrase it contains, such as 'temperature zone control'. Keywords of
    equal length keep their original relative order. The raw description is
    searched first; if nothing matches, the search is repeated on the
    result of ``remove_punctuation(description)``.

    Args:
        description: Text to search.
        keywords: Iterable of candidate keyword strings. It is not modified.

    Returns:
        The matching keyword, or None when no keyword matches either form
        of the description.
    """
    # sorted() builds a new list. Sorting in place would reorder the list
    # owned by the caller (callers pass attributes such as
    # self.FUEL_KEYWORDS) and would fail for tuples or other non-list input.
    ordered = sorted(keywords, key=len, reverse=True)

    for keyword in ordered:
        if keyword in description:
            return keyword

    # If no keyword is found, try again after removing punctuation
    description_without_punct = remove_punctuation(description)

    for keyword in ordered:
        if keyword in description_without_punct:
            return keyword

    return None
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue