From 02fce2e163426d3e256fedb2bf01b99d797d835f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 14 Jun 2023 15:41:29 +0100
Subject: [PATCH] working on heating controls

---
Review notes (this region is ignored when the patch is applied):
 * _find_keyword now sorts a copy of `keywords` (sorted()) instead of sorting
   the caller's class-level list in place.
 * Line breaks and leading indentation were reconstructed from a
   whitespace-collapsed copy; verify context-line indentation against the
   target files before applying.

 epc_data/app.py                               |  2 +-
 .../attributes/MainheatControlAttributes.py   | 30 +++++++++++++++++--
 epc_data/attributes/attribute_utils.py        | 11 +++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/epc_data/app.py b/epc_data/app.py
index eff07b74..0986332a 100644
--- a/epc_data/app.py
+++ b/epc_data/app.py
@@ -56,7 +56,7 @@ def handler():
     df = df.reset_index(drop=True)
 
     import numpy as np
-    idx = 5
+    idx = 3
     record = df[df.index == idx].to_dict("records")[0]
     record = {k: v for k, v in record.items() if v not in [None, np.nan, False]}
     from pprint import pprint
diff --git a/epc_data/attributes/MainheatControlAttributes.py b/epc_data/attributes/MainheatControlAttributes.py
index 77db712a..01744de7 100644
--- a/epc_data/attributes/MainheatControlAttributes.py
+++ b/epc_data/attributes/MainheatControlAttributes.py
@@ -1,5 +1,5 @@
 from typing import Dict, Union
-from epc_data.attributes.attribute_utils import clean_description
+from epc_data.attributes.attribute_utils import clean_description, remove_punctuation
 
 
 class MainheatControlAttributes:
@@ -46,6 +46,11 @@ class MainheatControlAttributes:
         'use of community heating'
     ]
 
+    MULTIPLE_ROOM_THERMOSTATS_PHRASES = [
+        'at least two room stats',
+        'at least two room thermostats'
+    ]
+
     def __init__(self, description: str):
         self.description: str = clean_description(description.lower())
 
@@ -57,7 +62,8 @@ class MainheatControlAttributes:
                 self.THERMOSTATIC_CONTROL_KEYWORDS,
                 self.CHARGING_SYSTEM_KEYWORDS,
                 self.SWITCH_SYSTEM_KEYWORDS,
-                self.DHW_CONTROL_KEYWORDS
+                self.DHW_CONTROL_KEYWORDS,
+                self.COMMUNITY_HEATING_KEYWORDS
             ]
         ):
             raise ValueError('Invalid description')
@@ -73,13 +79,31 @@ class MainheatControlAttributes:
             "no_control": self._find_keyword(self.NO_CONTROL_SYSTEM_KEYWORDS),
             "dhw_control": self._find_keyword(self.DHW_CONTROL_KEYWORDS),
             "community_heating": self._find_keyword(self.COMMUNITY_HEATING_KEYWORDS),
+            "multiple_room_thermostats": any(
+                phrase in self.description for phrase in self.MULTIPLE_ROOM_THERMOSTATS_PHRASES),
         }
 
         return result
 
     def _find_keyword(self, keywords):
+        description_words = set(self.description.split())
+
+        # Try longest keywords first so 'time and temperature zone control' is checked before
+        # 'temperature zone control'. sorted() copies, so the shared class-level list is not reordered
+        keywords = sorted(keywords, key=len, reverse=True)
+
         for keyword in keywords:
-            if keyword in self.description:
+            keyword_words = set(keyword.split())
+            if keyword_words.issubset(description_words):
+                return keyword
+
+        # If no keyword is found, try again after removing punctuation
+        description_without_punct = remove_punctuation(self.description)
+        description_words_without_punct = set(description_without_punct.split())
+
+        for keyword in keywords:
+            keyword_words = set(keyword.split())
+            if keyword_words.issubset(description_words_without_punct):
                 return keyword
 
         return None
diff --git a/epc_data/attributes/attribute_utils.py b/epc_data/attributes/attribute_utils.py
index 0312e7c1..7bdd4288 100644
--- a/epc_data/attributes/attribute_utils.py
+++ b/epc_data/attributes/attribute_utils.py
@@ -1,4 +1,5 @@
 import re
+import string
 from typing import Tuple, Union, Dict, List
 
 THERMAL_TRANSMITTENCE_STR = r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)"
@@ -93,3 +94,13 @@ def process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List
         raise ValueError("No attribute matches found")
 
     return result
+
+
+def remove_punctuation(text: str) -> str:
+    # Create a translation table using the string.punctuation string
+    translation_table = str.maketrans("", "", string.punctuation)
+
+    # Use the translation table to remove punctuation from the text
+    text_without_punctuation = text.translate(translation_table)
+
+    return text_without_punctuation