Fix missing 'From second main heating system' case in hot water system type parsing (treated as 'from secondary system')

This commit is contained in:
Khalim Conn-Kowlessar 2023-07-01 17:36:21 +01:00
parent 36adaba8db
commit d6f83bc292
4 changed files with 20 additions and 7 deletions

View file

@ -97,8 +97,7 @@ def handler():
)
)
test = [x for x in data if "Conservatory" in x["floor-description"]]
test = pd.DataFrame(test)
z = pd.DataFrame([x for x in data if x["hotwater-description"] == "From second main heating system"])
# Incorporate input data into cleaning
cleaner = EpcClean(data + [p.data for p in input_properties])

View file

@ -22,6 +22,7 @@ class HotWaterAttributes(BaseUtility):
'from main system', # The hot water is provided by the main heating system of the building
'from secondary system',
# The hot water is provided by a secondary (or supplementary) heating system in the building
'from second main heating system', # Same as 'from secondary system'
'community scheme', # The hot water is provided by a community heating system
]
@ -83,6 +84,11 @@ class HotWaterAttributes(BaseUtility):
'gas range cooker', # A gas-powered range cooker
]
# Maps alternative descriptions to the canonical keyword that represents the same thing
SYNONYMS = {
'from second main heating system': 'from secondary system',
}
def __init__(self, description: str):
self.description: str = clean_description(description.lower())
@ -128,7 +134,7 @@ class HotWaterAttributes(BaseUtility):
result: Dict[str, Union[str, bool]] = {
"heater_type": find_keyword(self.description, self.HEATER_TYPES),
"system_type": find_keyword(self.description, self.SYSTEM_TYPES),
"system_type": find_keyword(self.description, self.SYSTEM_TYPES, self.SYNONYMS),
"thermostat_characteristics": find_keyword(self.description, self.THERMOSTAT_CHARACTERISTICS),
"heating_scope": find_keyword(self.description, self.HEATING_SCOPE),
"energy_recovery": find_keyword(self.description, self.ENERGY_RECOVERY),

View file

@ -118,7 +118,10 @@ def remove_double_spaces(text):
return cleaned_text
def find_keyword(description, keywords):
def find_keyword(description, keywords, synonyms=None):
if synonyms is None:
synonyms = {}
# Sort keywords by length, longest first.
# This ensures that 'time and temperature zone control'
# will be checked before 'temperature zone control' if both are present in the keywords list
@ -126,13 +129,13 @@ def find_keyword(description, keywords):
for keyword in keywords:
if keyword in description:
return keyword
return synonyms.get(keyword, keyword)
# If no keyword is found, try again after removing punctuation
description_without_punct = remove_punctuation(description)
for keyword in keywords:
if keyword in description_without_punct:
return keyword
return synonyms.get(keyword, keyword)
return None

View file

@ -120,5 +120,10 @@ hotwater_cases = [
{'original_description': 'Gas range cooker', 'heater_type': None, 'system_type': None,
'thermostat_characteristics': None, 'heating_scope': None, 'energy_recovery': None, 'tariff_type': None,
'extra_features': None, 'chp_systems': None, 'distribution_system': None, 'no_system_present': None,
'assumed': False, "appliance": "gas range cooker"}
'assumed': False, "appliance": "gas range cooker"},
{'original_description': 'From second main heating system', 'heater_type': None,
'system_type': 'from secondary system',
'thermostat_characteristics': None, 'heating_scope': None, 'energy_recovery': None, 'tariff_type': None,
'extra_features': None, 'chp_systems': None, 'distribution_system': None, 'no_system_present': None,
'assumed': False, "appliance": None}
]