Got MainHeatAttributes working

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-13 23:11:45 +01:00
parent e37b977bb5
commit fd3fd16d75
2 changed files with 42 additions and 34 deletions

View file

@ -55,27 +55,21 @@ def handler():
df = df.sort_values("original_description")
df = df.reset_index(drop=True)
import numpy as np
idx = 88
record = df[df.index == idx].to_dict("records")[0]
record = {k: v for k, v in record.items() if v not in [None, np.nan, False]}
from pprint import pprint
pprint(record)
# This description yields has_electric = True; not sure that is what we want
description = 'Boiler and radiators, mains gas, Electric storage heaters'
# This description yields has_electric, has_underfloor_heating and has_electric_underfloor_heating all as True;
# not sure that is what we want, but it might be fine
description = 'Boiler and radiators, mains gas, Electric underfloor heating'
z = df[df["original_description"] == 'Air source heat pump, radiators and underfloor, electric']
# Up to index: 14
# Bugs:
# 1)
# Description: 'Air source heat pump fan coil units, electric'
# Issue: Because "oil" is a fuel type, "oil" is stripped out of the description and the description
# gets converted to "fan c units". It also marks this description as having oil, which it doesn't
# So this code probably won't detect any "fan coil units"
# 2)
# Description: 'Air source heat pump, Systems with radiators, electric'
# Issue: Check detection of "Systems with radiators" - it's only searching for "radiators" in DISTRIBUTION_SYSTEMS
# This may actually be fine as we have other descriptions such as
# 'Air source heat pump, Underfloor heating and radiators, pipes in insulated timber floor, electric'
# 3)
# Description: 'Air source heat pump, radiators and underfloor, electric'
# Issue: We don't have any logic which identifies this heating system as having underfloor heating.
# Currently, we look for "electric underfloor heating" and "underfloor heating" so we miss
# the underfloor characterisation. There are a few descriptions that just include "underfloor"
# e.g. 'Air source heat pump, radiators, electric' which will get missed
# 4)
#
# LPG boiler
df.to_dict("records")

View file

@ -3,15 +3,18 @@ from typing import Dict, List, Union
class MainHeatAttributes:
HEAT_SYSTEMS = ["boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air",
"electric underfloor heating", "electric ceiling heating", "community scheme"]
"electric underfloor heating", "electric ceiling heating", "community scheme",
"ground source heat pump", "no system present", "portable electric heaters",
"water source heat pump"]
FUEL_TYPES = ["electric", "mains gas", "wood logs", "LPG", "coal", "oil", "wood pellets", "anthracite",
"dual fuel (mineral and wood)", "smokeless fuel"]
DISTRIBUTION_SYSTEMS = ["underfloor heating", "radiators", "fan coil units", "pipes in screed above insulation",
"dual fuel mineral and wood", "smokeless fuel", "lpg"]
DISTRIBUTION_SYSTEMS = ["radiators", "fan coil units", "pipes in screed above insulation",
"pipes in insulated timber floor", "pipes in concrete slab"]
OTHERS = ["assumed", "Electricaire"]
OTHERS = ["assumed", "electricaire", "assumed for most rooms"]
def __init__(self, description: str):
self.description: str = description.lower()
self.description: str = self._clean_description(description.lower())
# Remove special characters
if not description or not any(
rt in self.description for rt in
@ -19,8 +22,23 @@ class MainHeatAttributes:
):
raise ValueError('Invalid description')
@staticmethod
def _clean_description(description: str) -> str:
    """Return ``description`` with each special character replaced by a space.

    The characters ``: ; * @ ? ! ( )`` are each swapped for a single space;
    every other character is left untouched, so the length of the string
    does not change.
    """
    # One translation table maps every special character to a space, so the
    # whole string is rewritten in a single pass.
    to_space = str.maketrans({symbol: " " for symbol in ":;*@?!()"})
    return description.translate(to_space)
def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = {}
result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS}
result.update({f'has_{hs.replace(" ", "_")}': False for hs in self.HEAT_SYSTEMS})
result.update({f'has_{ft.replace(" ", "_")}': False for ft in self.FUEL_TYPES})
result.update({f'has_{ot.replace(" ", "_")}': False for ot in self.OTHERS})
description = self.description.split(',')
# Process each part separately
@ -39,6 +57,10 @@ class MainHeatAttributes:
# Other attributes
self._process_part(result, part, self.OTHERS, 'has_')
# Check for "underfloor" separately in the entire description
if "underfloor" in self.description:
result['has_underfloor_heating'] = True
return result
@staticmethod
@ -52,13 +74,5 @@ class MainHeatAttributes:
attr_words = attr.split()
if set(attr_words).issubset(set(part_words)):
result[f'{prefix}{attr.replace(" ", "_")}'] = True
for word in attr_words:
part_words.remove(word) # remove the attribute words from part
part = " ".join(part_words)
# Check for variations of "underfloor heating"
if "underfloor" in part.split():
result[f'{prefix}underfloor_heating'] = True
return result