implemented first version of MainHeatAttributes - wip

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-13 16:58:50 +01:00
parent 11d26720c9
commit e37b977bb5
3 changed files with 91 additions and 5 deletions

View file

@ -40,11 +40,11 @@ def handler():
cleaner.clean()
# For testing:
from epc_data.attributes.FloorAttributes import FloorAttributes
descriptions = {x["floor-description"] for x in data}
from epc_data.attributes.MainheatAttributes import MainHeatAttributes
descriptions = {x["mainheat-description"] for x in data}
out = []
for description in descriptions:
res = FloorAttributes(description).process()
res = MainHeatAttributes(description).process()
out.append(
{
"original_description": description,
@ -53,5 +53,29 @@ def handler():
)
df = pd.DataFrame(out)
df = df.sort_values("original_description")
df = df.reset_index(drop=True)
z = df[df["original_description"] == 'Air source heat pump, radiators and underfloor, electric']
# Up to index: 14
# Bugs:
# 1)
# Description: 'Air source heat pump fan coil units, electric'
# Issue: Because "oil" is a fuel type, "oil" is stripped out of the description and the description
# gets converted to "fan c units". It also marks this description as having oil, which it doesn't
# So this code probably won't detect any "fan coil units"
# 2)
# Description: 'Air source heat pump, Systems with radiators, electric'
# Issue: Check detection of Systems with radiators - it's only searching for "radiators" in DISTRIBUTION_SYSTEMS
# This may actually be fine as we have other descriptions such as
# 'Air source heat pump, Underfloor heating and radiators, pipes in insulated timber floor, electric'
# 3)
# Description: 'Air source heat pump, radiators and underfloor, electric'
# Issue: We don't have any logic which identifies this heating system as having underfloor heating.
# Currently, we look for "electric underfloor heating" and "underfloor heating" so we miss
# the underfloor characterisation. There are a few descriptions that just include "underfloor"
# e.g. 'Air source heat pump, radiators, electric' which will get missed
# 4)
#
df.to_dict("records")

View file

@ -0,0 +1,64 @@
from typing import Dict, List, Union
class MainHeatAttributes:
    """Extract boolean ``has_*`` flags from a main-heating description string.

    The description is lowercased, split on commas, and each comma-separated
    part is matched word-by-word against the known attribute lists below.
    Matching is case-insensitive, so mixed-case entries such as ``"LPG"`` and
    ``"Electricaire"`` are recognised; the resulting keys are always lowercase
    (e.g. ``has_lpg``).
    """

    HEAT_SYSTEMS = ["boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air",
                    "electric underfloor heating", "electric ceiling heating", "community scheme"]
    FUEL_TYPES = ["electric", "mains gas", "wood logs", "LPG", "coal", "oil", "wood pellets", "anthracite",
                  "dual fuel (mineral and wood)", "smokeless fuel"]
    DISTRIBUTION_SYSTEMS = ["underfloor heating", "radiators", "fan coil units", "pipes in screed above insulation",
                            "pipes in insulated timber floor", "pipes in concrete slab"]
    OTHERS = ["assumed", "Electricaire"]

    def __init__(self, description: str):
        """Store the lowercased description after validating it.

        Raises:
            ValueError: if ``description`` is empty/``None`` or contains none
                of the known attribute terms (substring, case-insensitive).
        """
        # Validate before calling .lower() so that None raises ValueError
        # rather than AttributeError.
        if not description:
            raise ValueError('Invalid description')

        self.description: str = description.lower()

        known_terms = self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
        # Terms are lowercased here because the description has been lowercased;
        # otherwise "LPG" / "Electricaire" could never be found.
        if not any(term.lower() in self.description for term in known_terms):
            raise ValueError('Invalid description')

    def process(self) -> Dict[str, Union[str, bool]]:
        """Return a dict of ``has_<attribute>: True`` flags for the description.

        Each comma-separated part is checked independently against every
        attribute list; only attributes that are found appear in the result.
        """
        result: Dict[str, Union[str, bool]] = {}

        for part in self.description.split(','):
            part = part.strip()  # remove leading/trailing white spaces
            for attr_list in (self.HEAT_SYSTEMS, self.FUEL_TYPES, self.DISTRIBUTION_SYSTEMS, self.OTHERS):
                self._process_part(result, part, attr_list, 'has_')

        return result

    @staticmethod
    def _process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
        """Flag every attribute of ``attr_list`` whose words all occur in ``part``.

        Matching is on whole whitespace-separated words (so "oil" does not
        match inside "coil") and is case-insensitive. Words consumed by one
        attribute are removed so they cannot also satisfy a later attribute
        in the same list. ``result`` is updated in place and also returned.
        """
        part_words = part.lower().split()

        for attr in attr_list:
            attr_lower = attr.lower()
            attr_words = attr_lower.split()
            if set(attr_words).issubset(part_words):
                result[f'{prefix}{attr_lower.replace(" ", "_")}'] = True
                for word in attr_words:
                    part_words.remove(word)  # remove the attribute words from part

        # Check for variations of "underfloor heating": a leftover bare
        # "underfloor" (e.g. "radiators and underfloor") still counts.
        if "underfloor" in part_words:
            result[f'{prefix}underfloor_heating'] = True

        return result

View file

@ -2,8 +2,6 @@ import pytest
import pickle
from epc_data.EpcClean import EpcClean
from pathlib import Path
from epc_data.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
from epc_data.attributes.RoofAttributes import RoofAttributes
# For local testing
if __file__ == "<input>":