diff --git a/epc_data/app.py b/epc_data/app.py index ea14882f..6ad677ea 100644 --- a/epc_data/app.py +++ b/epc_data/app.py @@ -55,27 +55,21 @@ def handler(): df = df.sort_values("original_description") df = df.reset_index(drop=True) + import numpy as np + idx = 88 + record = df[df.index == idx].to_dict("records")[0] + record = {k: v for k, v in record.items() if v not in [None, np.nan, False]} + from pprint import pprint + pprint(record) + + # This has has_electric as true and not sure if we want that + description = 'Boiler and radiators, mains gas, Electric storage heaters' + # This has has_electric as true and has has_underfloor_heating as true and has_electric_underfloor_heating as true + # and not sure if we want that but might be fine + description = 'Boiler and radiators, mains gas, Electric underfloor heating' + z = df[df["original_description"] == 'Air source heat pump, radiators and underfloor, electric'] - # Up to index: 14 - # Bugs: - # 1) - # Description: 'Air source heat pump fan coil units, electric' - # Issue: Because "oil" is a fuel type, "oil" is stripped out of the description and the description - # gets converted to "fan c units". It also marks this description as having oil, which it doesn't - # So this code probably won't detect any "fan coil units" - # 2) - # Description: 'Air source heat pump, Systems with radiators, electric' - # Issue: Check detecton of Systems with radiators - it's only searching for "radiators" in DISTRIBUTION_SYSTEMS - # This may actually be fine as we have other descriptions such as - # 'Air source heat pump, Underfloor heating and radiators, pipes in insulated timber floor, electric' - # 3) - # Description: 'Air source heat pump, radiators and underfloor, electric' - # Issue: We don't have any logic which identifies this heating system has having underfloor heating. - # Currently, we look for "electric underfloor heating" and "underfloor heating" so we miss - # the underfloor characterisation. There are a few descriptions that just include "underfloor" - # e.g. 'Air source heat pump, radiators, electric' which will get missed - # 4) - # + # LPG boiler df.to_dict("records") diff --git a/epc_data/attributes/MainheatAttributes.py b/epc_data/attributes/MainheatAttributes.py index ce0c9dc7..1e84debf 100644 --- a/epc_data/attributes/MainheatAttributes.py +++ b/epc_data/attributes/MainheatAttributes.py @@ -3,15 +3,18 @@ from typing import Dict, List, Union class MainHeatAttributes: HEAT_SYSTEMS = ["boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air", - "electric underfloor heating", "electric ceiling heating", "community scheme"] + "electric underfloor heating", "electric ceiling heating", "community scheme", + "ground source heat pump", "no system present", "portable electric heaters", + "water source heat pump"] FUEL_TYPES = ["electric", "mains gas", "wood logs", "LPG", "coal", "oil", "wood pellets", "anthracite", - "dual fuel (mineral and wood)", "smokeless fuel"] - DISTRIBUTION_SYSTEMS = ["underfloor heating", "radiators", "fan coil units", "pipes in screed above insulation", + "dual fuel mineral and wood", "smokeless fuel", "lpg"] + DISTRIBUTION_SYSTEMS = ["radiators", "fan coil units", "pipes in screed above insulation", "pipes in insulated timber floor", "pipes in concrete slab"] - OTHERS = ["assumed", "Electricaire"] + OTHERS = ["assumed", "electricaire", "assumed for most rooms"] def __init__(self, description: str): - self.description: str = description.lower() + self.description: str = self._clean_description(description.lower()) + # Remove special characters if not description or not any( rt in self.description for rt in @@ -19,8 +22,23 @@ class MainHeatAttributes: ): raise ValueError('Invalid description') + @staticmethod + def _clean_description(description: str) -> str: + """ + Clean the description by replacing any special characters with a space. + """ + special_chars = [":", ";", "*", "@", "?", "!", "(", ")"] + for char in special_chars: + description = description.replace(char, " ") + return description + def process(self) -> Dict[str, Union[str, bool]]: - result: Dict[str, Union[str, bool]] = {} + + result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS} + result.update({f'has_{hs.replace(" ", "_")}': False for hs in self.HEAT_SYSTEMS}) + result.update({f'has_{ft.replace(" ", "_")}': False for ft in self.FUEL_TYPES}) + result.update({f'has_{ot.replace(" ", "_")}': False for ot in self.OTHERS}) + description = self.description.split(',') # Process each part separately @@ -39,6 +57,10 @@ class MainHeatAttributes: # Other attributes self._process_part(result, part, self.OTHERS, 'has_') + # Check for "underfloor" separately in the entire description + if "underfloor" in self.description: + result['has_underfloor_heating'] = True + return result @staticmethod @@ -52,13 +74,5 @@ class MainHeatAttributes: attr_words = attr.split() if set(attr_words).issubset(set(part_words)): result[f'{prefix}{attr.replace(" ", "_")}'] = True - for word in attr_words: - part_words.remove(word) # remove the attribute words from part - - part = " ".join(part_words) - - # Check for variations of "underfloor heating" - if "underfloor" in part.split(): - result[f'{prefix}underfloor_heating'] = True return result