from typing import Dict, Union

from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword


class MainFuelAttributes(Definitions):
    """Parse a main-fuel description string into fuel, tariff and community flags.

    The description is lower-cased, passed through ``clean_description`` and
    ``remove_punctuation``, and then matched against the keyword lists below.
    Call :meth:`process` to obtain the extracted attributes as a dict.

    Attributes set by ``__init__``:
        description: the normalised description text.
        is_community: True when the text contains 'community' but not
            'not community'.
        is_unknown: set to True by :meth:`process` when no fuel keyword matched.
        nodata: True when the raw description is empty/None or is listed in
            ``DATA_ANOMALY_MATCHES`` (not defined in this class; presumably
            inherited from ``Definitions`` - confirm).
    """

    # NOTE(review): if find_keyword returns the first match in list order, the
    # ordering here matters (e.g. 'biogas' and 'mains gas' precede 'gas',
    # 'smokeless coal' precedes 'coal') - confirm before reordering.
    FUEL_KEYWORDS = [
        'heat network',
        'mains gas',
        'electricity',
        'oil',
        'biomass',
        'biodiesel',
        # Note: there is also a category called 'bottled LPG', but only 2/50k entries had this
        'lpg',
        'waste combustion',
        'biogas',
        'wood logs',
        'dual fuel mineral wood',
        'gas',
        'anthracite',
        'smokeless coal',
        'house coal',
        'wood chips',
        # We don't treat wood chips and wood pellets as the same.
        # Wood pellets have a higher energy density than wood chips. This is due to their manufacturing process,
        # which compresses the wood and removes most of the moisture, making them more efficient as a fuel
        'wood pellets',
        'b30k',
        'dual fuel appliance mineral and wood',
        'coal',
        'b30d',
        'bioethanol',
    ]

    COMPLEX_FUEL_KEYWORDS = [
        'heat from boilers using biodiesel from any biomass source'
    ]

    TARIFF_KEYWORDS = [
        'unspecified tariff'  # We may come across more later but this is all observed for now
    ]

    UNKNOWN_FUEL = "unknown"

    NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK = [
        'to be used only when there is no heatinghotwater system or data is from a community network',
        'to be used only when there is no heatinghotwater system'
    ]

    def __init__(self, description: Union[str, None]):
        """Normalise ``description`` and validate that it is recognisable.

        Args:
            description: raw main-fuel description. ``None`` and the empty
                string are treated as "no data".

        Raises:
            ValueError: if the description carries data but contains none of
                the fuel, no-individual-heating, tariff or complex-fuel
                keywords.
        """
        # Fall back to "" so a missing (None) description is classified as
        # nodata below instead of raising AttributeError on .lower().
        raw_text = (description or "").lower()
        self.description: str = remove_punctuation(clean_description(raw_text))
        self.is_community = 'community' in self.description and 'not community' not in self.description
        self.is_unknown = False
        # The anomaly check is made against the raw, un-normalised input.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

        if self.nodata:
            return

        keyword_groups = (
            self.FUEL_KEYWORDS,
            self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK,
            self.TARIFF_KEYWORDS,
            self.COMPLEX_FUEL_KEYWORDS,
        )
        if not any(self._keyword_in_description(group) for group in keyword_groups):
            raise ValueError('Invalid description')

    def _keyword_in_description(self, keywords) -> bool:
        """Return True if any entry of ``keywords`` is a substring of the description."""
        return any(keyword in self.description for keyword in keywords)

    def process(self) -> Dict[str, Union[str, bool, None]]:
        """Return the attributes extracted from the description.

        Returns:
            A dict with the keys ``fuel_type``, ``tariff_type``,
            ``is_community``, ``no_individual_heating_or_community_network``
            and ``complex_fuel_type``. ``fuel_type`` falls back to
            ``UNKNOWN_FUEL`` when there is no data or no fuel keyword matched;
            in the latter case ``self.is_unknown`` is also set to True.
        """
        if self.nodata:
            return {
                "fuel_type": self.UNKNOWN_FUEL,
                "tariff_type": None,
                "is_community": False,
                "no_individual_heating_or_community_network": False,
                "complex_fuel_type": None
            }

        result: Dict[str, Union[str, bool, None]] = {
            "fuel_type": find_keyword(self.description, self.FUEL_KEYWORDS),
            "tariff_type": find_keyword(self.description, self.TARIFF_KEYWORDS),
            "is_community": self.is_community,
            # To make this field palatable, we only report whether one of the
            # no-individual-heating phrases matched, not which one.
            "no_individual_heating_or_community_network": bool(
                find_keyword(self.description, self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK)
            ),
            "complex_fuel_type": find_keyword(self.description, self.COMPLEX_FUEL_KEYWORDS),
        }

        if not result["fuel_type"]:
            result["fuel_type"] = self.UNKNOWN_FUEL
            # We'll do checks on unknown fuel types to ensure we don't miss anything
            self.is_unknown = True

        return result