mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
102 lines
3.7 KiB
Python
102 lines
3.7 KiB
Python
from typing import Dict, Union
|
|
from model_data.BaseUtility import BaseUtility
|
|
from model_data.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
|
|
|
|
|
|
class MainFuelAttributes(BaseUtility):
    """Extract structured main-fuel attributes from a free-text description.

    The description is lower-cased, cleaned and stripped of punctuation on construction, then
    ``process()`` searches it for the keyword lists defined on this class.

    Relies on ``DATA_ANOMALY_MATCHES``, which is not defined in this class and is expected to be
    provided by ``BaseUtility``.
    """

    # Fuel names looked up in the cleaned description. The original ordering is preserved because
    # several entries are substrings of others ('gas' in 'mains gas' / 'biogas').
    # NOTE(review): presumably find_keyword is order-sensitive - confirm before reordering.
    FUEL_KEYWORDS = [
        'heat network',
        'mains gas',
        'electricity',
        'oil',
        'biomass',
        'biodiesel',
        # Note: there is also a category called 'bottled LPG', but only 2/50k entries had this
        'lpg',
        'waste combustion',
        'biogas',
        'wood logs',
        'dual fuel mineral wood',
        'gas',
        'anthracite',
        'smokeless coal',
        'house coal',
        'wood chips',
        # We don't treat wood chips and wood pellets as the same.
        # Wood pellets have a higher energy density than wood chips. This is due to their manufacturing process,
        # which compresses the wood and removes most of the moisture, making them more efficient as a fuel
        'wood pellets',
    ]

    # Longer fuel phrases that are reported separately under "complex_fuel_type".
    COMPLEX_FUEL_KEYWORDS = [
        'heat from boilers using biodiesel from any biomass source',
    ]

    TARIFF_KEYWORDS = [
        'unspecified tariff',
        # We may come across more later but this is all observed for now
    ]

    # Value reported as "fuel_type" when there is no data or no fuel keyword is found.
    UNKNOWN_FUEL = "unknown"

    # Phrases written without punctuation ('heatinghotwater') because they are matched against
    # the description after remove_punctuation has been applied.
    NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK = [
        'to be used only when there is no heatinghotwater system or data is from a community network',
        'to be used only when there is no heatinghotwater system',
    ]

    def __init__(self, description: Union[str, None]):
        """Clean the description and validate that it contains a known keyword.

        Args:
            description: Raw main-fuel description. A falsy value (``None`` or ``''``) or a value
                found in ``DATA_ANOMALY_MATCHES`` is treated as "no data".

        Raises:
            ValueError: If the description carries data but matches none of the keyword lists.
        """
        # Normalise a missing description to '' *before* cleaning. Calling .lower() on None used
        # to raise AttributeError, so the `not description` no-data guard below was never reached.
        raw_text = description or ''
        self.description: str = remove_punctuation(clean_description(raw_text.lower()))

        # 'not community' explicitly negates the community flag.
        self.is_community = 'community' in self.description and 'not community' not in self.description
        # Set to True by process() when a non-empty description yields no fuel keyword.
        self.is_unknown = False
        # The anomaly check deliberately uses the raw (uncleaned) description, as in the original.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

        if self.nodata:
            return

        recognised = any(
            self._keyword_in_description(keywords)
            for keywords in (
                self.FUEL_KEYWORDS,
                self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK,
                self.TARIFF_KEYWORDS,
                self.COMPLEX_FUEL_KEYWORDS,
            )
        )
        if not recognised:
            raise ValueError('Invalid description')

    def _keyword_in_description(self, keywords):
        """Return True if any of ``keywords`` occurs in the cleaned description.

        NOTE(review): this is a plain substring test, so 'oil' also matches inside 'boilers' and
        'gas' inside 'biogas'. Whether find_keyword behaves the same way is not visible here.
        """
        return any(keyword in self.description for keyword in keywords)

    def process(self) -> Dict[str, Union[str, bool, None]]:
        """Build the attribute dictionary for this description.

        Returns:
            A dict with the keys ``fuel_type``, ``tariff_type``, ``is_community``,
            ``no_individual_heating_or_community_network`` and ``complex_fuel_type``.
            ``fuel_type`` falls back to ``UNKNOWN_FUEL``; ``tariff_type`` and
            ``complex_fuel_type`` are ``None`` in the no-data case.

        Side effects:
            Sets ``self.is_unknown`` to True when the description has data but no fuel keyword
            was found.
        """
        if self.nodata:
            return {
                "fuel_type": self.UNKNOWN_FUEL,
                "tariff_type": None,
                "is_community": False,
                "no_individual_heating_or_community_network": False,
                "complex_fuel_type": None,
            }

        result: Dict[str, Union[str, bool, None]] = {
            "fuel_type": find_keyword(self.description, self.FUEL_KEYWORDS),
            "tariff_type": find_keyword(self.description, self.TARIFF_KEYWORDS),
            "is_community": self.is_community,
            "no_individual_heating_or_community_network": find_keyword(
                self.description, self.NO_INDIVIDUAL_HEATING_OR_COMMUNITY_NETWORK
            ),
            "complex_fuel_type": find_keyword(self.description, self.COMPLEX_FUEL_KEYWORDS),
        }

        # To make this field palatable, if no_individual_heating_or_community_network is
        # populated, we'll just set it to True.
        result["no_individual_heating_or_community_network"] = bool(
            result["no_individual_heating_or_community_network"]
        )

        if not result["fuel_type"]:
            result["fuel_type"] = self.UNKNOWN_FUEL
            # We'll do checks on unknown fuel types to ensure we don't miss anything
            self.is_unknown = True

        return result
|