Model/etl/epc_clean/epc_attributes/MainheatAttributes.py
2025-08-09 22:21:36 +01:00

225 lines
11 KiB
Python

from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, process_part, switch_chars
from typing import Dict, Union
class MainHeatAttributes(Definitions):
    """Turn an EPC main-heating description into a dictionary of boolean ``has_*`` flags.

    The constructor normalises the raw description (lower-casing, character switching and
    cleaning via the ``attribute_utils`` helpers), translates known Welsh descriptions to
    English, applies the ``REMAP`` aliases and detects a handful of malformed "edge case"
    descriptions. ``process`` then produces the flags.

    ``DATA_ANOMALY_MATCHES`` is read from ``self`` and is not defined in this class; it is
    presumably inherited from ``Definitions`` - confirm against ``BaseUtility``.
    """

    HEAT_SYSTEMS = [
        "boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air",
        "electric underfloor heating", "electric ceiling heating", "community scheme",
        "ground source heat pump", "no system present", "portable electric heaters",
        "water source heat pump", "electric heat pump",
        # "Micro-cogeneration", also known as micro combined heat and power (micro-CHP), is a technology that
        # generates heat and electricity simultaneously from the same energy source in residential or commercial
        # buildings. The main output of micro-CHP systems is heat, with electricity generation as a secondary output.
        "micro-cogeneration",
        "solar assisted heat pump",
        "exhaust source heat pump",
        "community heat pump",
        "hot-water-only",
    ]
    FUEL_TYPES = [
        "electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
        "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k", "mineral and wood",
        "dual fuel appliance",
    ]
    DISTRIBUTION_SYSTEMS = [
        "radiators", "fan coil units", "pipes in screed above insulation",
        "pipes in insulated timber floor", "pipes in concrete slab",
    ]
    OTHERS = ["assumed", "electricaire", "assumed for most rooms"]

    # Cleaned Welsh descriptions -> English equivalents. Lookups are exact-match on the cleaned
    # description, so the keys must stay byte-for-byte as they are; several appear to contain
    # mis-encoded characters (e.g. "st+¦r", "sygçön") and must not be "corrected" here.
    WELSH_TEXT = {
        "bwyler a rheiddiaduron, nwy prif gyflenwad": "boiler and radiators, mains gas",
        "st+¦r wresogyddion trydan": "electric storage heaters",
        "bwyler a rheiddiaduron, olew": "boiler and radiators, oil",
        "heat pumptrydan": "electric heat pump",
        "bwyler a rheiddiaduron, trydan": "boiler and radiators, electric",
        "bwyler a gwres dan y llawr, olew": "boiler and underfloor heating, oil",
        "bwyler a rheiddiaduron, lpg": "boiler and radiators, lpg",
        "gwresogyddion ystafell, trydan": "room heaters, electric",
        "pwmp gwres sygçön tarddu yn yr awyr, dan y llawr, trydan": (
            "air source heat pump, underfloor heating, "
            "electric"
        ),
        # Same description repeated twice in the source data.
        (
            "pwmp gwres sygçön tarddu yn yr awyr, dan y llawr, trydan, pwmp gwres sygçön tarddu yn yr awyr, dan y llawr, "
            "trydan"
        ): "air source heat pump, underfloor heating, electric",
        "cynllun cymunedol": "community scheme",
        "cynllun cymunedol, heat from boilers - mains gas": "community scheme",
        "bwyler a gwres dan y llawr, nwy prif gyflenwad": "boiler and underfloor heating, mains gas",
        "bwyler a rheiddiaduron, logiau coed": "boiler and radiators, wood logs",
        "bwyler a rheiddiaduron, tanwydd di-fwg": "boiler and radiators, smokeless fuel",
        "bwyler a rheiddiaduron, b30k": "boiler and radiators, b30k",
        "bwyler a rheiddiaduron, glo": "boiler and radiators, coal",
        "dim system ar gael, rhagdybir bod gwresogyddion trydan": "no system present, electric heaters assumed",
        # NOTE(review): "glo carreg" may mean anthracite rather than generic coal - confirm
        # before changing, since it would flip has_coal / has_anthracite for these rows.
        "gwresogyddion ystafell, glo carreg": "room heaters, coal",
        "pwmp gwres sygçön tarddu yn yr awyr, rheiddiaduron, trydan": "air source heat pump, radiators, electric",
        "gwresogyddion ystafell, nwy prif gyflenwad": "room heaters, mains gas",
        "bwyler a rheiddiaduron, dau danwydd mwynau a choed": "boiler and radiators, dual fuel mineral and wood",
        "gwresogyddion ystafell, dau danwydd mwynau a choed": "room heaters, dual fuel mineral and wood",
        "pwmp gwres sygçön tarddu yn y ddaear, dan y llawr, trydan": "ground source heat pump, underfloor, electric",
        "gwresogi dan y llawr trydan": "electric underfloor heating",
        # This description is slightly unclear and was repeated in the source data.
        "st+¦r wresogyddion trydan, st+¦r wresogyddion trydan": "room heaters, electric",
        "pwmp gwres sygçön tarddu yn y ddaear, rheiddiaduron, trydan": "ground source heat pump, radiators, electric",
        "gwresogyddion ystafell, pelenni coed": "room heaters, wood pellets",
        "gwresogyddion ystafell, glo": "room heaters, coal",
        "bwyler a gwres dan y llawr, lpg": "boiler and underfloor heating, lpg",
        "bwyler a gwres dan y llawr, trydan": "boiler and underfloor heating, electric",
        "boiler and radiators, nwy prif gyflenwad, mains gas": "boiler and radiators, mains gas",
        "bwyler a rheiddiaduron, olew, st+¦r wresogyddion trydan": (
            "boiler and radiators, oil, electric storage "
            "heaters"
        ),
        "pwmp gwres sygçön tarddu yn yr awyr, awyr gynnes, trydan": "air source heat pump, warm air, electric",
        "stor wresogyddion trydan": "electric storage heaters",
        # Not 100% certain - the translation is "bottled gas"
        "bwyler a rheiddiaduron, nwy potel": "boiler and radiators, lpg",
        "gwresogyddion trydan cludadwy wedi i ragdybio ar gyfer y rhan fwyaf o r ystafelloedd": (
            "portable electric "
            "heaters assumed for "
            "most rooms"
        ),
        "st r wresogyddion trydan": "electric storage heaters",
        "dim system ar gael, rhagdybir bod gwresogyddion trydan, trydan": "no system present, electric heaters assumed",
        # Translates to ", electric", which is then picked up by the edge-case table below.
        ", trydan": ", electric",
        "awyr gynnes, nwy prif gyflenwad": "warm air, mains gas",
    }

    # Aliases applied after translation. Keys without ", " can match a single comma-separated
    # term; keys containing ", " can only match the whole description (second pass in __init__).
    REMAP = {
        "electric ceiling": "electric ceiling heating",
        "electric heat pumps": "electric heat pump",
        "solar-assisted heat pump": "solar assisted heat pump",
        "portable electric heating": "portable electric heaters",
        "portable electric heating assumed for most rooms": "portable electric heaters assumed for most rooms",
        "electric storage, electric": "electric storage heaters",
        "radiator heating, electric": "room heaters, electric",
        "hot-water-only systems, gas": "no system present, electric heaters assumed",
        "gas-fired heat pumps, electric": "air source heat pump, electric",
        "radiator heating, heat from boilers - gas": "boiler and radiators, mains gas",
        "heat pump, warm air, mains gas": "air source heat pump, warm air, mains gas",
        "air sourceheat pump, radiators, electric": "air source heat pump, radiators, electric",
    }

    # Exact descriptions that are missing a heating system, mapped to the flags they imply.
    _EDGE_CASES = {
        ", underfloor, electric": {"has_electric": True, "has_underfloor_heating": True},
        ", radiators, electric": {"has_electric": True, "has_radiators": True},
        ", underfloor": {"has_underfloor_heating": True},
        ", wood pellets": {"has_wood_pellets": True},
        ", electric": {"has_electric": True},
        ", mains gas": {"has_mains_gas": True},
        "community, community": {"has_community_scheme": True},
    }

    # Class-level defaults only; process_edge_cases() rebinds both on every instance, so the
    # dict below is never mutated or shared between instances.
    edge_case_result = {}
    is_edge_case = False

    def __init__(self, description: Union[str, None]):
        """Normalise ``description`` and validate that it can be processed.

        :param description: raw main-heating description. ``None`` is treated like an empty
            string, i.e. as "no data".
        :raises ValueError: when the description carries data but contains none of the known
            heat system / fuel / distribution / other terms and is not a known edge case.
        """
        # Previously None crashed on ``.lower()`` before the no-data check below could run.
        if description is None:
            description = ""

        cleaned = clean_description(switch_chars(description.lower()))
        self.description: str = cleaned.strip()

        # The no-data check deliberately looks at the *raw* description, not the cleaned one.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or (
            description == "SAP05:Main-Heating"
        )

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            # A recognised Welsh description always carries data.
            self.nodata = False
            self.description = translation

        # First pass: remap individual comma-separated terms.
        self.description = ", ".join(
            self.REMAP.get(term, term) for term in self.description.split(", ")
        )
        # Second pass: remap the whole description (covers REMAP keys that contain ", ").
        self.description = self.REMAP.get(self.description, self.description)

        self.process_edge_cases()

        if self.nodata:
            return

        known_terms = self.HEAT_SYSTEMS + self.FUEL_TYPES + self.DISTRIBUTION_SYSTEMS + self.OTHERS
        recognised = self.is_edge_case or any(term in self.description for term in known_terms)
        if not description or not recognised:
            raise ValueError('Invalid description')

    def process_edge_cases(self) -> None:
        """Detect descriptions that would otherwise cause issues, e.g. a missing heating system.

        Sets ``self.is_edge_case`` and ``self.edge_case_result`` (the flags implied by the
        edge case, empty when the description is not one). Nothing is returned.
        """
        flags = self._EDGE_CASES.get(self.description)
        self.is_edge_case = flags is not None
        # Copy so callers mutating the result cannot corrupt the class-level table.
        self.edge_case_result = dict(flags) if flags is not None else {}

    def process(self) -> Dict[str, Union[str, bool]]:
        """Build the ``has_*`` flag dictionary for this description.

        Every known term gets a ``has_<term with spaces replaced by underscores>`` key that
        defaults to ``False``, plus ``has_underfloor_heating``.

        :return: the all-``False`` defaults for no-data descriptions; the defaults overlaid
            with ``edge_case_result`` for edge cases; otherwise the defaults as updated by
            ``process_part`` for each comma-separated part of the description.
        """
        result: Dict[str, Union[str, bool]] = {}
        for terms in (self.DISTRIBUTION_SYSTEMS, self.HEAT_SYSTEMS, self.FUEL_TYPES, self.OTHERS):
            for term in terms:
                result[f'has_{term.replace(" ", "_")}'] = False
        result['has_underfloor_heating'] = False

        if self.nodata:
            return result

        if self.is_edge_case:
            result.update(self.edge_case_result)
            return result

        # Each non-empty part is checked against every vocabulary, in this order.
        vocabularies = (self.HEAT_SYSTEMS, self.FUEL_TYPES, self.DISTRIBUTION_SYSTEMS, self.OTHERS)
        for raw_part in self.description.split(','):
            part = raw_part.strip()
            if not part:
                continue
            for vocabulary in vocabularies:
                # process_part is expected to update ``result`` in place - see attribute_utils.
                process_part(result, part, vocabulary, 'has_')

        # "underfloor" is checked against the whole description, not per part.
        if "underfloor" in self.description:
            result['has_underfloor_heating'] = True

        return result