Model/etl/epc_clean/epc_attributes/WallAttributes.py
2025-11-30 18:16:09 +00:00

202 lines
11 KiB
Python

import re
from typing import Dict, Union
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
extract_component_types,
extract_thermal_transmittance,
handle_mixed_translation
)
class WallAttributes(Definitions):
    """
    Turn a free-text EPC wall description into a dictionary of attributes.

    The constructor normalises the description (known corrections, Welsh to
    English translation, mixed-language clean-up) and :meth:`process` then
    derives the wall-type flags, thermal transmittance and insulation details.
    """

    # Wall types handed to ``extract_component_types``. ``process`` afterwards
    # reads flags such as ``is_cavity_wall`` / ``is_filled_cavity`` from the
    # result, so presumably each entry yields an ``is_<type>`` key.
    WALL_TYPES = [
        "cavity wall",
        "filled cavity",
        "solid brick",
        "system built",
        "timber frame",
        "granite or whinstone",
        "as built",
        "cob",
        "assumed",
        "sandstone or limestone",
        "park home",
    ]

    # Exact-match lookup from a Welsh description to its English equivalent.
    # NOTE(review): sequences such as "GÇÖ", "+ó" and "+¦" look like
    # mis-decoded UTF-8; the keys are kept verbatim because lookups are exact
    # string matches - confirm they match the raw data before "repairing" them.
    WELSH_TEXT = {
        "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Solid brick, as built, no insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "Cavity wall, as built, partial insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation",
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Cavity wall, as built, no insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation",
        "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Sandstone or limestone, as built, no insulation (assumed)"
        ),
        "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation",
        "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity",
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "Cavity wall, as built, insulated (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated",
        "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Granite or whinstone, as built, no insulation (assumed)"
        ),
        "Waliau ceudod,": "Cavity wall, as built, no insulation",
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "Timber frame, as built, insulated (assumed)"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated",
        "Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation",
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "System built, as built, no insulation (assumed)"
        ),
        "Tywodfaen, gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation",
        "Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation",
        "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation",
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "Timber frame, as built, partial insulation (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "System built, as built, insulated (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": (
            "System built, as built, insulated"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation",
        "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation",
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "System built, as built, partial insulation (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": (
            "System built, as built, partial insulation"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Timber frame, as built, no insulation (assumed)"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation",
        "Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation",
        "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation",
        "Briciau solet, gydag inswleiddio allanol": "Solid brick, with external insulation",
        # Add in some corrections:
        "Co with external insulation": "Cob, with external insulation",
        "Cowith external insulation": "Cob, with external insulation",
    }

    # Keys returned (all set to False) by ``process`` when there is no usable
    # description.
    # NOTE(review): there is no "is_park_home" entry although "park home" is in
    # WALL_TYPES - confirm whether the no-data result should include it.
    DEFAULT_KEYS = [
        "thermal_transmittance",
        "thermal_transmittance_unit",
        "is_cavity_wall",
        "is_filled_cavity",
        "is_solid_brick",
        "is_system_built",
        "is_timber_frame",
        "is_granite_or_whinstone",
        "is_as_built",
        "is_cob",
        "is_assumed",
        "is_sandstone_or_limestone",
        "insulation_thickness",
        "external_insulation",
        "internal_insulation",
    ]

    # Exact-match fixes for known truncated descriptions, applied before any
    # translation.
    CORRECTIONS = {
        "Granite or whin, as built, no insulation (assumed)": "Granite or whinstone, as built, no insulation (assumed)",
    }

    def __init__(self, description: str):
        """
        Normalise the raw description and decide whether it carries any data.

        :param description: Description of the walls. A non-string value
            (e.g. ``None``) is treated as "no data" instead of raising.
        """
        if not isinstance(description, str):
            # The regex search in welsh_translation_search raises TypeError on
            # non-strings, which previously happened before the no-data check
            # below could run. Short-circuit so process() returns the defaults.
            self.description = description
            self.nodata: bool = True
            return

        self.description: str = self.CORRECTIONS.get(description, description)

        self.welsh_translation_search()

        # We handle seeming occurrences of mixed translations
        self.description = handle_mixed_translation(self.description)

        # Evaluated against the raw input, not the normalised text.
        # DATA_ANOMALY_MATCHES is not defined in this class - presumably it is
        # inherited from Definitions.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

    def welsh_translation_search(self):
        """
        For some descriptions, we need to translate from Welsh to English.

        A Welsh "average thermal transmittance" phrase is rewritten in English
        keeping the numeric value; otherwise the whole description is looked up
        in ``WELSH_TEXT``. ``self.description`` is updated in place.

        :return: None
        """
        uvalue_search = re.search(
            r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description
        )

        if uvalue_search:
            uvalue = uvalue_search.group(1)
            self.description = f"Average thermal transmittance {uvalue} W/m-¦K"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            # __init__ recomputes nodata after this call, so this assignment
            # only has an effect when the method is invoked directly.
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Extract the wall attributes from the normalised description.

        :return: Dictionary of attributes. When the instance is flagged as
            having no data, every key in ``DEFAULT_KEYS`` is present and set
            to ``False``.
        """
        if self.nodata:
            return {key: False for key in self.DEFAULT_KEYS}

        result: Dict[str, Union[float, str, bool, None]] = {}
        description = self.description.lower()

        # thermal transmittance - it can be negative which is erroneous however we'll still pull it out
        result, description = extract_thermal_transmittance(result, description)

        # wall type
        result, description = extract_component_types(
            result, description, list_of_components=self.WALL_TYPES
        )

        # Handle some edge cases: a bare "sandstone" still counts as
        # sandstone or limestone.
        if "sandstone" in description and not result["is_sandstone_or_limestone"]:
            result["is_sandstone_or_limestone"] = True

        # insulation thickness - this is far from a perfect approach and we'd likely need to use nlp to do this
        # generally however this is sufficient for mvp
        thickness_map = {
            "external insulation": "average",
            "internal insulation": "average",
            "partial insulation": "below average",
            "no insulation": "none",
            "additional insulation": "above average",
            "insulated": "average",
        }
        # The first phrase (in the order above) found in the description wins;
        # None when no phrase is present.
        result["insulation_thickness"] = next(
            (
                thickness
                for phrase, thickness in thickness_map.items()
                if phrase in description
            ),
            None,
        )

        # insulation type
        result["external_insulation"] = "external insulation" in description
        result["internal_insulation"] = "internal insulation" in description

        if result["is_filled_cavity"]:
            # If it has a filled cavity + internal/external insulation, it's deemed to have above average insulation
            if result["external_insulation"] or result["internal_insulation"]:
                result["insulation_thickness"] = "above average"
            else:
                result["insulation_thickness"] = "average"

        # An as-built cavity wall with average insulation is flagged as a
        # filled cavity. Logical ``and`` replaces the bitwise ``&`` used
        # before; the outcome is the same for boolean flags.
        if (
            result["is_cavity_wall"]
            and result["is_as_built"]
            and result["insulation_thickness"] == "average"
        ):
            result["is_filled_cavity"] = True

        return result