import re
from typing import Dict, Union

from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
    extract_component_types,
    extract_thermal_transmittance,
    handle_mixed_translation,
)


class WallAttributes(Definitions):
    """Parse a free-text EPC wall description into structured attributes.

    The description is first normalised (known corrections, Welsh -> English
    translation, mixed-language clean-up) in ``__init__``; ``process`` then
    turns the normalised text into a flat dictionary of flags and values.
    """

    # Wall/component phrases handed to ``extract_component_types``.
    WALL_TYPES = [
        "cavity wall",
        "filled cavity",
        "solid brick",
        "system built",
        "timber frame",
        "granite or whinstone",
        "as built",
        "cob",
        "assumed",
        "sandstone or limestone",
        "park home",
    ]

    # Exact-match lookup from (often mis-encoded) Welsh descriptions to their
    # English equivalents. The keys deliberately reproduce the garbled
    # characters as they appear in the data, so they must not be "repaired".
    WELSH_TEXT = {
        "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Solid brick, as built, no insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "Cavity wall, as built, partial insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": (
            "Cavity wall, as built, partial insulation"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Cavity wall, as built, no insulation (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": (
            "Cavity wall, as built, no insulation"
        ),
        "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Sandstone or limestone, as built, no insulation (assumed)"
        ),
        "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": (
            "Sandstone or limestone, as built, no insulation"
        ),
        "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity",
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "Cavity wall, as built, insulated (assumed)"
        ),
        "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": (
            "Cavity wall, as built, insulated"
        ),
        "Waliau ceudod, fel y’u hadeiladwyd, wedi’u hinswleiddio (rhagdybiaeth)": (
            "Cavity wall, as built, insulated (assumed)"
        ),
        "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Granite or whinstone, as built, no insulation (assumed)"
        ),
        "Waliau ceudod,": "Cavity wall, as built, no insulation",
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "Timber frame, as built, insulated (assumed)"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": (
            "Timber frame, as built, insulated"
        ),
        "Gwenithfaen neu risgraig, gydag inswleiddio allanol": (
            "Granite or whinstone, with external insulation"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "System built, as built, no insulation (assumed)"
        ),
        "Tywodfaen, gydag inswleiddio mewnol": (
            "Sandstone or limestone, with internal insulation"
        ),
        "Waliau ceudod, ynysydd allanol a llenwi ceudod": (
            "Cavity wall, filled cavity and external insulation"
        ),
        "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": (
            "Granite or whinstone, with internal insulation"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "Timber frame, as built, partial insulation (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": (
            "System built, as built, insulated (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": (
            "System built, as built, insulated"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": (
            "System built, with external insulation"
        ),
        "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation",
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": (
            "System built, as built, partial insulation (assumed)"
        ),
        "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": (
            "System built, as built, partial insulation"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": (
            "Timber frame, as built, no insulation (assumed)"
        ),
        "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": (
            "Timber frame, as built, no insulation"
        ),
        "Tywodfaen, gydag inswleiddio allanol": (
            "Sandstone or limestone, with external insulation"
        ),
        "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation",
        "Briciau solet, gydag inswleiddio allanol": "Solid brick, with external insulation",
        # Add in some corrections:
        "Co with external insulation": "Cob, with external insulation",
        "Cowith external insulation": "Cob, with external insulation",
    }

    # Keys emitted (as False/None/"none") when there is no usable description.
    # Note "is_park_home" is not listed here; ``process`` adds it separately.
    DEFAULT_KEYS = [
        "thermal_transmittance",
        "thermal_transmittance_unit",
        "is_cavity_wall",
        "is_filled_cavity",
        "is_solid_brick",
        "is_system_built",
        "is_timber_frame",
        "is_granite_or_whinstone",
        "is_as_built",
        "is_cob",
        "is_assumed",
        "is_sandstone_or_limestone",
        "insulation_thickness",
        "external_insulation",
        "internal_insulation",
    ]

    # Exact-match fixes for known truncated English descriptions.
    CORRECTIONS = {
        "Granite or whin, as built, no insulation (assumed)": (
            "Granite or whinstone, as built, no insulation (assumed)"
        ),
    }

    # Welsh "average thermal transmittance <number>" pattern; compiled once.
    _WELSH_UVALUE_PATTERN = re.compile(
        r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)"
    )

    # Phrase -> insulation thickness label, in priority order: the first
    # phrase found in the description wins.
    _INSULATION_THICKNESS_BY_PHRASE = (
        ("external insulation", "average"),
        ("internal insulation", "average"),
        ("partial insulation", "below average"),
        ("no insulation", "none"),
        ("additional insulation", "above average"),
        ("insulated", "average"),
    )

    def __init__(self, description: str):
        """
        :param description: Description of the walls. A non-string value
            (e.g. ``None`` for a missing record) is treated as "no data"
            instead of raising inside the translation step.
        """
        self.description: str = description

        if not isinstance(description, str):
            # Nothing to correct or translate; ``process`` returns defaults.
            self.nodata = True
            return

        self.description = self.CORRECTIONS.get(description, description)
        self.welsh_translation_search()
        # We handle seeming occurrences of mixed translations
        self.description = handle_mixed_translation(self.description)

        # Deliberately evaluated on the *original* input and assigned last, so
        # it overrides whatever ``welsh_translation_search`` set.
        # DATA_ANOMALY_MATCHES is not defined in this file - presumably
        # inherited from ``Definitions``.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

    def welsh_translation_search(self):
        """
        For some descriptions, we need to translate from Welsh to English.

        A Welsh average-thermal-transmittance description is rewritten to its
        English form (keeping the number); otherwise an exact match in
        ``WELSH_TEXT`` replaces the description and clears ``nodata``.
        Descriptions matching neither are left untouched.
        :return:
        """
        uvalue_match = self._WELSH_UVALUE_PATTERN.search(self.description)
        if uvalue_match:
            uvalue = uvalue_match.group(1)
            self.description = f"Average thermal transmittance {uvalue} W/m-¦K"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Convert the normalised description into a dictionary of attributes.

        :return: Mapping with the thermal transmittance fields, the wall type
            flags, ``insulation_thickness`` and the ``external_insulation`` /
            ``internal_insulation`` flags. When there is no usable
            description, every ``DEFAULT_KEYS`` entry plus ``is_park_home`` is
            returned with a default value.
        """
        if self.nodata:
            defaults: Dict[str, Union[float, str, bool, None]] = {
                key: False for key in self.DEFAULT_KEYS
            }
            defaults["thermal_transmittance"] = None
            defaults["thermal_transmittance_unit"] = None
            defaults["insulation_thickness"] = "none"
            defaults["is_park_home"] = False
            return defaults

        result: Dict[str, Union[float, str, bool, None]] = {}
        description = self.description.lower()

        # thermal transmittance - it can be negative, which is erroneous;
        # however we'll still pull it out
        result, description = extract_thermal_transmittance(result, description)

        # wall type
        result, description = extract_component_types(
            result, description, list_of_components=self.WALL_TYPES
        )

        # Handle some edge cases: "sandstone" on its own still counts.
        if "sandstone" in description:
            result["is_sandstone_or_limestone"] = True

        # insulation thickness - this is far from a perfect approach and we'd
        # likely need to use nlp to do this; generally however this is
        # sufficient for mvp
        result["insulation_thickness"] = next(
            (
                label
                for phrase, label in self._INSULATION_THICKNESS_BY_PHRASE
                if phrase in description
            ),
            None,
        )

        # insulation type
        result["external_insulation"] = "external insulation" in description
        result["internal_insulation"] = "internal insulation" in description

        if result["is_filled_cavity"]:
            # If it has a filled cavity + internal/external insulation, it's
            # deemed to have above average insulation; a filled cavity alone
            # is average.
            has_extra_insulation = (
                result["external_insulation"] or result["internal_insulation"]
            )
            result["insulation_thickness"] = (
                "above average" if has_extra_insulation else "average"
            )

        # An as-built cavity wall with average insulation is flagged as a
        # filled cavity.
        if (
            result["is_cavity_wall"]
            and result["is_as_built"]
            and result["insulation_thickness"] == "average"
        ):
            result["is_filled_cavity"] = True

        return result