import re
from typing import Dict, Union

from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
    extract_component_types,
    extract_thermal_transmittance,
    handle_mixed_translation,
)


class RoofAttributes(Definitions):
    """
    Turn a free-text roof description into a dictionary of roof attributes.

    The description is normalised (lower-cased, stripped, passed through
    ``handle_mixed_translation``), translated from Welsh where a known phrase or
    pattern is recognised, and then parsed by :meth:`process`.
    """

    # Keywords that identify a roof type inside a description.
    ROOF_TYPES = [
        "pitched",
        "roof room",
        "loft",
        "flat",
        "thatched",
        "at rafters",
        "assumed",
    ]

    # Phrases indicating that there is a dwelling/premises above instead of a roof.
    DWELLING_ABOVE = [
        "another dwelling above",
        "other premises above",
        "other dwelling above",
        "(same dwelling above)",
    ]

    # Exact-match Welsh -> English translations.
    # NOTE(review): keys such as "wedigçöi" / "wedi?i" look like mis-encoded forms of
    # an apostrophe; presumably they mirror the raw data as it arrives - confirm
    # before normalising them.
    WELSH_TEXT = {
        "ar oleddf, dim inswleiddio": "pitched, no insulation",
        "ar oleddf, dim inswleiddio (rhagdybiaeth)": "pitched, no insulation (assumed)",
        "ar oleddf, wedigçöi inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedi?i inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi hinswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi inswleiddio": "pitched, insulated",
        "ar oleddf, wedi?i inswleiddio": "pitched, insulated",
        "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)",
        "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": "pitched, insulated at rafters",
        "ar oleddf, wedi?i inswleiddio wrth y trawstiau": "pitched, insulated at rafters",
        "ar oleddf, wedi?i inswleiddio wrth y trawstia": "pitched, insulated at rafters",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstia": "pitched, insulated at rafters",
        "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)",
        "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation",
        "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)",
        "yn wastad, dim inswleiddio": "flat, no insulation",
        "yn wastad, wedigçöi inswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedi?i hinswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedigçöi inswleiddio": "flat, insulated",
        "(eiddo arall uwchben)": "(another dwelling above)",
        "(annedd arall uwchben)": "(another dwelling above)",
        "ystafell(oedd) to, wedigçöi hinswleiddio": "roof room(s), insulated",
        "ystafell(oedd) to, wedi?i hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, wedigçöi hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig (rhagdybiaeth)": "roof room(s), limited insulation (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig": "roof room(s), limited insulation",
        "ystafell(oedd) to, nenfwd wedigçöi inswleiddio": "roof room(s), ceiling insulated",
        "ystafell(oedd) to, dim inswleiddio (rhagdybiaeth)": "roof room(s), no insulation (assumed)",
        "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
        "to gwellt, gydag inswleiddio ychwanegol": "thatched, with additional insulation",
    }

    # Every key returned by process(); all are present in the no-data result.
    DEFAULT_KEYS = [
        "thermal_transmittance",
        "thermal_transmittance_unit",
        "is_pitched",
        "is_roof_room",
        "is_loft",
        "is_flat",
        "is_thatched",
        "is_at_rafters",
        "is_assumed",
        "has_dwelling_above",
        "is_valid",
        "insulation_thickness",
    ]

    # Keys reported as None (rather than False) when there is no data.
    NODATA_NULLS = ["insulation_thickness", "thermal_transmittance", "thermal_transmittance_unit"]

    # Welsh loft-insulation phrases whose only varying part is the thickness (group 1).
    # Compiled once here instead of on every call; tried in this order.
    _LOFT_INSULATION_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof",
            r"ar oleddf, (\d+mm) o inswleiddio yn y llofft",
            r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft",
        )
    )

    # Welsh "average thermal transmittance <value> <unit>"; group 1 is the value.
    # The strict pattern expects the exact unit text; the loose one accepts any
    # characters between "w/m" and "k" and is only used when the strict one fails.
    _UVALUE_STRICT_PATTERN = re.compile(
        r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k"
    )
    _UVALUE_LOOSE_PATTERN = re.compile(
        r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k",
        re.IGNORECASE,
    )

    # "<number> insulation" with the "mm" missing, and "<number> mm".
    _BARE_THICKNESS_PATTERN = re.compile(r"(\d+\+?)\s*insulation")
    _MM_THICKNESS_PATTERN = re.compile(r"(\d+\+?)\s*mm")

    # Qualitative insulation keywords, checked in order; the first hit wins.
    # "limited insulation" can never be reached because "limited" precedes it, but
    # both map to the same value so the outcome is unaffected.
    _THICKNESS_KEYWORDS = (
        ("ceiling insulated", "average"),
        ("insulated", "average"),
        ("limited", "below average"),
        ("no insulation", "none"),
        ("limited insulation", "below average"),
        ("additional insulation", "above average"),
    )

    def __init__(self, description: str):
        """
        :param description: Description of the roof. ``None`` or an empty string is
            treated as "no data".
        :raises ValueError: If the description carries data but mentions none of the
            known roof types, dwelling-above phrases or an average thermal
            transmittance.
        """
        # Guard against None so that a missing description reaches the no-data path
        # below instead of failing on ``.lower()``.
        raw_description = description or ""
        self.description: str = raw_description.lower().strip()
        # We handle seeming occurrences of mixed translations
        self.description = handle_mixed_translation(self.description)

        # The anomaly check deliberately uses the raw (un-normalised) input, as before.
        self.nodata = (
            not raw_description
            or raw_description in self.DATA_ANOMALY_MATCHES
            or self.description == "sap05:roof"
        )

        self.welsh_translation_search()

        if self.nodata:
            return

        recognised_terms = self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
        if not any(term in self.description for term in recognised_terms):
            raise ValueError("Invalid description")

    def welsh_translation_search(self):
        """
        Translate a Welsh description to English in place.

        For some descriptions, we want to translate, however they have a consistent
        structure, where the only change is the thickness of insulation (or the
        thermal transmittance value). Instead of manually adding a record for each
        translation, we search for regular expressions and translate. Descriptions
        that match no pattern fall back to an exact lookup in ``WELSH_TEXT``; only
        that lookup clears ``self.nodata``.
        """
        loft_match = None
        for pattern in self._LOFT_INSULATION_PATTERNS:
            loft_match = pattern.search(self.description)
            if loft_match:
                break

        if loft_match is not None:
            self.description = f"pitched, {loft_match.group(1)} loft insulation"
            return

        # Prefer the strict unit match; fall back to the loose one.
        uvalue_match = self._UVALUE_STRICT_PATTERN.search(
            self.description
        ) or self._UVALUE_LOOSE_PATTERN.search(self.description)
        if uvalue_match is not None:
            uvalue = uvalue_match.group(1)
            self.description = f"average thermal transmittance {uvalue} W/m-¦K"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Parse the (translated) description into roof attributes.

        :return: A dictionary of attributes. For no-data descriptions every key in
            ``DEFAULT_KEYS`` is ``False`` except those in ``NODATA_NULLS``, which are
            ``None``.
        """
        result: Dict[str, Union[float, str, bool, None]] = {}

        if self.nodata:
            result = dict.fromkeys(self.DEFAULT_KEYS, False)
            # Insulation thickness, thermal transmittance and thermal transmittance
            # unit are set to None for nodata cases
            result.update(dict.fromkeys(self.NODATA_NULLS))
            return result

        description = self.description

        # thermal transmittance
        result, description = extract_thermal_transmittance(result, description)

        # roof type
        result, description = extract_component_types(
            result, description, list_of_components=self.ROOF_TYPES
        )

        result["has_dwelling_above"] = any(
            phrase in description for phrase in self.DWELLING_ABOVE
        )
        for phrase in self.DWELLING_ABOVE:
            description = description.replace(phrase, "")

        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

        # We handle an edge case where the description is "pitched, 150 loft insulation"
        # and is missing the mm
        if result["is_pitched"] or result["is_loft"]:
            bare_match = self._BARE_THICKNESS_PATTERN.search(description)
            if bare_match:
                result["insulation_thickness"] = bare_match.group(1)

        # Qualitative insulation level; overrides the bare-number guess above.
        for keyword, level in self._THICKNESS_KEYWORDS:
            if keyword in description:
                result["insulation_thickness"] = level
                break

        # An explicit thickness in mm, if present, takes precedence over everything else.
        mm_match = self._MM_THICKNESS_PATTERN.search(description)
        if mm_match:
            result["insulation_thickness"] = mm_match.group(1)

        result.setdefault("insulation_thickness", None)

        # With a dwelling above, the thermal transmittance is reported as zero.
        if result["has_dwelling_above"]:
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = "w/m-¦k"

        return result