import re
from typing import Dict, Union

from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance


class RoofAttributes(Definitions):
    """
    Parse a free-text roof description into structured attributes.

    The description is lower-cased and stripped, known Welsh phrasings are
    translated to their English equivalents, and :meth:`process` then extracts
    the roof type, thermal transmittance, dwelling-above flag, validity flag
    and insulation thickness.
    """

    ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
    DWELLING_ABOVE = ["another dwelling above", "other premises above"]

    # Exact-match Welsh -> English translations (keys are compared against the
    # lower-cased, stripped description).
    # NOTE(review): the "wedigçöi" fragments look like a mis-decoded form of the
    # original Welsh text; they are kept byte-for-byte because the lookup is an
    # exact string match -- confirm against the upstream data encoding.
    WELSH_TEXT = {
        "ar oleddf, dim inswleiddio": "pitched, no insulation",
        "ar oleddf, dim inswleiddio (rhagdybiaeth)": "pitched, no insulation (assumed)",
        "ar oleddf, wedigçöi inswleiddio (rhagdybiaeth)": "pitched, insulated (assumed)",
        "ar oleddf, wedigçöi inswleiddio": "pitched, insulated",
        "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)",
        "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation",
        "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": 'pitched, insulated at rafters',
        "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)",
        "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation",
        "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)",
        "yn wastad, dim inswleiddio": "flat, no insulation",
        "yn wastad, wedigçöi inswleiddio (rhagdybiaeth)": "flat, insulated (assumed)",
        "yn wastad, wedigçöi inswleiddio": "flat, insulated",
        "(eiddo arall uwchben)": "(another dwelling above)",
        "(annedd arall uwchben)": "(another dwelling above)",
        "ystafell(oedd) to, wedigçöi hinswleiddio": "roof room(s), insulated",
        "ystafell(oedd) to, wedigçöi hinswleiddio (rhagdybiaeth)": "roof room(s), insulated (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig (rhagdybiaeth)": "roof room(s), limited insulation (assumed)",
        "ystafell(oedd) to, inswleiddio cyfyngedig": "roof room(s), limited insulation",
        "ystafell(oedd) to, nenfwd wedigçöi inswleiddio": "roof room(s), ceiling insulated",
        "ystafell(oedd) to, dim inswleiddio (rhagdybiaeth)": "roof room(s), no insulation (assumed)",
        "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
    }

    # Welsh loft-insulation phrase. A single pattern accepts every combination
    # of an optional "+" after the number, "o"/"lo" as the connective, and
    # anything that starts with "llof" (so both "llofft" and a cut-off "llof").
    _LOFT_THICKNESS_RE = re.compile(r"ar oleddf, (\d+\+? mm) l?o inswleiddio yn y llof")

    # Welsh average-thermal-transmittance phrase. ".+" between "w/m" and "k"
    # tolerates whatever characters the unit's superscript was decoded to.
    _U_VALUE_RE = re.compile(
        r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k", re.IGNORECASE
    )

    # Numeric thickness such as "200 mm" or "300+ mm".
    _THICKNESS_MM_RE = re.compile(r"(\d+\+?)\s*mm")

    # Keyword -> qualitative thickness, checked in order; the first keyword
    # found in the description wins. Order matters because matching is by
    # substring: "limited" also covers "limited insulation", so no separate
    # entry is needed for the longer phrase.
    _THICKNESS_KEYWORDS = (
        ("ceiling insulated", "average"),
        ("insulated", "average"),
        ("limited", "below average"),
        ("no insulation", "none"),
        ("additional insulation", "above average"),
    )

    def __init__(self, description: str):
        """
        :param description: Description of the roof. ``None`` or an empty
            string is treated as "no data" rather than raising.
        :raises ValueError: if the description carries data but, after
            translation, mentions none of the known roof types,
            dwelling-above phrases or "average thermal transmittance".
        """
        # Normalise defensively so a missing value reaches the no-data check
        # below instead of failing on ``None.lower()``.
        self.description: str = (description or "").lower().strip()
        # The anomaly check deliberately uses the raw value, as before.
        # NOTE(review): DATA_ANOMALY_MATCHES is not defined in this class;
        # presumably inherited from Definitions -- confirm.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

        self.welsh_translation_search()

        if self.nodata:
            return

        recognised = self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
        if not any(marker in self.description for marker in recognised):
            raise ValueError('Invalid description')

    def welsh_translation_search(self):
        """
        Translate a Welsh description to English in place.

        Loft-insulation and thermal-transmittance descriptions share a fixed
        structure in which only the number changes, so they are matched with
        regular expressions instead of one dictionary entry per value. Any
        other description is looked up verbatim in ``WELSH_TEXT``; a hit there
        also clears ``nodata``. Descriptions that match nothing are left
        untouched.
        """
        description = self.description

        loft_match = self._LOFT_THICKNESS_RE.search(description)
        if loft_match is not None:
            self.description = f"pitched, {loft_match.group(1)} loft insulation"
            return

        u_value_match = self._U_VALUE_RE.search(description)
        if u_value_match is not None:
            self.description = f"average thermal transmittance {u_value_match.group(1)} W/m-¦K"
            return

        translation = self.WELSH_TEXT.get(description)
        if translation:
            self.nodata = False
            self.description = translation

    @classmethod
    def _insulation_thickness(cls, description: str) -> Union[str, None]:
        """Return the qualitative or numeric thickness found in *description*, else ``None``."""
        for keyword, label in cls._THICKNESS_KEYWORDS:
            if keyword in description:
                return label
        # No keyword present: fall back to an explicit thickness in mm.
        mm_match = cls._THICKNESS_MM_RE.search(description)
        return mm_match.group(1) if mm_match else None

    def process(self) -> Dict[str, Union[float, str, bool, None]]:
        """
        Extract structured roof attributes from the description.

        :return: an empty dict when there is no data; otherwise whatever the
            ``extract_thermal_transmittance`` and ``extract_component_types``
            helpers add, plus ``has_dwelling_above`` (bool), ``is_valid``
            (bool) and ``insulation_thickness`` (a qualitative label, the
            number of mm as a string, or ``None``).
        """
        result: Dict[str, Union[float, str, bool, None]] = {}
        if self.nodata:
            return result

        description = self.description

        # Thermal transmittance, then roof type; each helper returns the
        # updated result together with the description to carry forward.
        result, description = extract_thermal_transmittance(result, description)
        result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES)

        result["has_dwelling_above"] = any(marker in description for marker in self.DWELLING_ABOVE)
        for marker in self.DWELLING_ABOVE:
            description = description.replace(marker, "")

        result["is_valid"] = "invalid" not in description
        description = description.replace("invalid", "")

        thickness = self._insulation_thickness(description)
        if thickness is not None:
            result['insulation_thickness'] = thickness
        # Only default to None when nothing has populated the key already.
        result.setdefault('insulation_thickness', None)

        return result