import re
from typing import Dict, Optional, Union

from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import (
    extract_thermal_transmittance,
    extract_component_types,
    handle_mixed_translation,
)


class FloorAttributes(Definitions):
    """Normalise an EPC floor description and turn it into structured attributes.

    The constructor lower-cases the raw description, flags known "no data"
    values, translates Welsh descriptions to English, applies known remaps and
    finally validates that the text mentions a recognised floor type.
    ``process`` then converts the cleaned description into a flat dictionary.

    NOTE(review): several literals below contain byte sequences such as
    ``-¦``, ``gçö`` and ``???`` that look like mis-encoded characters
    (presumably a superscript two and an apostrophe). They are kept verbatim
    because they are matched against the data as it is ingested - confirm
    against the upstream encoding before normalising them.
    """

    # Phrases indicating that the floor sits above another property.
    DWELLING_BELOW = ["another dwelling below", "other premises below"]

    # Floor-type keywords looked for in a description.
    FLOOR_TYPES = [
        "assumed",
        "to unheated space",
        "to external air",
        "suspended",
        "solid",
    ]

    # For the short term, while we are still exploring the data, we maintain a
    # list of error cases which we want to ignore and consider as no data.
    # These are compared against the raw (not lower-cased) description.
    OBSERVED_ERRORS = ["Conservatory", "insulated", "Basement"]

    # Exact-match lookup from (lower-cased) Welsh descriptions to English.
    WELSH_TEXT = {
        "(anheddiad arall islaw)": "(another dwelling below)",
        "solet, dim inswleiddio (rhagdybiaeth)": "solid, no insulation (assumed)",
        "solet, dim inswleiddio": "solid, no insulation",
        "crog, dim inswleiddio (rhagdybiaeth)": "suspended, no insulation (assumed)",
        "crog, dim inswleiddio": "suspended, no insulation",
        "(eiddo arall islaw)": "(other premises below)",
        "solet, inswleiddio cyfyngedig (rhagdybiaeth)": "solid, limited insulation (assumed)",
        "solet, inswleiddio cyfyngedig": "solid, limited insulation",
        "crog, wedigçöi inswleiddio (rhagdybiaeth)": "suspended, insulated (assumed)",
        "crog, wedigçöi inswleiddio": "suspended, insulated",
        "igçör awyr y tu allan, dim inswleiddio (rhagdybiaeth)": "to external air, no insulation (assumed)",
        "igçör awyr y tu allan, dim inswleiddio": "to external air, no insulation",
        "i ofod heb ei wresogi, wedigçöi inswleiddio (rhagdybiaeth)": "to unheated space, insulated (assumed)",
        "i ofod heb ei wresogi, wedigçöi inswleiddio": "to unheated space, insulated",
        "solet, wedigçöi inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "solet, wedigçöi inswleiddio": "solid, insulated",
        "solet, wedi???i inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, heb ei inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation",
        "igçör awyr y tu allan, wedigçöi inswleiddio (rhagdybiaeth)": "to external air, insulated (assumed)",
        "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)",
    }

    REMAP = {
        # Have only seen this once - though perhaps we need to investigate
        # older EPCs in the production of EPC clean. A newer EPC for the same
        # property had been re-assessed as "another dwelling below".
        "above unheated space or full exposed": "(another dwelling below)",
    }

    # Welsh "average thermal transmittance <value> <unit>" patterns, tried in
    # order: first the exact unit spelling seen in the data, then a looser
    # form that accepts any characters between "w/m" and "k".
    _WELSH_UVALUE_PATTERNS = (
        re.compile(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k"),
        re.compile(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k"),
    )

    # Insulation phrase -> thickness category. Order matters: the first phrase
    # found in the description wins, so the generic "insulated" comes last.
    _INSULATION_THICKNESS = {
        "external insulation": "average",
        "internal insulation": "average",
        "limited insulation": "below average",
        "partial insulation": "below average",
        "no insulation": "none",
        "additional insulation": "above average",
        "insulated": "average",
    }

    def __init__(self, description: Optional[str]):
        """Clean and validate a raw floor description.

        Args:
            description: Raw floor description text. ``None`` and the empty
                string are treated as "no data".

        Raises:
            ValueError: If the description carries data but, after translation
                and remapping, mentions none of the known floor types,
                dwelling-below phrases or "average thermal transmittance".
        """
        # Guard against None so that missing values are reported as no data
        # instead of failing on ``.lower()``.
        self.description: str = (description or "").lower()

        # The anomaly / observed-error lists are matched against the raw text.
        self.nodata = (
            not description
            or description in self.DATA_ANOMALY_MATCHES
            or description in self.OBSERVED_ERRORS
            or self.description == "sap05:floor"
        )

        # Try to perform a translation, in case it's in Welsh.
        self.translate_welsh_text()

        # Remap known issues.
        if self.description in self.REMAP:
            self.description = self.REMAP[self.description]

        # Handle seemingly mixed-language occurrences.
        self.description = handle_mixed_translation(self.description)

        recognised = (
            self.FLOOR_TYPES
            + self.DWELLING_BELOW
            + ["average thermal transmittance"]
        )
        if not self.nodata and not any(
            marker in self.description for marker in recognised
        ):
            raise ValueError(f"Invalid description: {self.description!r}")

    def translate_welsh_text(self) -> None:
        """Translate ``self.description`` from Welsh to English in place.

        A Welsh average-thermal-transmittance sentence is rewritten to the
        English form carrying the same numeric value. Otherwise the whole
        description is looked up in ``WELSH_TEXT``; on a hit the description
        is replaced and ``nodata`` is cleared. Unrecognised text is left
        untouched.
        """
        strict_pattern, loose_pattern = self._WELSH_UVALUE_PATTERNS
        uvalue_match = strict_pattern.search(
            self.description
        ) or loose_pattern.search(self.description)

        if uvalue_match is not None:
            uvalue = uvalue_match.group(1)
            self.description = f"average thermal transmittance {uvalue} w/m-¦k"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[str, bool, int, float, None]]:
        """Convert the cleaned description into a dictionary of attributes.

        Returns:
            For "no data" descriptions, a fixed payload with ``no_data`` set
            to ``True``. Otherwise the dictionary built up by
            ``extract_thermal_transmittance`` and ``extract_component_types``
            plus ``another_property_below`` and ``insulation_thickness``.
        """
        if self.nodata:
            # NOTE(review): the ``is_*`` keys presumably mirror what
            # extract_component_types emits for FLOOR_TYPES - confirm.
            # NOTE(review): thickness is the string 'none' here, whereas an
            # unmatched description below yields None; kept for compatibility.
            return {
                'thermal_transmittance': None,
                'thermal_transmittance_unit': None,
                'is_assumed': False,
                'is_to_unheated_space': False,
                'is_to_external_air': False,
                'is_suspended': False,
                'is_solid': False,
                'another_property_below': False,
                'insulation_thickness': 'none',
                'no_data': True
            }

        result: Dict[str, Union[str, bool, int, float, None]] = {}
        description = self.description

        # Thermal transmittance; the helper hands back the updated result and
        # the description to carry forward.
        result, description = extract_thermal_transmittance(result, description)

        # Floor type.
        result, description = extract_component_types(
            result, description, list_of_components=self.FLOOR_TYPES
        )

        # Check if there is another dwelling below. A plain substring test
        # also covers the parenthesised spellings.
        result["another_property_below"] = any(
            marker in description for marker in self.DWELLING_BELOW
        )

        # First matching insulation phrase decides the thickness category.
        result["insulation_thickness"] = next(
            (
                thickness
                for phrase, thickness in self._INSULATION_THICKNESS.items()
                if phrase in description
            ),
            None,
        )

        if result["another_property_below"]:
            # A floor above another property is recorded with a transmittance
            # of zero.
            result["thermal_transmittance"] = 0
            result["thermal_transmittance_unit"] = "w/m-¦k"

        return result