import re
from typing import Dict, Union

from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_thermal_transmittance, extract_component_types


class FloorAttributes(Definitions):
    """Parse a free-text EPC floor description into structured attributes.

    The description is lower-cased, translated from Welsh to English when a
    known Welsh phrase is recognised, validated against the known floor
    vocabulary, and finally broken down by :meth:`process`.
    """

    # Phrases indicating that the floor sits above another property.
    DWELLING_BELOW = ["another dwelling below", "other premises below"]

    # Component keywords handed to ``extract_component_types``; also used to
    # validate that a description is recognisable at all.
    FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"]

    # For the short term, while we are still exploring the data, we maintain a list of error cases which
    # we want to ignore and consider as no data.
    OBSERVED_ERRORS = ["Conservatory"]

    # Exact (lower-cased) Welsh description -> English equivalent.
    # NOTE(review): the odd character runs in some keys ("gçö") look like
    # mis-encoded apostrophes; they are kept verbatim because lookups are
    # exact string matches against the incoming data - confirm against source data.
    WELSH_TEXT = {
        "(anheddiad arall islaw)": "(another dwelling below)",
        "solet, dim inswleiddio (rhagdybiaeth)": "solid, no insulation (assumed)",
        "solet, dim inswleiddio": "solid, no insulation",
        "crog, dim inswleiddio (rhagdybiaeth)": "suspended, no insulation (assumed)",
        "crog, dim inswleiddio": "suspended, no insulation",
        "(eiddo arall islaw)": "(other premises below)",
        "solet, inswleiddio cyfyngedig (rhagdybiaeth)": "solid, limited insulation (assumed)",
        "solet, inswleiddio cyfyngedig": "solid, limited insulation",
        "crog, wedigçöi inswleiddio (rhagdybiaeth)": "suspended, insulated (assumed)",
        "crog, wedigçöi inswleiddio": "suspended, insulated",
        "igçör awyr y tu allan, dim inswleiddio (rhagdybiaeth)": "to external air, no insulation (assumed)",
        "igçör awyr y tu allan, dim inswleiddio": "to external air, no insulation",
        "i ofod heb ei wresogi, wedigçöi inswleiddio (rhagdybiaeth)": "to unheated space, insulated (assumed)",
        "i ofod heb ei wresogi, wedigçöi inswleiddio": "to unheated space, insulated",
        "solet, wedigçöi inswleiddio (rhagdybiaeth)": "solid, insulated (assumed)",
        "solet, wedigçöi inswleiddio": "solid, insulated",
        "i ofod heb ei wresogi, dim inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)",
        "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation",
    }

    # Welsh form of "average thermal transmittance <value> w/m-¦k"; group 1
    # captures the numeric U-value. Applied to the lower-cased description.
    WELSH_UVALUE_PATTERN = re.compile(
        r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k'
    )

    # Ordered (phrase, rating) pairs: the first phrase found in the
    # description decides the reported insulation thickness.
    INSULATION_THICKNESS = (
        ("external insulation", "average"),
        ("internal insulation", "average"),
        ("limited insulation", "below average"),
        ("partial insulation", "below average"),
        ("no insulation", "none"),
        ("additional insulation", "above average"),
        ("insulated", "average"),
    )

    def __init__(self, description: Union[str, None]):
        """Normalise, translate and validate a floor description.

        Args:
            description: Raw floor description. ``None`` or an empty string
                is treated as "no data" rather than an error.

        Raises:
            ValueError: If the description carries data but mentions none of
                the known floor types, "dwelling below" phrases or an
                average thermal transmittance.
        """
        # Guard before lower-casing so a missing description is classified
        # as no-data instead of raising AttributeError.
        self.description: str = description.lower() if description else ""

        # Anomaly / error lists are compared against the raw (original-case)
        # text. DATA_ANOMALY_MATCHES is not defined in this class; presumably
        # it is inherited from Definitions.
        self.nodata: bool = (
            not description
            or description in self.DATA_ANOMALY_MATCHES
            or description in self.OBSERVED_ERRORS
        )

        # Try to perform a translation, in case the description is in Welsh.
        self.translate_welsh_text()

        if self.nodata:
            return

        recognised = self.FLOOR_TYPES + self.DWELLING_BELOW + ["average thermal transmittance"]
        if not any(phrase in self.description for phrase in recognised):
            raise ValueError('Invalid description')

    def translate_welsh_text(self):
        """Replace ``self.description`` with its English form if it is Welsh.

        Two cases are handled: a Welsh "average thermal transmittance"
        sentence (translated generically, keeping the numeric value), and an
        exact match against ``WELSH_TEXT``. A successful table lookup also
        clears ``self.nodata``. Unrecognised text is left untouched.
        """
        uvalue_match = self.WELSH_UVALUE_PATTERN.search(self.description)
        if uvalue_match:
            uvalue = uvalue_match.group(1)
            # Unit kept lower-case so the translated text has the same shape
            # as an English description after the lower-casing in __init__.
            self.description = f"average thermal transmittance {uvalue} w/m-¦k"
            return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

    def process(self) -> Dict[str, Union[float, str, bool, int, None]]:
        """Break the description down into structured floor attributes.

        Returns:
            ``{"no_data": True}`` when the description was classified as no
            data. Otherwise a dict holding whatever
            ``extract_thermal_transmittance`` and ``extract_component_types``
            add, plus:

            * ``another_property_below`` - ``True`` if a parenthesised
              "dwelling below" phrase is present.
            * ``insulation_thickness`` - rating of the first matching
              insulation phrase, or ``None`` if none matches.
        """
        if self.nodata:
            return {"no_data": True}

        result: Dict[str, Union[float, str, bool, int, None]] = {}
        description = self.description

        # Thermal transmittance; the helper returns the updated description too.
        result, description = extract_thermal_transmittance(result, description)

        # Floor type.
        result, description = extract_component_types(
            result, description, list_of_components=self.FLOOR_TYPES
        )

        # Only the parenthesised form, e.g. "(another dwelling below)", counts.
        result['another_property_below'] = any(
            f"({phrase})" in description for phrase in self.DWELLING_BELOW
        )

        result['insulation_thickness'] = next(
            (rating for phrase, rating in self.INSULATION_THICKNESS if phrase in description),
            None,
        )

        return result