from typing import Dict, Union

from model_data.BaseUtility import BaseUtility
from model_data.epc_attributes.attribute_utils import clean_description


class WindowAttributes(BaseUtility):
    """Extract glazing attributes from a free-text window description.

    The description is lower-cased and passed through ``clean_description``
    on construction; ``process`` then reports whether glazing is mentioned,
    how much of the property it covers, and what type it is.
    """

    # Substrings that indicate the description talks about glazing at all.
    GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]
    # Coverage terms, searched in this order; every entry must be a key of
    # ``coverage_map``. "fully" precedes "full", but both normalise to "full".
    GLAZING_COVERAGE = ["fully", "mostly", "partial", "some", "full", "throughout"]
    # Glazing types, searched in this order; the matched term is stored as-is.
    GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance", "single"]

    # Normalises each coverage term to one of "full", "most" or "partial".
    coverage_map = {
        "full": "full",
        "fully": "full",
        "mostly": "most",
        "partial": "partial",
        "some": "partial",
        "throughout": "full",
    }

    # Exact-match translations applied to the cleaned description.
    WELSH_TEXT = {
        "gwydrau dwbl llawn": "full double glazing",
    }

    def __init__(self, description: Union[str, None]):
        """Clean and validate the description.

        Args:
            description: Raw window description. ``None`` or an empty string
                is treated as "no data".

        Raises:
            ValueError: If data is present but the cleaned description
                contains none of the known glazing keywords, coverage terms
                or glazing types.
        """
        # Guard against None so that a missing description is reported as
        # "no data" below instead of failing on ``.lower()``.
        self.description: str = clean_description((description or "").lower())

        # In the case of an empty description, we want to return a dictionary
        # with all values set to False/None and indicate there was no data.
        # NOTE(review): DATA_ANOMALY_MATCHES is not defined in this class; it
        # is presumably provided by BaseUtility - confirm. The raw
        # (un-cleaned) description is what gets compared against it.
        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES

        # A recognised Welsh description is swapped for its English
        # equivalent and always counts as real data.
        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.nodata = False
            self.description = translation

        if not self.nodata:
            known_terms = self.GLAZING_KEYWORDS + self.GLAZING_COVERAGE + self.GLAZING_TYPES
            if not any(term in self.description for term in known_terms):
                raise ValueError('Invalid description')

    @staticmethod
    def _first_match(text, candidates):
        """Return the first candidate that occurs as a substring of text, else None."""
        return next((candidate for candidate in candidates if candidate in text), None)

    def process(self) -> Dict[str, Union[str, bool, None]]:
        """Build the glazing attribute dictionary for this description.

        Returns:
            A dict with the keys ``has_glazing`` (bool), ``glazing_coverage``
            (``"full"``, ``"most"``, ``"partial"`` or ``None``),
            ``glazing_type`` (an entry of ``GLAZING_TYPES`` or ``None``) and
            ``no_data`` (bool). When there is no data, ``has_glazing`` is
            False and both coverage and type are ``None``.
        """
        result: Dict[str, Union[str, bool, None]] = {
            "has_glazing": False,
            "glazing_coverage": None,
            "glazing_type": None,
            "no_data": self.nodata,
        }

        if self.nodata:
            return result

        # We consolidate GLAZING_KEYWORDS into a single attribute
        result["has_glazing"] = any(keyword in self.description for keyword in self.GLAZING_KEYWORDS)

        # For coverage and type, we only store the first one we find,
        # scanning the comma-separated parts from left to right.
        for part in self.description.split(','):
            part = part.strip()  # remove leading/trailing white spaces

            if not result["glazing_coverage"]:
                coverage = self._first_match(part, self.GLAZING_COVERAGE)
                if coverage:
                    result["glazing_coverage"] = self.coverage_map[coverage]

            if not result["glazing_type"]:
                glazing_type = self._first_match(part, self.GLAZING_TYPES)
                if glazing_type:
                    result["glazing_type"] = glazing_type

            # Nothing left to look for once both have been found.
            if result["glazing_coverage"] and result["glazing_type"]:
                break

        # If no coverage term was found, we assume full coverage. The glazing
        # type is left as None when no type was found.
        if not result["glazing_coverage"]:
            result["glazing_coverage"] = "full"

        return result