import re

from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation
from etl.epc_clean.utils import correct_spelling

# Welsh phrasings of "low energy lighting in N% of fixed outlets". They are
# tried in this order so the first pattern keeps priority over the second.
# The "ogçör" variant is kept verbatim: it appears to be a mis-encoded "o'r"
# as it occurs in the incoming data (TODO confirm against the raw source).
_WELSH_PERCENTAGE_PATTERNS = (
    re.compile(r"goleuadau ynni-isel mewn (\d+)%? ogçör mannau gosod"),
    re.compile(r"goleuadau ynni-isel mewn (\d+)%? o'r mannau gosod"),
)


class LightingAttributes(Definitions):
    """Turn a lighting description into a low-energy-lighting proportion.

    The description is cleaned, translated from Welsh where a known phrase
    matches, spell-corrected and then mapped by :meth:`process` to a
    ``{"low_energy_proportion": ...}`` dictionary.
    """

    # Exact-match translations of known Welsh descriptions (keys are matched
    # against the cleaned, lower-cased description).
    WELSH_TEXT = {
        "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets",
        "goleuadau ynni-isel ym mhob un o r mannau gosod": "low energy lighting in all fixed outlets",
        "dim goleuadau ynni-isel": "no low energy lighting",
        "goleuadau ynni-isel ym mhob un o'r mannau gosod": "Low energy lighting in all fixed outlets",
        "effeithlonrwydd goleuo da": "good lighting efficiency",
        "effeithlonrwydd goleuo is na'r cyfartaledd": "below average lighting efficiency",
        "effeithlonrwydd goleuo rhagorol": "excellent lighting efficiency",
    }

    # Raw descriptions that are treated as "no data".
    OBSERVED_ERRORS = []

    def __init__(self, description, averages):
        """Clean and normalise ``description`` and record whether it holds data.

        :param description: raw lighting description; ``None`` is accepted and
            treated as an empty description (flagged as no data).
        :param averages: iterable of mappings with ``"lighting-description"``
            and ``"low-energy-lighting"`` keys, used by :meth:`process` for
            the efficiency-rating descriptions.
        """
        # A missing description used to crash on ``.lower()`` before the
        # no-data check below could flag it; treat it as an empty string.
        raw_text = "" if description is None else description

        self.description: str = clean_description(raw_text.lower())
        self.welsh_translation_search()
        self.description = correct_spelling(self.description)
        self.averages = averages

        # Handle seeming occurrences of mixed translations.
        self.description = handle_mixed_translation(self.description)

        # The no-data test deliberately uses the raw (uncleaned) description.
        self.nodata = (
            (not description)
            or (description in self.DATA_ANOMALY_MATCHES)
            or (description in self.OBSERVED_ERRORS)
            or (description == "SAP05:Lighting")
        )

        if description == "SAP05:Lighting":
            # Reset self.description to the raw placeholder value.
            self.description = description

    def welsh_translation_search(self):
        """Translate a Welsh description in ``self.description`` to English.

        Descriptions stating a percentage of low energy lighting are matched
        by regular expression and rebuilt in English with the captured
        percentage. Anything else is looked up verbatim in ``WELSH_TEXT``.
        When nothing matches, the description is left unchanged.
        """
        for pattern in _WELSH_PERCENTAGE_PATTERNS:
            found = pattern.search(self.description)
            if found is not None:
                percentage = found.group(1)
                self.description = f"low energy lighting in {percentage}% of fixed outlets"
                return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.description = translation

    def process(self):
        """Return ``{"low_energy_proportion": value}`` for the description.

        ``value`` is ``None`` when the description carries no data, ``0`` or
        ``1`` for the "none" / "all" / "excellent" phrasings, the matching
        entry from ``self.averages`` for the "good" and "below average"
        efficiency ratings, and otherwise the first integer found in the
        description divided by 100.

        :raises IndexError: if an efficiency rating has no entry in
            ``self.averages``.
        :raises NotImplementedError: if the description matches no known case.
        """
        description = self.description

        if self.nodata:
            return {"low_energy_proportion": None}

        if "no low energy lighting" in description:
            return {"low_energy_proportion": 0}

        if "all fixed outlets" in description:
            return {"low_energy_proportion": 1}

        if "excellent lighting efficiency" in description:
            return {"low_energy_proportion": 1}

        # "excellent" has already returned above, so only the remaining two
        # efficiency ratings need the averages lookup.
        if ("good lighting efficiency" in description) or (
            "below average lighting efficiency" in description
        ):
            for row in self.averages:
                if row["lighting-description"] == description:
                    return {"low_energy_proportion": row["low-energy-lighting"]}
            # Same exception type as the former ``[...][0]`` lookup, but with
            # a message that names the description that could not be found.
            raise IndexError(
                f"No lighting average found for description: {description!r}"
            )

        match = re.search(r"\d+", description)
        if match:
            proportion = int(match.group()) / 100.0
            return {"low_energy_proportion": proportion}

        raise NotImplementedError("Not handled this case - investigate me")