Model/etl/epc_clean/epc_attributes/LightingAttributes.py
2025-11-30 18:16:09 +00:00

88 lines
3.5 KiB
Python

import re
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation
from etl.epc_clean.utils import correct_spelling
class LightingAttributes(Definitions):
    """Turn an EPC lighting description into a low-energy lighting proportion.

    On construction the raw description is lower-cased, cleaned, translated
    from Welsh where a known phrase is recognised, spell-corrected and passed
    through the mixed-translation handler. ``process`` then maps the
    normalised text to a ``low_energy_proportion`` value.
    """

    # Exact-match Welsh phrases and their English equivalents. The first two
    # keys appear to be mis-encoded / apostrophe-less spellings of the "o'r"
    # phrase; they are kept verbatim because they are matched against the data.
    WELSH_TEXT = {
        "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets",
        "goleuadau ynni-isel ym mhob un o r mannau gosod": "low energy lighting in all fixed outlets",
        "dim goleuadau ynni-isel": "no low energy lighting",
        "goleuadau ynni-isel ym mhob un o'r mannau gosod": 'Low energy lighting in all fixed outlets',
        "effeithlonrwydd goleuo da": 'good lighting efficiency',
        "effeithlonrwydd goleuo is na'r cyfartaledd": 'below average lighting efficiency',
        "effeithlonrwydd goleuo rhagorol": "excellent lighting efficiency",
    }

    # Raw descriptions that should be treated as "no data".
    OBSERVED_ERRORS = []

    # Raw placeholder value that carries no lighting information.
    _PLACEHOLDER = "SAP05:Lighting"

    # Welsh "low energy lighting in N% of fixed outlets" phrasings, tried in
    # order. Compiled once here rather than on every call. The last pattern
    # covers the apostrophe-less "o r" spelling that WELSH_TEXT already
    # recognises for the "all fixed outlets" phrase.
    _WELSH_PERCENTAGE_PATTERNS = (
        re.compile(r"goleuadau ynni-isel mewn (\d+)%? ogçör mannau gosod"),
        re.compile(r"goleuadau ynni-isel mewn (\d+)%? o'r mannau gosod"),
        re.compile(r"goleuadau ynni-isel mewn (\d+)%? o r mannau gosod"),
    )

    def __init__(self, description, averages):
        """Normalise ``description`` and record whether it carries any data.

        Args:
            description: Raw lighting description. ``None`` or an empty value
                is accepted and flagged as no data.
            averages: Iterable of mappings with the keys
                ``"lighting-description"`` and ``"low-energy-lighting"``,
                consulted by ``process`` for efficiency-band descriptions.
        """
        self.averages = averages

        # A missing description is normalised exactly like an empty string so
        # that the no-data check below is reached instead of `.lower()`
        # raising AttributeError on None.
        self.description: str = clean_description((description or "").lower())
        self.welsh_translation_search()
        self.description = correct_spelling(self.description)
        # Handle seeming occurrences of mixed translations.
        self.description = handle_mixed_translation(self.description)

        # The no-data decision is made on the raw input, not the cleaned text.
        # DATA_ANOMALY_MATCHES is not defined in this class; it is expected to
        # be provided by the Definitions base class.
        self.nodata = (
            (not description)
            or (description in self.DATA_ANOMALY_MATCHES)
            or (description in self.OBSERVED_ERRORS)
            or (description == self._PLACEHOLDER)
        )
        if description == self._PLACEHOLDER:
            # Keep the placeholder verbatim rather than its cleaned form.
            self.description = description

    def welsh_translation_search(self):
        """Translate a recognised Welsh description to English in place.

        Percentage phrasings are matched by regular expression and rebuilt as
        "low energy lighting in N% of fixed outlets"; otherwise the whole
        description is looked up in ``WELSH_TEXT``. Unrecognised text is left
        unchanged.
        """
        for pattern in self._WELSH_PERCENTAGE_PATTERNS:
            found = pattern.search(self.description)
            if found is not None:
                percentage = found.group(1)
                self.description = f"low energy lighting in {percentage}% of fixed outlets"
                return

        translation = self.WELSH_TEXT.get(self.description)
        if translation:
            self.description = translation

    def process(self):
        """Return ``{"low_energy_proportion": value}`` for the description.

        ``value`` is ``None`` when there is no data, ``0`` for "no low energy
        lighting", ``1`` for "all fixed outlets" or "excellent lighting
        efficiency", the matching ``"low-energy-lighting"`` entry from
        ``averages`` for "good" / "below average" lighting efficiency, and
        otherwise the first integer in the description divided by 100.

        Raises:
            IndexError: An efficiency-band description has no row in
                ``averages`` whose ``"lighting-description"`` equals it.
            NotImplementedError: The description matches none of the known
                forms.
        """
        description = self.description

        if self.nodata:
            return {"low_energy_proportion": None}

        if "no low energy lighting" in description:
            return {"low_energy_proportion": 0}

        if "all fixed outlets" in description or "excellent lighting efficiency" in description:
            return {"low_energy_proportion": 1}

        if "good lighting efficiency" in description or "below average lighting efficiency" in description:
            # First row whose description is an exact match wins.
            for row in self.averages:
                if row["lighting-description"] == description:
                    return {"low_energy_proportion": row["low-energy-lighting"]}
            # Same exception type as the former `[...][0]` lookup, but with a
            # message that says what was missing.
            raise IndexError(
                f"No average low-energy lighting proportion found for description {description!r}"
            )

        # Fall back to the first integer in the text, read as a percentage.
        match = re.search(r"\d+", description)
        if match:
            return {"low_energy_proportion": int(match.group()) / 100.0}

        raise NotImplementedError("Not handled this case - investigate me")