From 4edf4ceedb69e156c3ade26d3e8079826868544a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Thu, 7 Sep 2023 15:50:30 +0300
Subject: [PATCH] Debugging roof and lighting welsh translations

---
 .../epc_attributes/LightingAttributes.py      |  3 +-
 model_data/epc_attributes/RoofAttributes.py   | 30 ++++++++++++++++---
 .../test_lighting_attributes_cases.py         |  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/model_data/epc_attributes/LightingAttributes.py b/model_data/epc_attributes/LightingAttributes.py
index 452caa7a..05e2edd8 100644
--- a/model_data/epc_attributes/LightingAttributes.py
+++ b/model_data/epc_attributes/LightingAttributes.py
@@ -5,7 +5,8 @@ from model_data.utils import correct_spelling
 
 class LightingAttributes:
     WELSH_TEXT = {
-        "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets"
+        "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets",
+        "dim goleuadau ynni-isel": "no low energy lighting"
     }
 
     def __init__(self, description, averages):
diff --git a/model_data/epc_attributes/RoofAttributes.py b/model_data/epc_attributes/RoofAttributes.py
index db7321f8..7f12fe7c 100644
--- a/model_data/epc_attributes/RoofAttributes.py
+++ b/model_data/epc_attributes/RoofAttributes.py
@@ -22,16 +22,38 @@ class RoofAttributes(Definitions):
         self.description: str = description.lower()
         self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
 
-        translation = self.WELSH_TEXT.get(self.description)
-        if translation:
-            self.nodata = False
-            self.description = translation
+        self.welsh_translation_search()
 
         if not self.nodata and not any(
             rt in self.description for rt in self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"]
         ):
             raise ValueError('Invalid description')
 
+    def welsh_translation_search(self):
+        """
+        Translate a Welsh roof description to the English equivalent.
+
+        Loft insulation descriptions share one structure where only the
+        thickness changes, so they are matched with a regular expression
+        instead of adding a WELSH_TEXT record per thickness. Any other
+        description is looked up in WELSH_TEXT and left as-is when absent.
+        """
+        # Thickness is "<n> mm" or "<n>+ mm" (e.g. "300+ mm")
+        insulation_thickness_match = re.search(
+            r"(\d+\+? mm) o inswleiddio yn y llofft", self.description
+        )
+
+        if insulation_thickness_match:
+            insulation_thickness = insulation_thickness_match.group(1)
+            # The remaining Welsh text is dropped; the roof is assumed pitched
+            self.nodata = False
+            self.description = f"pitched, {insulation_thickness} loft insulation"
+        else:
+            translation = self.WELSH_TEXT.get(self.description)
+            if translation:
+                self.nodata = False
+                self.description = translation
+
     def process(self) -> Dict[str, Union[float, str, bool, None]]:
         result: Dict[str, Union[float, str, bool, None]] = {}
 
diff --git a/model_data/tests/test_data/test_lighting_attributes_cases.py b/model_data/tests/test_data/test_lighting_attributes_cases.py
index d9e3f01f..d8e35ee0 100644
--- a/model_data/tests/test_data/test_lighting_attributes_cases.py
+++ b/model_data/tests/test_data/test_lighting_attributes_cases.py
@@ -32,4 +32,5 @@ test_cases = [
     {'original_description': 'No Low energy lighting', 'low_energy_proportion': 0},
     {'original_description': 'Goleuadau ynni-isel mewn 60% oGÇÖr mannau gosod', 'low_energy_proportion': 0.6},
     {'original_description': 'Goleuadau ynni-isel ym mhob un oGÇÖr mannau gosod', 'low_energy_proportion': 1},
+    {'original_description': 'Dim goleuadau ynni-isel', 'low_energy_proportion': 0},
 ]