From a27d664a2f3ed141473c759c54df112142ad07cc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Wed, 25 Sep 2024 11:24:55 +0100
Subject: [PATCH] Handling windows cleaning edge case

---
 etl/epc_clean/epc_attributes/WindowAttributes.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py
index e9139510..a52977e6 100644
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@@ -33,12 +33,18 @@ class WindowAttributes(Definitions):
         "gwydrau lluosog ym mhobman": "multiple glazing throughout",
     }
 
+    # These are observed data anomalies that we want to ignore
+    NO_DATA_CASES = [
+        "SAP05:Windows",
+        "Solid, no insulation (assumed)",  # A description typically associated with floors, not windows
+    ]
+
     def __init__(self, description: str):
         self.description: str = clean_description(description.lower())
 
         # In the case of an empty description, we want to return a dictionary with all values set to False
         # and indicate there was no data
-        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description == "SAP05:Windows"
+        self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description in self.NO_DATA_CASES
 
         translation = self.WELSH_TEXT.get(self.description)
         if translation: