diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py index e9139510..a52977e6 100644 --- a/etl/epc_clean/epc_attributes/WindowAttributes.py +++ b/etl/epc_clean/epc_attributes/WindowAttributes.py @@ -33,12 +33,18 @@ class WindowAttributes(Definitions): "gwydrau lluosog ym mhobman": "multiple glazing throughout", } + # These are observed data anomalies that we want to ignore + NO_DATA_CASES = [ + "SAP05:Windows", + "Solid, no insulation (assumed)", # A description typically associated with floors, not windows + ] + def __init__(self, description: str): self.description: str = clean_description(description.lower()) # In the case of an empty description, we want to return a dictionary with all values set to False # and indicate there was no data - self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description == "SAP05:Windows" + self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description in self.NO_DATA_CASES translation = self.WELSH_TEXT.get(self.description) if translation: