From b3053f8518727db1b00c2165b286988dbf5796e9 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Sep 2024 16:10:34 +0100 Subject: [PATCH] handling edge cases for epc cleaning --- etl/epc_clean/epc_attributes/HotWaterAttributes.py | 9 ++++++++- etl/epc_clean/epc_attributes/MainheatAttributes.py | 1 + etl/epc_clean/epc_attributes/RoofAttributes.py | 3 +++ etl/epc_clean/epc_attributes/WindowAttributes.py | 1 + 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index 78ee5f7d..b0105b10 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -127,13 +127,20 @@ class HotWaterAttributes(Definitions): "thermostat, flue gas heat recovery", "ogçör brif system, gydag ynnigçör haul, adfer gwres nwyon ffliw": "from main system, plus solar, flue gas " "heat recovery", + "o r brif system, gydag ynni r haul, dim thermostat ar y silindr": "from main system, plus solar, no cylinder " + "thermostat", } + NODATA_DESCRIPTIONS = [ + "sap05 hot-water", + "sap hot-water" + ] + def __init__(self, description: str): self.description: str = clean_description(description.lower()).strip() self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or ( - self.description == "sap05 hot-water" + self.description in self.NODATA_DESCRIPTIONS ) translation = self.WELSH_TEXT.get(self.description) diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index 430b418d..46cbf52b 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -61,6 +61,7 @@ class MainHeatAttributes(Definitions): "bwyler a rheiddiaduron, olew, st+¦r wresogyddion trydan": "boiler and radiators, oil, electric storage " "heaters", "pwmp gwres sygçön tarddu yn yr awyr, awyr gynnes, trydan": 
"air source heat pump, warm air, electric", + "stor wresogyddion trydan": "electric storage heaters", # Should be handled by edge cases ", trydan": ", electric", } diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index f36d445f..75cb8af1 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -19,6 +19,8 @@ class RoofAttributes(Definitions): "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation", "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', "ar oleddf, wedi?i inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', + "ar oleddf, wedi?i inswleiddio wrth y trawstia": 'pitched, insulated at rafters', + "ar oleddf, wedigçöi inswleiddio wrth y trawstia": 'pitched, insulated at rafters', "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)", "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation", "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)", @@ -35,6 +37,7 @@ class RoofAttributes(Definitions): "ystafell(oedd) to, nenfwd wedigçöi inswleiddio": "roof room(s), ceiling insulated", "ystafell(oedd) to, dim inswleiddio (rhagdybiaeth)": "roof room(s), no insulation (assumed)", "ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation", + "to gwellt, gydag inswleiddio ychwanegol": "thatched, with additional insulation", } DEFAULT_KEYS = [ diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py index a52977e6..8c4d0c45 100644 --- a/etl/epc_clean/epc_attributes/WindowAttributes.py +++ b/etl/epc_clean/epc_attributes/WindowAttributes.py @@ -37,6 +37,7 @@ class WindowAttributes(Definitions): NO_DATA_CASES = [ "SAP05:Windows", "Solid, no insulation (assumed)", # A description typically associated with floors, not windows + "Suspended, no insulation (assumed)", # A 
description typically associated with floors, not windows ] def __init__(self, description: str):