From cf5f69d6f09b53d7a505cd436c7daab2adf5e517 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Sep 2024 18:31:50 +0100 Subject: [PATCH] debugging cleaning class for examples that hadn't been covered previously --- etl/epc_clean/app.py | 3 ++- etl/epc_clean/epc_attributes/MainheatAttributes.py | 6 ++++++ etl/epc_clean/epc_attributes/MainheatControlAttributes.py | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 1d833b72..1dfdd452 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -44,7 +44,8 @@ def app(): # Rename the columns to the same format as the api returns data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold - data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE] + # For this cleaning dataset, let's try and use all EPCs + # data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE] # Convert to list of dictioaries as returned by the api data = data.to_dict("records") diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index 56115dca..a7b4305e 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -66,6 +66,7 @@ class MainHeatAttributes(Definitions): "electric heat pumps": "electric heat pump", "solar-assisted heat pump": "solar assisted heat pump", "portable electric heating": "portable electric heaters", + "portable electric heating assumed for most rooms": "portable electric heaters assumed for most rooms", } edge_case_result = {} @@ -138,6 +139,11 @@ class MainHeatAttributes(Definitions): self.is_edge_case = True return + if self.description == ', electric': + self.edge_case_result['has_electric'] = True + self.is_edge_case = True + return + def process(self) -> Dict[str, Union[str, bool]]: result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS} diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 46fff6d8..b3cc4df4 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -75,6 +75,7 @@ class MainheatControlAttributes(Definitions): TO_REMAP = { "celect control": 'celect-type control', "celect controls": 'celect-type control', + "trv's, program & flow switch": 'trvs, programmer & flow switch', } WELSH_TEXT = {