From 804e8fb720e473b746e4491a0a5e0700fc486d90 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Jan 2024 12:00:47 +0000 Subject: [PATCH] handling Epc Record when the EPC has been interpolated --- etl/epc/Record.py | 30 +++++----- etl/epc/tests/test_epcrecord.py | 99 +++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 14 deletions(-) diff --git a/etl/epc/Record.py b/etl/epc/Record.py index cdbafd7e..2535f204 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -349,7 +349,7 @@ class EPCRecord: self.prepared_epc["floor-level"] = ( FLOOR_LEVEL_MAP[self.prepared_epc["floor-level"]] if - self.prepared_epc["floor-level"] not in DATA_ANOMALY_MATCHES else None + self.prepared_epc["floor-level"] not in list(DATA_ANOMALY_MATCHES) + ["", None] else None ) def _clean_number_lighting_outlets(self): @@ -499,7 +499,7 @@ class EPCRecord: self.prepared_epc["unheated-corridor-length"] = ( float(self.prepared_epc["unheated-corridor-length"]) if - self.prepared_epc["unheated-corridor-length"] != "" else None + self.prepared_epc["unheated-corridor-length"] not in ["", None] else None ) def _clean_count_variables(self): @@ -509,18 +509,18 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Record doesn not contain epc data") - fields = { - "number_of_open_fireplaces": "number-open-fireplaces", - "number_of_extensions": "extension-count", - "number_of_storeys": "flat-storey-count", - "number_of_rooms": "number-habitable-rooms", - } + fields = [ + "number-open-fireplaces", + "extension-count", + "flat-storey-count", + "number-habitable-rooms" + ] - null_attributes = ["number_of_storeys", "number_of_rooms"] + null_attributes = ["flat-storey-count", "number-habitable-rooms"] - for attribute, epc_field in fields.items(): - value = self.prepared_epc[epc_field] - if value == "" or value in DATA_ANOMALY_MATCHES: + for attribute in fields: + value = self.prepared_epc[attribute] + if value in ["", None] or value in DATA_ANOMALY_MATCHES: if
attribute in null_attributes: value = None else: @@ -537,8 +537,9 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc['wind-turbine-count'] = int(self.prepared_epc['wind-turbine-count']) if self.prepared_epc[ - 'wind-turbine-count'] != "" else None + self.prepared_epc['wind-turbine-count'] = int( + self.prepared_epc['wind-turbine-count'] + ) if self.prepared_epc['wind-turbine-count'] not in ["", None] else None def _clean_solar_hot_water(self): """ @@ -551,6 +552,7 @@ class EPCRecord: "Y": True, "N": False, "": None, + None: None } self.prepared_epc['solar-water-heating-flag'] = value_map[self.prepared_epc['solar-water-heating-flag']] diff --git a/etl/epc/tests/test_epcrecord.py b/etl/epc/tests/test_epcrecord.py index 06b8ed06..48ad5148 100644 --- a/etl/epc/tests/test_epcrecord.py +++ b/etl/epc/tests/test_epcrecord.py @@ -171,6 +171,16 @@ class TestEpcRecord: assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor" + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "heat-loss-corridor": "unheated corridor", + "unheated-corridor-length": None + } + record._clean_heat_loss_corridor() + + assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor" + assert record.prepared_epc["unheated-corridor-length"] is None + def test_clean_heat_loss_corridor_anomaly(self, cleaning_data): record = EPCRecord(cleaning_data=cleaning_data) # Assuming "InvalidCorridor" is an anomaly @@ -252,3 +262,92 @@ class TestEpcRecord: record._clean_number_lighting_outlets() assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0 + + def test_clean_count_variables(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "number-open-fireplaces": "1", + "extension-count": None, + "flat-storey-count": "", + "number-habitable-rooms": "INVALID!", + } + + record._clean_count_variables() + + assert 
record.prepared_epc["number-open-fireplaces"] == 1.0 + assert record.prepared_epc["extension-count"] == 0 + assert record.prepared_epc["flat-storey-count"] is None + assert record.prepared_epc["number-habitable-rooms"] is None + + def test_clean_floor_level(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "floor-level": "1", + } + + record._clean_floor_level() + + assert record.prepared_epc["floor-level"] == 1.0 + + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "floor-level": "", + } + + record._clean_floor_level() + + assert record.prepared_epc["floor-level"] is None + + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "floor-level": None, + } + + record._clean_floor_level() + + assert record.prepared_epc["floor-level"] is None + + def test_clean_solar_hot_water(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "solar-water-heating-flag": "Y", + } + + record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is True + + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "solar-water-heating-flag": "N", + } + + record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is False + + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "solar-water-heating-flag": "", + } + + record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is None + + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "solar-water-heating-flag": None, + } + + record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is None