diff --git a/etl/epc/tests/test_epcrecord.py b/etl/epc/tests/test_epcrecord.py
index 9dc2c01b..30076711 100644
--- a/etl/epc/tests/test_epcrecord.py
+++ b/etl/epc/tests/test_epcrecord.py
@@ -309,3 +309,97 @@ class TestEpcRecord:
         base_record._apply_cleaning_rules()
 
         assert base_record._prepared_epc["extension-count"] == 2
+
+    def test_apply_averages_cleaning_fills_missing_values(self, cleaning_data):
+        """None room counts and floor height should be filled with real values."""
+        import pandas as pd
+
+        record = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
+
+        record._prepared_epc = {
+            "property-type": cleaning_data["property_type"].iloc[0],
+            "local-authority": cleaning_data["local_authority"].iloc[0],
+            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
+
+            "number-habitable-rooms": None,
+            "number-heated-rooms": None,
+            "floor-height": None,
+        }
+
+        record._apply_averages_cleaning()
+
+        # pd.notna rejects NaN as well as None, so a NaN fill value cannot
+        # pass as "filled".
+        assert pd.notna(record._prepared_epc["number-habitable-rooms"])
+        assert pd.notna(record._prepared_epc["number-heated-rooms"])
+        assert pd.notna(record._prepared_epc["floor-height"])
+
+    def test_apply_averages_cleaning_no_missing(self, cleaning_data):
+        """A record with no missing fields should come back unchanged."""
+        record = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
+
+        record._prepared_epc = {
+            "property-type": cleaning_data["property_type"].iloc[0],
+            "local-authority": cleaning_data["local_authority"].iloc[0],
+            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
+
+            "number-habitable-rooms": 5,
+            "number-heated-rooms": 5,
+            "floor-height": 2.5,
+        }
+
+        original = record._prepared_epc.copy()
+
+        record._apply_averages_cleaning()
+
+        assert record._prepared_epc == original
+
+    def test_apply_averages_cleaning_caps_heated_rooms(self, cleaning_data):
+        """Filled heated rooms must not exceed filled habitable rooms."""
+        record = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
+
+        record._prepared_epc = {
+            "property-type": cleaning_data["property_type"].iloc[0],
+            "local-authority": cleaning_data["local_authority"].iloc[0],
+            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
+
+            "number-habitable-rooms": None,
+            "number-heated-rooms": None,
+            "floor-height": None,
+        }
+
+        record._apply_averages_cleaning()
+
+        assert (
+            record._prepared_epc["number-heated-rooms"]
+            <= record._prepared_epc["number-habitable-rooms"]
+        )
+
+    def test_apply_averages_cleaning_floor_area_filter(self, cleaning_data):
+        """With the median floor area as input, filled floor height is positive."""
+        record = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
+
+        floor_area = float(cleaning_data["total_floor_area"].median())
+
+        record._prepared_epc = {
+            "property-type": cleaning_data["property_type"].iloc[0],
+            "local-authority": cleaning_data["local_authority"].iloc[0],
+            "total-floor-area": floor_area,
+
+            "number-habitable-rooms": None,
+            "number-heated-rooms": None,
+            "floor-height": None,
+        }
+
+        record._apply_averages_cleaning()
+
+        assert record._prepared_epc["floor-height"] > 0
+
+    def test_apply_averages_cleaning_requires_cleaning_data(self):
+        """Without cleaning data the averages step should raise ValueError."""
+        record = EPCRecord(run_mode="training", cleaning_data=None)
+
+        record._prepared_epc = {}
+
+        with pytest.raises(ValueError):
+            record._apply_averages_cleaning()