import pickle

import pytest

from etl.epc.Record import EPCRecord
from etl.epc.settings import DATA_ANOMALY_MATCHES


class TestEpcRecord:
    """Tests for the individual cleaning steps exposed by ``EPCRecord``."""

    @pytest.fixture
    def base_record(self):
        """Return a training-mode record whose prepared EPC starts out empty."""
        blank = EPCRecord(run_mode="training")
        blank._prepared_epc = {}
        return blank

    @pytest.fixture()
    def cleaning_data(self):
        """Load the pickled cleaning dataset.

        The path is relative, so it resolves against the current working
        directory of the test run.
        """
        with open("recommendations/tests/test_data/cleaning_data.pkl", "rb") as handle:
            return pickle.load(handle)

    @pytest.fixture()
    def epc_records_1(self):
        """Return a minimal ``epc_records`` payload with only an original EPC."""
        return {
            "original_epc": {
                "fixed-lighting-outlets-count": "11",
                "property-type": "House",
                "built-form": "Semi-Detached",
                "construction-age-band": "England and Wales: 1900-1929",
                "local-authority": "E08000025",
                "number-habitable-rooms": "4",
                "number-heated-rooms": "4",
            },
            "full_sap_epc": {},
            "old_data": [],
        }

    def test_clean_built_form_valid_remap(self, cleaning_data):
        """A recognised built form is expected to be left as it was."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {
            "built-form": "Semi-Detached",
            "property-type": "Flat",
        }

        epc._clean_built_form()

        assert epc._prepared_epc["built-form"] == "Semi-Detached"

    def test_clean_built_form_anomaly(self, cleaning_data):
        """An empty built form on a Flat is expected to become End-Terrace."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {
            "built-form": "",
            "property-type": "Flat",
        }

        epc._clean_built_form()

        assert epc._prepared_epc["built-form"] == "End-Terrace"

    def test_clean_floor_area_valid(self, cleaning_data):
        """A numeric floor-area string is expected to be converted to a number."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {"total-floor-area": "120.5"}

        epc._clean_floor_area()

        assert epc._prepared_epc["total-floor-area"] == 120.5

    def test_clean_floor_area_empty(self, cleaning_data):
        """An empty floor area is expected to raise ``ValueError``."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {"total-floor-area": ""}

        with pytest.raises(ValueError):
            epc._clean_floor_area()

    def test_clean_heat_loss_corridor_valid(self, cleaning_data):
        """A valid corridor value is kept for both empty and ``None`` lengths."""
        # Case 1: corridor length supplied as an empty string.
        with_blank_length = EPCRecord(cleaning_data=cleaning_data)
        with_blank_length._prepared_epc = {
            "heat-loss-corridor": "unheated corridor",
            "unheated-corridor-length": "",
        }
        with_blank_length._clean_heat_loss_corridor()
        assert (
            with_blank_length._prepared_epc["heat-loss-corridor"]
            == "unheated corridor"
        )

        # Case 2: corridor length supplied as None; it must stay None.
        with_none_length = EPCRecord(cleaning_data=cleaning_data)
        with_none_length._prepared_epc = {
            "heat-loss-corridor": "unheated corridor",
            "unheated-corridor-length": None,
        }
        with_none_length._clean_heat_loss_corridor()
        assert (
            with_none_length._prepared_epc["heat-loss-corridor"]
            == "unheated corridor"
        )
        assert with_none_length._prepared_epc["unheated-corridor-length"] is None

    def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
        """An unrecognised corridor value is expected to become 'no corridor'."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {
            "heat-loss-corridor": "InvalidCorridor",
            "unheated-corridor-length": "",
        }

        epc._clean_heat_loss_corridor()

        assert epc._prepared_epc["heat-loss-corridor"] == "no corridor"

    def test_clean_solar_hot_water_valid(self, cleaning_data):
        """A 'Y' flag is expected to be kept and mirrored as boolean ``True``."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {"solar-water-heating-flag": "Y"}

        epc._clean_solar_hot_water()

        assert epc._prepared_epc["solar-water-heating-flag"] == "Y"
        assert epc.solar_water_heating_flag_bool is True

    def test_clean_solar_hot_water_empty(self, cleaning_data):
        """An empty flag is expected to become 'N' with boolean ``False``."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc._prepared_epc = {"solar-water-heating-flag": ""}

        epc._clean_solar_hot_water()

        assert epc._prepared_epc["solar-water-heating-flag"] == "N"
        assert epc.solar_water_heating_flag_bool is False

    def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
        """A numeric outlet-count string is expected to be converted to 5.0."""
        epc = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
        epc._prepared_epc = {"fixed-lighting-outlets-count": "5"}

        epc._clean_number_lighting_outlets()

        assert epc._prepared_epc["fixed-lighting-outlets-count"] == 5.0

    def test_clean_number_lighting_outlets_empty(self, cleaning_data):
        """An empty outlet count in 'newdata' mode is expected to become 10."""
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc.run_mode = "newdata"
        epc._prepared_epc = {
            "fixed-lighting-outlets-count": "",
            "property-type": "Flat",
            "built-form": "Semi-Detached",
            "construction-age-band": "England and Wales: 1900-1929",
            "local-authority": "E08000025",
            "number-habitable-rooms": "4",
            "number-heated-rooms": "4",
        }
        epc.old_data = []
        epc.full_sap_epc = {}

        epc._clean_number_lighting_outlets()

        assert epc._prepared_epc["fixed-lighting-outlets-count"] == 10

    def test_clean_floor_level(self, cleaning_data):
        """A numeric floor level becomes 1.0; an empty one becomes ``None``."""
        numbered = EPCRecord(cleaning_data=cleaning_data)
        numbered._prepared_epc = {"floor-level": "1"}
        numbered._clean_floor_level()
        assert numbered._prepared_epc["floor-level"] == 1.0

        unspecified = EPCRecord(cleaning_data=cleaning_data)
        unspecified._prepared_epc = {"floor-level": ""}
        unspecified._clean_floor_level()
        assert unspecified._prepared_epc["floor-level"] is None

    def test_year_built(self, cleaning_data):
        """A full record with age band 1900-1929 is expected to yield 1900."""
        # This test handles a specific test case
        # Mock the property object
        payload = {
            "original_epc": {
                "low-energy-fixed-light-count": "",
                "address": "19 Waterloo Road, Shoeburyness",
                "uprn-source": "Energy Assessor",
                "floor-height": "2.65",
                "heating-cost-potential": "436",
                "unheated-corridor-length": "",
                "hot-water-cost-potential": "100",
                "construction-age-band": "England and Wales: 1900-1929",
                "potential-energy-rating": "B",
                "mainheat-energy-eff": "Good",
                "windows-env-eff": "Good",
                "lighting-energy-eff": "Very Good",
                "environment-impact-potential": "89",
                "glazed-type": "double glazing installed during or after 2002",
                "heating-cost-current": "888",
                "address3": "",
                "mainheatcont-description": "Programmer and room thermostat",
                "sheating-energy-eff": "N/A",
                "report-type": "100",
                "property-type": "House",
                "local-authority-label": "Southend-on-Sea",
                "fixed-lighting-outlets-count": "9",
                "energy-tariff": "Single",
                "mechanical-ventilation": "natural",
                "hot-water-cost-current": "386",
                "county": "",
                "postcode": "SS3 9EQ",
                "solar-water-heating-flag": "N",
                "constituency": "E14001501",
                "co2-emissions-potential": "0.7",
                "number-heated-rooms": "4",
                "floor-description": "Suspended, no insulation (assumed)",
                "energy-consumption-potential": "49",
                "local-authority": "E06000033",
                "built-form": "Mid-Terrace",
                "number-open-fireplaces": "0",
                "windows-description": "Fully double glazed",
                "glazed-area": "Normal",
                "inspection-date": "2025-03-17",
                "mains-gas-flag": "Y",
                "co2-emiss-curr-per-floor-area": "58",
                "address1": "19 Waterloo Road",
                "heat-loss-corridor": "",
                "flat-storey-count": "",
                "constituency-label": "",
                "roof-energy-eff": "Average",
                "total-floor-area": "78.0",
                "building-reference-number": "10007286268",
                "environment-impact-current": "48",
                "co2-emissions-current": "4.5",
                "roof-description": "Pitched, 100 mm loft insulation",
                "floor-energy-eff": "N/A",
                "number-habitable-rooms": "4",
                "address2": "Shoeburyness",
                "hot-water-env-eff": "Average",
                "posttown": "SOUTHEND-ON-SEA",
                "mainheatc-energy-eff": "Average",
                "main-fuel": "mains gas (not community)",
                "lighting-env-eff": "Very Good",
                "windows-energy-eff": "Good",
                "floor-env-eff": "N/A",
                "sheating-env-eff": "N/A",
                "lighting-description": "Low energy lighting in 78% of fixed outlets",
                "roof-env-eff": "Average",
                "walls-energy-eff": "Very Poor",
                "photo-supply": "0.0",
                "lighting-cost-potential": "101",
                "mainheat-env-eff": "Good",
                "multi-glaze-proportion": "100",
                "main-heating-controls": "",
                "lodgement-datetime": "2025-03-25 16:59:15",
                "flat-top-storey": "",
                "current-energy-rating": "D",
                "secondheat-description": "None",
                "walls-env-eff": "Very Poor",
                "transaction-type": "marketed sale",
                "uprn": 100090702270,
                "current-energy-efficiency": "56",
                "energy-consumption-current": "329",
                "mainheat-description": "Boiler and radiators, mains gas",
                "lighting-cost-current": "101",
                "lodgement-date": "2025-03-25",
                "extension-count": "1",
                "mainheatc-env-eff": "Average",
                "lmk-key": "ff00a1e150063f7bbcac1644be57fdcf05b6c9c60053f80c5d218bf2863fea93",
                "wind-turbine-count": "0",
                "tenure": "Owner-occupied",
                "floor-level": "",
                "potential-energy-efficiency": "89",
                "hot-water-energy-eff": "Average",
                "low-energy-lighting": "78",
                "walls-description": "Solid brick, as built, no insulation (assumed)",
                "hotwater-description": "From main system",
            },
            "full_sap_epc": {},
            "old_data": [],
        }

        built = EPCRecord(
            epc_records=payload,
            run_mode="newdata",
            cleaning_data=cleaning_data,
        )

        assert built.get("year_built") == 1900

    def test_cleaning_rules_energy(self, base_record):
        """Numeric energy and CO2 strings are expected to become floats."""
        base_record._prepared_epc = {
            "energy-consumption-current": "150",
            "co2-emissions-current": "32.5",
        }

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["energy-consumption-current"] == 150.0
        assert base_record._prepared_epc["co2-emissions-current"] == 32.5

    def test_cleaning_rules_energy_anomaly(self, base_record):
        """Non-numeric energy and CO2 strings are expected to pass through."""
        base_record._prepared_epc = {
            "energy-consumption-current": "INVALID",
            "co2-emissions-current": "INVALID",
        }

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["energy-consumption-current"] == "INVALID"
        assert base_record._prepared_epc["co2-emissions-current"] == "INVALID"

    def test_cleaning_rules_mains_gas(self, base_record):
        """A 'Y' mains-gas flag is expected to be converted to ``True``."""
        base_record._prepared_epc = {"mains-gas-flag": "Y"}

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["mains-gas-flag"] is True

    def test_cleaning_rules_mains_gas_anomaly(self, base_record):
        """An unrecognised mains-gas flag is expected to become ``None``."""
        base_record._prepared_epc = {"mains-gas-flag": "INVALID"}

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["mains-gas-flag"] is None

    def test_cleaning_rules_wind_turbine(self, base_record):
        """A numeric wind-turbine count string is expected to become 3."""
        base_record._prepared_epc = {"wind-turbine-count": "3"}

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["wind-turbine-count"] == 3

    def test_cleaning_rules_extension_count(self, base_record):
        """A numeric extension count string is expected to become 2."""
        base_record._prepared_epc = {"extension-count": "2"}

        base_record._apply_cleaning_rules()

        assert base_record._prepared_epc["extension-count"] == 2

    def test_apply_averages_cleaning_fills_missing_values(self, cleaning_data):
        """Missing room counts and floor height are expected to be filled in."""
        epc = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
        # Seed from the first row of the cleaning data.
        seed = {
            "property-type": cleaning_data["property_type"].iloc[0],
            "local-authority": cleaning_data["local_authority"].iloc[0],
            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
            "number-habitable-rooms": None,
            "number-heated-rooms": None,
            "floor-height": None,
        }
        epc._prepared_epc = seed

        epc._apply_averages_cleaning()

        assert epc._prepared_epc["number-habitable-rooms"] is not None
        assert epc._prepared_epc["number-heated-rooms"] is not None
        assert epc._prepared_epc["floor-height"] is not None

    def test_apply_averages_cleaning_no_missing(self, cleaning_data):
        """A record with no missing values is expected to come back unchanged."""
        epc = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
        seed = {
            "property-type": cleaning_data["property_type"].iloc[0],
            "local-authority": cleaning_data["local_authority"].iloc[0],
            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
            "number-habitable-rooms": 5,
            "number-heated-rooms": 5,
            "floor-height": 2.5,
        }
        epc._prepared_epc = seed
        # Shallow snapshot taken before cleaning, compared afterwards.
        snapshot = epc._prepared_epc.copy()

        epc._apply_averages_cleaning()

        assert epc._prepared_epc == snapshot

    def test_apply_averages_cleaning_caps_heated_rooms(self, cleaning_data):
        """Filled-in heated rooms must not exceed filled-in habitable rooms."""
        epc = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
        seed = {
            "property-type": cleaning_data["property_type"].iloc[0],
            "local-authority": cleaning_data["local_authority"].iloc[0],
            "total-floor-area": float(cleaning_data["total_floor_area"].iloc[0]),
            "number-habitable-rooms": None,
            "number-heated-rooms": None,
            "floor-height": None,
        }
        epc._prepared_epc = seed

        epc._apply_averages_cleaning()

        assert (
            epc._prepared_epc["number-heated-rooms"]
            <= epc._prepared_epc["number-habitable-rooms"]
        )

    def test_apply_averages_cleaning_floor_area_filter(self, cleaning_data):
        """With the median floor area, the filled floor height must be positive."""
        epc = EPCRecord(run_mode="training", cleaning_data=cleaning_data)
        median_area = float(cleaning_data["total_floor_area"].median())
        seed = {
            "property-type": cleaning_data["property_type"].iloc[0],
            "local-authority": cleaning_data["local_authority"].iloc[0],
            "total-floor-area": median_area,
            "number-habitable-rooms": None,
            "number-heated-rooms": None,
            "floor-height": None,
        }
        epc._prepared_epc = seed

        epc._apply_averages_cleaning()

        assert epc._prepared_epc["floor-height"] > 0

    def test_apply_averages_cleaning_requires_cleaning_data(self):
        """Without cleaning data, averages cleaning is expected to raise."""
        epc = EPCRecord(run_mode="training", cleaning_data=None)
        epc._prepared_epc = {}

        with pytest.raises(ValueError):
            epc._apply_averages_cleaning()