From dbe13586da99dbbd28a126eb02537c8987564faf Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Jan 2024 18:10:24 +0000 Subject: [PATCH] creating unit tests, added test cases for router --- backend/Property.py | 5 +- backend/app/plan/router.py | 36 ++------ backend/ml_models/Valuation.py | 10 +- etl/epc/DataProcessor.py | 5 +- etl/epc/Record.py | 43 ++++++--- etl/epc/tests/test_epcrecord.py | 158 +++++++++++++++++++++++++++++++- 6 files changed, 207 insertions(+), 50 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 98325b15..c1055eb9 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -222,7 +222,10 @@ class Property(Definitions): proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth)) recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth) - recommendation_record["roof_energy_eff_ending"] = "Very Good" + if recommendation["type"] == "loft_insulation": + recommendation_record["roof_energy_eff_ending"] = "Good" + else: + recommendation_record["roof_energy_eff_ending"] = "Very Good" else: # Fill missing roof u-values - this fill is not based on recommended upgrades if recommendation_record["roof_thermal_transmittance_ending"] is None: diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d869bcb5..521ec615 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -65,6 +65,16 @@ async def trigger_plan(body: PlanTriggerRequest): bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) + # For testing: + # plan_input.extend( + # [ + # {'address': '73 Long Chaulden', 'postcode': 'HP1 2HX', 'Notes': ''}, + # {'address': '8 Lindlings', 'postcode': 'HP1 2HA', 'Notes': ''}, + # {'address': '44 Lindlings', 'postcode': 'HP1 2HE', 'Notes': ''}, + # {'address': '46 Chaulden Terrace', 'postcode': 'HP1 2AN', 'Notes': ''}, + # ] + # ) + input_properties = [] for config in plan_input: # We validate each 
record in the file. If the record is NOT valid, we need to handle this accordingly @@ -164,32 +174,6 @@ async def trigger_plan(body: PlanTriggerRequest): model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) - recommendations_scoring_data.head() - z = recommendations_scoring_data[recommendations_scoring_data["uprn"] == 100070505235].copy() - z = z[z["roof_thermal_transmittance"] != z["roof_thermal_transmittance_ending"]] - z["roof_thermal_transmittance_ending"] = 0.4 - z["roof_energy_eff_ending"] = "Average" - - now = model_api.predict_all( - df=z, - bucket=get_settings().DATA_BUCKET, - prediction_buckets={ - "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, - "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, - "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET - } - ) - - now["sap_change_predictions"] - input_properties[1].data["mechanical-ventilation"] - # id predictions property_id recommendation_id - # 0 3696+9 56.3 3696 9 - # 1 3696+10 56.8 3696 10 - # 2 3696+11 56.3 3696 11 - # 3 3696+12 56.8 3696 12 - # With good rather than very good - now["sap_change_predictions"] - all_predictions = model_api.predict_all( df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET, diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 018b4678..dadef9a9 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -96,11 +96,11 @@ class PropertyValuation: if not value: return { - "current_value": None, - "lower_bound_increased_value": None, - "upper_bound_increased_value": None, - "average_increased_value": None, - "average_increase": None + "current_value": 0, + "lower_bound_increased_value": 0, + "upper_bound_increased_value": 0, + "average_increased_value": 0, + "average_increase": 0 } current_epc = property_instance.data["current-energy-rating"] diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 5dfeea1a..4c4651a4 100644 
--- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -723,8 +723,9 @@ class EPCDataProcessor: self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0) @staticmethod - def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, - ignore_step: bool = False): + def apply_averages_cleaning( + data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False + ): """ Clean the input DataFrame using averages from a cleaning DataFrame. diff --git a/etl/epc/Record.py b/etl/epc/Record.py index f1dde43e..4474baf1 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -380,13 +380,21 @@ class EPCRecord: else: # Use averages from the cleaning dataset, based on the property type, built form, construction age # band and local authority + + cleaning_data = self.cleaning_data.copy() + # When running in new-data mode, the columns will have been coerced to lower case so we push them + # back to upper case + if self.run_mode == "newdata": + cleaning_data.columns = [x.upper() for x in cleaning_data.columns] + cleaned_property_data = EPCDataProcessor.apply_averages_cleaning( data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True), cleaning_data=self.cleaning_data, cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], ) self.prepared_epc["fixed-lighting-outlets-count"] = round( - cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]) + cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0] + ) else: self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"]) @@ -460,14 +468,14 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - map = { + mains_gas_map = { "Y": True, "N": False, } self.prepared_epc["mains-gas-flag"] = None if ( self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in 
DATA_ANOMALY_MATCHES - ) else map[self.prepared_epc["mains-gas-flag"]] + ) else mains_gas_map[self.prepared_epc["mains-gas-flag"]] def _clean_heat_loss_corridor(self): """ @@ -476,15 +484,18 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - map = { - "no corridor": False, - "unheated corridor": True, - "heated corridor": False - } + valid_values = [ + "no corridor", + "unheated corridor", + "heated corridor" + ] - self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc[ - "heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[ - self.prepared_epc["heat-loss-corridor"]] + self.prepared_epc["heat-loss-corridor"] = ( + "no corridor" if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else + self.prepared_epc["heat-loss-corridor"] + ) + if self.prepared_epc["heat-loss-corridor"] not in valid_values: + self.prepared_epc["heat-loss-corridor"] = "no corridor" self.prepared_epc["unheated-corridor-length"] = ( float(self.prepared_epc["unheated-corridor-length"]) if @@ -572,11 +583,13 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"], - self.prepared_epc["built-form"]) + self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get( + self.prepared_epc["built-form"], self.prepared_epc["built-form"] + ) + if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES: - if self.prepared_epc["property-type"] == "Flat": - self.prepared_epc["built-form"] = "Semi-Detached" + if self.prepared_epc["property-type"] in ["Flat", "Maisonette"]: + self.prepared_epc["built-form"] = "End-Terrace" def _clean_age_band(self): """ diff --git a/etl/epc/tests/test_epcrecord.py b/etl/epc/tests/test_epcrecord.py index f55bd30a..06b8ed06 100644 --- a/etl/epc/tests/test_epcrecord.py +++ b/etl/epc/tests/test_epcrecord.py @@ -1,7 +1,8 @@ import pytest from utils.s3 import 
read_dataframe_from_s3_parquet from etl.epc.Record import EPCRecord -from unittest.mock import Mock +from etl.epc.settings import DATA_ANOMALY_MATCHES +import random class TestEpcRecord: @@ -96,3 +97,158 @@ class TestEpcRecord: record4._clean_ventilation() assert record4.prepared_epc["mechanical-ventilation"] is None + + def test_clean_energy_valid_values(self, cleaning_data, epc_records_1): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "energy-consumption-current": "200", + "co2-emissions-current": "5.5" + } + record._clean_energy() + + assert record.prepared_epc["energy-consumption-current"] == 200.0 + assert record.prepared_epc["co2-emissions-current"] == 5.5 + + def test_clean_energy_empty_values(self, cleaning_data, epc_records_1): + # We cannot have invalid values so this should raise an exception + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "energy-consumption-current": "", + "co2-emissions-current": "" + } + record._clean_energy() + + with pytest.raises(ValueError): + record._clean_energy() + + def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1): + record = EPCRecord(cleaning_data=cleaning_data) + # Assuming "Semi" should be remapped to "Semi-Detached" + record.prepared_epc = { + "built-form": "Semi-Detached", + "property-type": "Flat" # Assuming this affects the remapping + } + record._clean_built_form() + + assert record.prepared_epc["built-form"] == "Semi-Detached" + + def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1): + record = EPCRecord(cleaning_data=cleaning_data) + + record.prepared_epc = { + "built-form": "", + "property-type": "Flat" + } + record._clean_built_form() + + assert record.prepared_epc["built-form"] == "End-Terrace" + + def test_clean_floor_area_valid(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "total-floor-area": "120.5" + } + record._clean_floor_area() + + assert 
record.prepared_epc["total-floor-area"] == 120.5 + + def test_clean_floor_area_empty(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "total-floor-area": "" + } + # We have no known case of missing floor area + with pytest.raises(ValueError): + record._clean_floor_area() + + def test_clean_heat_loss_corridor_valid(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "heat-loss-corridor": "unheated corridor", + "unheated-corridor-length": "" + } + record._clean_heat_loss_corridor() + + assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor" + + def test_clean_heat_loss_corridor_anomaly(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + # Assuming "InvalidCorridor" is an anomaly + record.prepared_epc = { + "heat-loss-corridor": "InvalidCorridor", + "unheated-corridor-length": "" + } + record._clean_heat_loss_corridor() + + assert record.prepared_epc["heat-loss-corridor"] == "no corridor" + + def test_clean_mains_gas_valid(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "mains-gas-flag": "Y" + } + record._clean_mains_gas() + + assert record.prepared_epc["mains-gas-flag"] is True + + def test_clean_mains_gas_anomaly(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "mains-gas-flag": "InvalidValue" + } + # It should always be Y or N or an anomaly value + with pytest.raises(ValueError): + record._clean_mains_gas() + + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "mains-gas-flag": random.choice(list(DATA_ANOMALY_MATCHES)) + } + record._clean_mains_gas() + + assert record.prepared_epc["mains-gas-flag"] is None + + def test_clean_solar_hot_water_valid(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "solar-water-heating-flag": "Y" + } + 
record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is True + + def test_clean_solar_hot_water_empty(self, cleaning_data): + record = EPCRecord(cleaning_data=cleaning_data) + record.prepared_epc = { + "solar-water-heating-flag": "" + } + record._clean_solar_hot_water() + + assert record.prepared_epc["solar-water-heating-flag"] is None + + def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1): + record = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1) + record.prepared_epc = { + "fixed-lighting-outlets-count": "5" + } + record._clean_number_lighting_outlets() + + assert record.prepared_epc["fixed-lighting-outlets-count"] == 5.0 + + def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1): + record = EPCRecord(cleaning_data=cleaning_data) + record.run_mode = "newdata" + record.prepared_epc = { + "fixed-lighting-outlets-count": "", + "property-type": "Flat", + "built-form": "Semi-Detached", + "construction-age-band": "England and Wales: 1900-1929", + "local-authority": "E08000025", + "number-habitable-rooms": "4", + "number-heated-rooms": "4", + } + record.old_data = [] + record.full_sap_epc = [] + record._clean_number_lighting_outlets() + + assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0