creating unit tests, added test cases for router

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-18 18:10:24 +00:00
parent 1699102cd9
commit dbe13586da
6 changed files with 207 additions and 50 deletions

View file

@ -222,7 +222,10 @@ class Property(Definitions):
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
recommendation_record["roof_energy_eff_ending"] = "Very Good"
if recommendation["type"] == "loft_insulation":
recommendation_record["roof_energy_eff_ending"] = "Good"
else:
recommendation_record["roof_energy_eff_ending"] = "Very Good"
else:
# Fill missing roof u-values - this fill is not based on recommended upgrades
if recommendation_record["roof_thermal_transmittance_ending"] is None:

View file

@ -65,6 +65,16 @@ async def trigger_plan(body: PlanTriggerRequest):
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
# For testing:
# plan_input.extend(
# [
# {'address': '73 Long Chaulden', 'postcode': 'HP1 2HX', 'Notes': ''},
# {'address': '8 Lindlings', 'postcode': 'HP1 2HA', 'Notes': ''},
# {'address': '44 Lindlings', 'postcode': 'HP1 2HE', 'Notes': ''},
# {'address': '46 Chaulden Terrace', 'postcode': 'HP1 2AN', 'Notes': ''},
# ]
# )
input_properties = []
for config in plan_input:
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
@ -164,32 +174,6 @@ async def trigger_plan(body: PlanTriggerRequest):
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
recommendations_scoring_data.head()
z = recommendations_scoring_data[recommendations_scoring_data["uprn"] == 100070505235].copy()
z = z[z["roof_thermal_transmittance"] != z["roof_thermal_transmittance_ending"]]
z["roof_thermal_transmittance_ending"] = 0.4
z["roof_energy_eff_ending"] = "Average"
now = model_api.predict_all(
df=z,
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
now["sap_change_predictions"]
input_properties[1].data["mechanical-ventilation"]
# id predictions property_id recommendation_id
# 0 3696+9 56.3 3696 9
# 1 3696+10 56.8 3696 10
# 2 3696+11 56.3 3696 11
# 3 3696+12 56.8 3696 12
# With good rather than very good
now["sap_change_predictions"]
all_predictions = model_api.predict_all(
df=recommendations_scoring_data,
bucket=get_settings().DATA_BUCKET,

View file

@ -96,11 +96,11 @@ class PropertyValuation:
if not value:
return {
"current_value": None,
"lower_bound_increased_value": None,
"upper_bound_increased_value": None,
"average_increased_value": None,
"average_increase": None
"current_value": 0,
"lower_bound_increased_value": 0,
"upper_bound_increased_value": 0,
"average_increased_value": 0,
"average_increase": 0
}
current_epc = property_instance.data["current-energy-rating"]

View file

@ -723,8 +723,9 @@ class EPCDataProcessor:
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
@staticmethod
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None,
ignore_step: bool = False):
def apply_averages_cleaning(
data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
):
"""
Clean the input DataFrame using averages from a cleaning DataFrame.

View file

@ -380,13 +380,21 @@ class EPCRecord:
else:
# Use averages from the cleaning dataset, based on the property type, built form, construction age
# band and local authority
cleaning_data = self.cleaning_data.copy()
# When running in new-data mode, the columns will have been coerced to lower case so we push them
# back to upper case
if self.run_mode == "newdata":
cleaning_data.columns = [x.upper() for x in cleaning_data.columns]
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True),
cleaning_data=self.cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
)
self.prepared_epc["fixed-lighting-outlets-count"] = round(
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0])
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
)
else:
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
@ -460,14 +468,14 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
map = {
mains_gas_map = {
"Y": True,
"N": False,
}
self.prepared_epc["mains-gas-flag"] = None if (
self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
) else map[self.prepared_epc["mains-gas-flag"]]
) else mains_gas_map[self.prepared_epc["mains-gas-flag"]]
def _clean_heat_loss_corridor(self):
"""
@ -476,15 +484,18 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
map = {
"no corridor": False,
"unheated corridor": True,
"heated corridor": False
}
valid_values = [
"no corridor",
"unheated corridor",
"heated corridor"
]
self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc[
"heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[
self.prepared_epc["heat-loss-corridor"]]
self.prepared_epc["heat-loss-corridor"] = (
"no corridor" if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else
self.prepared_epc["heat-loss-corridor"]
)
if self.prepared_epc["heat-loss-corridor"] not in valid_values:
self.prepared_epc["heat-loss-corridor"] = "no corridor"
self.prepared_epc["unheated-corridor-length"] = (
float(self.prepared_epc["unheated-corridor-length"]) if
@ -572,11 +583,13 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"],
self.prepared_epc["built-form"])
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(
self.prepared_epc["built-form"], self.prepared_epc["built-form"]
)
if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES:
if self.prepared_epc["property-type"] == "Flat":
self.prepared_epc["built-form"] = "Semi-Detached"
if self.prepared_epc["property-type"] in ["Flat", "Maisonette"]:
self.prepared_epc["built-form"] = "End-Terrace"
def _clean_age_band(self):
"""

View file

@ -1,7 +1,8 @@
import pytest
from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.Record import EPCRecord
from unittest.mock import Mock
from etl.epc.settings import DATA_ANOMALY_MATCHES
import random
class TestEpcRecord:
@ -96,3 +97,158 @@ class TestEpcRecord:
record4._clean_ventilation()
assert record4.prepared_epc["mechanical-ventilation"] is None
def test_clean_energy_valid_values(self, cleaning_data, epc_records_1):
    """Numeric strings for energy consumption and CO2 emissions are cleaned to their numeric values."""
    raw_energy_fields = {
        "energy-consumption-current": "200",
        "co2-emissions-current": "5.5",
    }
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = raw_energy_fields

    epc._clean_energy()

    cleaned = epc.prepared_epc
    assert cleaned["energy-consumption-current"] == 200.0
    assert cleaned["co2-emissions-current"] == 5.5
def test_clean_energy_empty_values(self, cleaning_data, epc_records_1):
    """Empty energy fields must make ``_clean_energy`` raise ``ValueError``."""
    # We cannot have invalid values so this should raise an exception
    record = EPCRecord(cleaning_data=cleaning_data)
    record.prepared_epc = {
        "energy-consumption-current": "",
        "co2-emissions-current": "",
    }

    # The call has to happen inside the ``pytest.raises`` block: a call made before it lets the
    # exception propagate and fails the test before the expectation is ever checked.
    with pytest.raises(ValueError):
        record._clean_energy()
def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1):
    """A built form of "Semi-Detached" on a flat is still "Semi-Detached" after cleaning."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    # The input already equals the expected output, so this checks that a valid built form
    # is left alone rather than exercising an actual remap.
    epc.prepared_epc = {
        "built-form": "Semi-Detached",
        "property-type": "Flat",
    }

    epc._clean_built_form()

    cleaned_built_form = epc.prepared_epc["built-form"]
    assert cleaned_built_form == "Semi-Detached"
def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1):
    """An empty built form on a flat is expected to be cleaned to "End-Terrace"."""
    flat_without_built_form = {
        "built-form": "",
        "property-type": "Flat",
    }
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = flat_without_built_form

    epc._clean_built_form()

    cleaned_built_form = epc.prepared_epc["built-form"]
    assert cleaned_built_form == "End-Terrace"
def test_clean_floor_area_valid(self, cleaning_data):
    """A numeric floor-area string is cleaned to its numeric value."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {"total-floor-area": "120.5"}

    epc._clean_floor_area()

    cleaned_area = epc.prepared_epc["total-floor-area"]
    assert cleaned_area == 120.5
def test_clean_floor_area_empty(self, cleaning_data):
    """An empty floor area is expected to be rejected with ``ValueError``."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {"total-floor-area": ""}

    # There is no known case of a missing floor area, so cleaning is expected to fail loudly
    with pytest.raises(ValueError):
        epc._clean_floor_area()
def test_clean_heat_loss_corridor_valid(self, cleaning_data):
    """A recognised corridor value is kept unchanged by the cleaning step."""
    corridor_fields = {
        "heat-loss-corridor": "unheated corridor",
        "unheated-corridor-length": "",
    }
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = corridor_fields

    epc._clean_heat_loss_corridor()

    cleaned_corridor = epc.prepared_epc["heat-loss-corridor"]
    assert cleaned_corridor == "unheated corridor"
def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
    """An unrecognised corridor value is expected to fall back to "no corridor"."""
    # "InvalidCorridor" is not one of the accepted corridor values
    corridor_fields = {
        "heat-loss-corridor": "InvalidCorridor",
        "unheated-corridor-length": "",
    }
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = corridor_fields

    epc._clean_heat_loss_corridor()

    cleaned_corridor = epc.prepared_epc["heat-loss-corridor"]
    assert cleaned_corridor == "no corridor"
def test_clean_mains_gas_valid(self, cleaning_data):
    """A mains-gas flag of "Y" is cleaned to the boolean ``True``."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {"mains-gas-flag": "Y"}

    epc._clean_mains_gas()

    cleaned_flag = epc.prepared_epc["mains-gas-flag"]
    assert cleaned_flag is True
def test_clean_mains_gas_anomaly(self, cleaning_data):
    """Unknown flags are rejected, while every recognised anomaly value is cleaned to ``None``."""
    record = EPCRecord(cleaning_data=cleaning_data)
    record.prepared_epc = {
        "mains-gas-flag": "InvalidValue"
    }
    # It should always be Y or N or an anomaly value.
    # NOTE(review): the cleaning code appears to resolve the flag with a plain dict lookup, which
    # would raise KeyError rather than ValueError for an unknown flag - both are accepted here
    # until the intended exception type is confirmed.
    with pytest.raises((KeyError, ValueError)):
        record._clean_mains_gas()

    # Check every anomaly value instead of a randomly chosen one, so the test is deterministic
    # and a failure for any single value cannot hide behind the random draw.
    for anomaly_value in DATA_ANOMALY_MATCHES:
        record = EPCRecord(cleaning_data=cleaning_data)
        record.prepared_epc = {
            "mains-gas-flag": anomaly_value
        }
        record._clean_mains_gas()
        assert record.prepared_epc["mains-gas-flag"] is None
def test_clean_solar_hot_water_valid(self, cleaning_data):
    """A solar-water-heating flag of "Y" is cleaned to the boolean ``True``."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {"solar-water-heating-flag": "Y"}

    epc._clean_solar_hot_water()

    cleaned_flag = epc.prepared_epc["solar-water-heating-flag"]
    assert cleaned_flag is True
def test_clean_solar_hot_water_empty(self, cleaning_data):
    """An empty solar-water-heating flag is cleaned to ``None``."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {"solar-water-heating-flag": ""}

    epc._clean_solar_hot_water()

    cleaned_flag = epc.prepared_epc["solar-water-heating-flag"]
    assert cleaned_flag is None
def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
    """A numeric outlet-count string is cleaned to its numeric value."""
    epc = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
    epc.prepared_epc = {"fixed-lighting-outlets-count": "5"}

    epc._clean_number_lighting_outlets()

    cleaned_count = epc.prepared_epc["fixed-lighting-outlets-count"]
    assert cleaned_count == 5.0
def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1):
    """A missing outlet count in "newdata" run mode is expected to be filled in as 8.0."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.run_mode = "newdata"
    # Only the outlet count is missing; the remaining fields describe the property
    epc.prepared_epc = {
        "fixed-lighting-outlets-count": "",
        "property-type": "Flat",
        "built-form": "Semi-Detached",
        "construction-age-band": "England and Wales: 1900-1929",
        "local-authority": "E08000025",
        "number-habitable-rooms": "4",
        "number-heated-rooms": "4",
    }
    epc.old_data = []
    epc.full_sap_epc = []

    epc._clean_number_lighting_outlets()

    filled_count = epc.prepared_epc["fixed-lighting-outlets-count"]
    assert filled_count == 8.0