mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
creating unit tests, added test cases for router
This commit is contained in:
parent
1699102cd9
commit
dbe13586da
6 changed files with 207 additions and 50 deletions
|
|
@ -222,7 +222,10 @@ class Property(Definitions):
|
||||||
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
|
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
|
||||||
|
|
||||||
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
|
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
|
||||||
recommendation_record["roof_energy_eff_ending"] = "Very Good"
|
if recommendation["type"] == "loft_insulation":
|
||||||
|
recommendation_record["roof_energy_eff_ending"] = "Good"
|
||||||
|
else:
|
||||||
|
recommendation_record["roof_energy_eff_ending"] = "Very Good"
|
||||||
else:
|
else:
|
||||||
# Fill missing roof u-values - this fill is not based on recommended upgrades
|
# Fill missing roof u-values - this fill is not based on recommended upgrades
|
||||||
if recommendation_record["roof_thermal_transmittance_ending"] is None:
|
if recommendation_record["roof_thermal_transmittance_ending"] is None:
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,16 @@ async def trigger_plan(body: PlanTriggerRequest):
|
||||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# For testing:
|
||||||
|
# plan_input.extend(
|
||||||
|
# [
|
||||||
|
# {'address': '73 Long Chaulden', 'postcode': 'HP1 2HX', 'Notes': ''},
|
||||||
|
# {'address': '8 Lindlings', 'postcode': 'HP1 2HA', 'Notes': ''},
|
||||||
|
# {'address': '44 Lindlings', 'postcode': 'HP1 2HE', 'Notes': ''},
|
||||||
|
# {'address': '46 Chaulden Terrace', 'postcode': 'HP1 2AN', 'Notes': ''},
|
||||||
|
# ]
|
||||||
|
# )
|
||||||
|
|
||||||
input_properties = []
|
input_properties = []
|
||||||
for config in plan_input:
|
for config in plan_input:
|
||||||
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
||||||
|
|
@ -164,32 +174,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
||||||
|
|
||||||
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||||
|
|
||||||
recommendations_scoring_data.head()
|
|
||||||
z = recommendations_scoring_data[recommendations_scoring_data["uprn"] == 100070505235].copy()
|
|
||||||
z = z[z["roof_thermal_transmittance"] != z["roof_thermal_transmittance_ending"]]
|
|
||||||
z["roof_thermal_transmittance_ending"] = 0.4
|
|
||||||
z["roof_energy_eff_ending"] = "Average"
|
|
||||||
|
|
||||||
now = model_api.predict_all(
|
|
||||||
df=z,
|
|
||||||
bucket=get_settings().DATA_BUCKET,
|
|
||||||
prediction_buckets={
|
|
||||||
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
|
|
||||||
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
|
|
||||||
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
now["sap_change_predictions"]
|
|
||||||
input_properties[1].data["mechanical-ventilation"]
|
|
||||||
# id predictions property_id recommendation_id
|
|
||||||
# 0 3696+9 56.3 3696 9
|
|
||||||
# 1 3696+10 56.8 3696 10
|
|
||||||
# 2 3696+11 56.3 3696 11
|
|
||||||
# 3 3696+12 56.8 3696 12
|
|
||||||
# With good rather than very good
|
|
||||||
now["sap_change_predictions"]
|
|
||||||
|
|
||||||
all_predictions = model_api.predict_all(
|
all_predictions = model_api.predict_all(
|
||||||
df=recommendations_scoring_data,
|
df=recommendations_scoring_data,
|
||||||
bucket=get_settings().DATA_BUCKET,
|
bucket=get_settings().DATA_BUCKET,
|
||||||
|
|
|
||||||
|
|
@ -96,11 +96,11 @@ class PropertyValuation:
|
||||||
|
|
||||||
if not value:
|
if not value:
|
||||||
return {
|
return {
|
||||||
"current_value": None,
|
"current_value": 0,
|
||||||
"lower_bound_increased_value": None,
|
"lower_bound_increased_value": 0,
|
||||||
"upper_bound_increased_value": None,
|
"upper_bound_increased_value": 0,
|
||||||
"average_increased_value": None,
|
"average_increased_value": 0,
|
||||||
"average_increase": None
|
"average_increase": 0
|
||||||
}
|
}
|
||||||
|
|
||||||
current_epc = property_instance.data["current-energy-rating"]
|
current_epc = property_instance.data["current-energy-rating"]
|
||||||
|
|
|
||||||
|
|
@ -723,8 +723,9 @@ class EPCDataProcessor:
|
||||||
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
|
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None,
|
def apply_averages_cleaning(
|
||||||
ignore_step: bool = False):
|
data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Clean the input DataFrame using averages from a cleaning DataFrame.
|
Clean the input DataFrame using averages from a cleaning DataFrame.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -380,13 +380,21 @@ class EPCRecord:
|
||||||
else:
|
else:
|
||||||
# Use averages from the cleaning dataset, based on the property type, built form, construction age
|
# Use averages from the cleaning dataset, based on the property type, built form, construction age
|
||||||
# band and local authority
|
# band and local authority
|
||||||
|
|
||||||
|
cleaning_data = self.cleaning_data.copy()
|
||||||
|
# When running in new-data mode, the columns will have been coerced to lower case so we push them
|
||||||
|
# back to upper case
|
||||||
|
if self.run_mode == "newdata":
|
||||||
|
cleaning_data.columns = [x.upper() for x in cleaning_data.columns]
|
||||||
|
|
||||||
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
|
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
|
||||||
data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True),
|
data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True),
|
||||||
cleaning_data=self.cleaning_data,
|
cleaning_data=self.cleaning_data,
|
||||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||||
)
|
)
|
||||||
self.prepared_epc["fixed-lighting-outlets-count"] = round(
|
self.prepared_epc["fixed-lighting-outlets-count"] = round(
|
||||||
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0])
|
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
|
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
|
||||||
|
|
||||||
|
|
@ -460,14 +468,14 @@ class EPCRecord:
|
||||||
if not self.prepared_epc:
|
if not self.prepared_epc:
|
||||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||||
|
|
||||||
map = {
|
mains_gas_map = {
|
||||||
"Y": True,
|
"Y": True,
|
||||||
"N": False,
|
"N": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.prepared_epc["mains-gas-flag"] = None if (
|
self.prepared_epc["mains-gas-flag"] = None if (
|
||||||
self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
|
self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
|
||||||
) else map[self.prepared_epc["mains-gas-flag"]]
|
) else mains_gas_map[self.prepared_epc["mains-gas-flag"]]
|
||||||
|
|
||||||
def _clean_heat_loss_corridor(self):
|
def _clean_heat_loss_corridor(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -476,15 +484,18 @@ class EPCRecord:
|
||||||
if not self.prepared_epc:
|
if not self.prepared_epc:
|
||||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||||
|
|
||||||
map = {
|
valid_values = [
|
||||||
"no corridor": False,
|
"no corridor",
|
||||||
"unheated corridor": True,
|
"unheated corridor",
|
||||||
"heated corridor": False
|
"heated corridor"
|
||||||
}
|
]
|
||||||
|
|
||||||
self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc[
|
self.prepared_epc["heat-loss-corridor"] = (
|
||||||
"heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[
|
"no corridor" if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else
|
||||||
self.prepared_epc["heat-loss-corridor"]]
|
self.prepared_epc["heat-loss-corridor"]
|
||||||
|
)
|
||||||
|
if self.prepared_epc["heat-loss-corridor"] not in valid_values:
|
||||||
|
self.prepared_epc["heat-loss-corridor"] = "no corridor"
|
||||||
|
|
||||||
self.prepared_epc["unheated-corridor-length"] = (
|
self.prepared_epc["unheated-corridor-length"] = (
|
||||||
float(self.prepared_epc["unheated-corridor-length"]) if
|
float(self.prepared_epc["unheated-corridor-length"]) if
|
||||||
|
|
@ -572,11 +583,13 @@ class EPCRecord:
|
||||||
if not self.prepared_epc:
|
if not self.prepared_epc:
|
||||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||||
|
|
||||||
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"],
|
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(
|
||||||
self.prepared_epc["built-form"])
|
self.prepared_epc["built-form"], self.prepared_epc["built-form"]
|
||||||
|
)
|
||||||
|
|
||||||
if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES:
|
if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES:
|
||||||
if self.prepared_epc["property-type"] == "Flat":
|
if self.prepared_epc["property-type"] in ["Flat", "Maisonette"]:
|
||||||
self.prepared_epc["built-form"] = "Semi-Detached"
|
self.prepared_epc["built-form"] = "End-Terrace"
|
||||||
|
|
||||||
def _clean_age_band(self):
|
def _clean_age_band(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
import pytest
|
import pytest
|
||||||
from utils.s3 import read_dataframe_from_s3_parquet
|
from utils.s3 import read_dataframe_from_s3_parquet
|
||||||
from etl.epc.Record import EPCRecord
|
from etl.epc.Record import EPCRecord
|
||||||
from unittest.mock import Mock
|
from etl.epc.settings import DATA_ANOMALY_MATCHES
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
class TestEpcRecord:
|
class TestEpcRecord:
|
||||||
|
|
@ -96,3 +97,158 @@ class TestEpcRecord:
|
||||||
record4._clean_ventilation()
|
record4._clean_ventilation()
|
||||||
|
|
||||||
assert record4.prepared_epc["mechanical-ventilation"] is None
|
assert record4.prepared_epc["mechanical-ventilation"] is None
|
||||||
|
|
||||||
|
def test_clean_energy_valid_values(self, cleaning_data, epc_records_1):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"energy-consumption-current": "200",
|
||||||
|
"co2-emissions-current": "5.5"
|
||||||
|
}
|
||||||
|
record._clean_energy()
|
||||||
|
|
||||||
|
assert record.prepared_epc["energy-consumption-current"] == 200.0
|
||||||
|
assert record.prepared_epc["co2-emissions-current"] == 5.5
|
||||||
|
|
||||||
|
def test_clean_energy_empty_values(self, cleaning_data, epc_records_1):
|
||||||
|
# We cannot have invalid values so this should raise an exception
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"energy-consumption-current": "",
|
||||||
|
"co2-emissions-current": ""
|
||||||
|
}
|
||||||
|
record._clean_energy()
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
record._clean_energy()
|
||||||
|
|
||||||
|
def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
# "Semi-Detached" is passed in and is expected to come back unchanged after cleaning
|
||||||
|
record.prepared_epc = {
|
||||||
|
"built-form": "Semi-Detached",
|
||||||
|
"property-type": "Flat" # Assuming this affects the remapping
|
||||||
|
}
|
||||||
|
record._clean_built_form()
|
||||||
|
|
||||||
|
assert record.prepared_epc["built-form"] == "Semi-Detached"
|
||||||
|
|
||||||
|
def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
|
||||||
|
record.prepared_epc = {
|
||||||
|
"built-form": "",
|
||||||
|
"property-type": "Flat"
|
||||||
|
}
|
||||||
|
record._clean_built_form()
|
||||||
|
|
||||||
|
assert record.prepared_epc["built-form"] == "End-Terrace"
|
||||||
|
|
||||||
|
def test_clean_floor_area_valid(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"total-floor-area": "120.5"
|
||||||
|
}
|
||||||
|
record._clean_floor_area()
|
||||||
|
|
||||||
|
assert record.prepared_epc["total-floor-area"] == 120.5
|
||||||
|
|
||||||
|
def test_clean_floor_area_empty(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"total-floor-area": ""
|
||||||
|
}
|
||||||
|
# We have no known case of missing floor area
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
record._clean_floor_area()
|
||||||
|
|
||||||
|
def test_clean_heat_loss_corridor_valid(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"heat-loss-corridor": "unheated corridor",
|
||||||
|
"unheated-corridor-length": ""
|
||||||
|
}
|
||||||
|
record._clean_heat_loss_corridor()
|
||||||
|
|
||||||
|
assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
|
||||||
|
|
||||||
|
def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
# Assuming "InvalidCorridor" is an anomaly
|
||||||
|
record.prepared_epc = {
|
||||||
|
"heat-loss-corridor": "InvalidCorridor",
|
||||||
|
"unheated-corridor-length": ""
|
||||||
|
}
|
||||||
|
record._clean_heat_loss_corridor()
|
||||||
|
|
||||||
|
assert record.prepared_epc["heat-loss-corridor"] == "no corridor"
|
||||||
|
|
||||||
|
def test_clean_mains_gas_valid(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"mains-gas-flag": "Y"
|
||||||
|
}
|
||||||
|
record._clean_mains_gas()
|
||||||
|
|
||||||
|
assert record.prepared_epc["mains-gas-flag"] is True
|
||||||
|
|
||||||
|
def test_clean_mains_gas_anomaly(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"mains-gas-flag": "InvalidValue"
|
||||||
|
}
|
||||||
|
# It should always be Y, N or one of the known anomaly values
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
record._clean_mains_gas()
|
||||||
|
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"mains-gas-flag": random.choice(list(DATA_ANOMALY_MATCHES))
|
||||||
|
}
|
||||||
|
record._clean_mains_gas()
|
||||||
|
|
||||||
|
assert record.prepared_epc["mains-gas-flag"] is None
|
||||||
|
|
||||||
|
def test_clean_solar_hot_water_valid(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"solar-water-heating-flag": "Y"
|
||||||
|
}
|
||||||
|
record._clean_solar_hot_water()
|
||||||
|
|
||||||
|
assert record.prepared_epc["solar-water-heating-flag"] is True
|
||||||
|
|
||||||
|
def test_clean_solar_hot_water_empty(self, cleaning_data):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"solar-water-heating-flag": ""
|
||||||
|
}
|
||||||
|
record._clean_solar_hot_water()
|
||||||
|
|
||||||
|
assert record.prepared_epc["solar-water-heating-flag"] is None
|
||||||
|
|
||||||
|
def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
|
||||||
|
record.prepared_epc = {
|
||||||
|
"fixed-lighting-outlets-count": "5"
|
||||||
|
}
|
||||||
|
record._clean_number_lighting_outlets()
|
||||||
|
|
||||||
|
assert record.prepared_epc["fixed-lighting-outlets-count"] == 5.0
|
||||||
|
|
||||||
|
def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1):
|
||||||
|
record = EPCRecord(cleaning_data=cleaning_data)
|
||||||
|
record.run_mode = "newdata"
|
||||||
|
record.prepared_epc = {
|
||||||
|
"fixed-lighting-outlets-count": "",
|
||||||
|
"property-type": "Flat",
|
||||||
|
"built-form": "Semi-Detached",
|
||||||
|
"construction-age-band": "England and Wales: 1900-1929",
|
||||||
|
"local-authority": "E08000025",
|
||||||
|
"number-habitable-rooms": "4",
|
||||||
|
"number-heated-rooms": "4",
|
||||||
|
}
|
||||||
|
record.old_data = []
|
||||||
|
record.full_sap_epc = []
|
||||||
|
record._clean_number_lighting_outlets()
|
||||||
|
|
||||||
|
assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue