diff --git a/model_data/app.py b/model_data/app.py index 913ff546..b49e3fd5 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -408,6 +408,7 @@ def handler(): # TODO: Add in the u-values for roofs rather than the description # TODO: Add in the actual property features for walls, floors, roof, not just the u-value # TODO: Think about how we use sap vs rdsap - should we add a feature in the model for transaction-type? + # TODO: Remove cases where descriptions have no data or are error cases # # property type looks okay - we're definitely low on the number of bungalows # number-habitable-rooms & number-heated-rooms is unpopulated so pretty useless atm diff --git a/model_data/epc_attributes/FloorAttributes.py b/model_data/epc_attributes/FloorAttributes.py index 589677d6..c53e1627 100644 --- a/model_data/epc_attributes/FloorAttributes.py +++ b/model_data/epc_attributes/FloorAttributes.py @@ -7,20 +7,26 @@ class FloorAttributes(BaseUtility): DWELLING_BELOW = ["another dwelling below", "other premises below"] FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"] + # For the short term, while we are still exploring the data, we maintain a list of error cases which + # we want to ignore and consider as no data. + + OBSERVED_ERRORS = ["Conservatory"] + def __init__(self, description: str): self.description: str = description.lower() - self.nodata = not description or description in self.DATA_ANOMALY_MATCHES + self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or description in self.OBSERVED_ERRORS - if not description or not any( - rt in self.description for rt in self.FLOOR_TYPES + self.DWELLING_BELOW + ["average thermal transmittance"] + if not self.nodata and not any( + rt in self.description for rt in + self.FLOOR_TYPES + self.DWELLING_BELOW + ["average thermal transmittance"] ): raise ValueError('Invalid description') def process(self) -> Dict[str, Union[str, bool, int, None]]: if self.nodata: - return {} + return {"no_data": True} result: Dict[str, Union[float, str, bool, None]] = {} description = self.description diff --git a/model_data/epc_attributes/RoofAttributes.py b/model_data/epc_attributes/RoofAttributes.py index f0f3e8f6..ec014357 100644 --- a/model_data/epc_attributes/RoofAttributes.py +++ b/model_data/epc_attributes/RoofAttributes.py @@ -15,7 +15,7 @@ class RoofAttributes(BaseUtility): self.description: str = description.lower() self.nodata = not description or description in self.DATA_ANOMALY_MATCHES - + if not self.nodata and not any( rt in self.description for rt in self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"] ): diff --git a/model_data/tests/test_floor_attributes.py b/model_data/tests/test_floor_attributes.py index 5cdaa5be..ce17aa3b 100644 --- a/model_data/tests/test_floor_attributes.py +++ b/model_data/tests/test_floor_attributes.py @@ -12,8 +12,10 @@ class TestCleanFloor: assert floor_attr.description == valid_description.lower() # Test initialization with an empty description - with pytest.raises(ValueError): - FloorAttributes('') + empty = FloorAttributes('') + assert empty.nodata + output = empty.process() + assert output == {"no_data": True} # Test initialization with a description that contains none of the keywords with pytest.raises(ValueError): @@ -33,7 +35,6 @@ class TestCleanFloor: def test_invalid_description(self): # Test that invalid descriptions raise a ValueError invalid_descriptions = [ - "", "invalid description", "description with no known floor types or thermal transmittance", ] @@ -47,3 +48,8 @@ class TestCleanFloor: invalid_description = 'description without keywords' with pytest.raises(ValueError): FloorAttributes(invalid_description) + + def test_known_errors(self): + error_description = "Conservatory" + obj = FloorAttributes(error_description) + assert obj.nodata