diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 1dfdd452..a3c1018f 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -44,8 +44,7 @@ def app(): # Rename the columns to the same format as the api returns data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold - # For this cleaning dataset, let's try and use all EPCs - # data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE] + data = data[data["lodgement-date"] >= "2011-01-01"] # Convert to list of dictioaries as returned by the api data = data.to_dict("records") diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index f9cec48b..67f5bebd 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -96,6 +96,7 @@ class HotWaterAttributes(Definitions): WELSH_TEXT = { "ogçör brif system": "from main system", + "o r brif system": "from main system", "ogçör brif system, adfer gwres nwyon ffliw": "from main system, flue gas heat recovery", "bwyler/cylchredydd nwy": "gas boiler/circulator", "ogçör brif system, dim thermostat ar y silindr": "from main system, no cylinder thermostat", diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 84d1f3e9..154fe41b 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -6,7 +6,7 @@ from etl.epc_clean.epc_attributes.attribute_utils import extract_component_types class RoofAttributes(Definitions): ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed'] - DWELLING_ABOVE = ["another dwelling above", "other premises above"] + DWELLING_ABOVE = ["another dwelling above", "other premises above", "other dwelling above"] WELSH_TEXT = { "ar oleddf, dim inswleiddio": "pitched, no insulation", @@ -113,9 +113,8 @@ class RoofAttributes(Definitions): # roof type result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES) - result["has_dwelling_above"] = ( - "another dwelling above" in description or "other premises above" in description - ) + result["has_dwelling_above"] = any([x in description for x in self.DWELLING_ABOVE]) + for dwelling_above in self.DWELLING_ABOVE: description = description.replace(dwelling_above, "") diff --git a/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py b/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py index 6b719afd..06c1f078 100644 --- a/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py @@ -397,7 +397,7 @@ clean_roof_test_cases = [ 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}, {'original_description': 'Average thermal transmittance 0.80 W/m+é-¦K', 'thermal_transmittance': 0.8, - 'thermal_transmittance_unit': 'w/m+é-¦k', 'is_pitched': False, 'is_roof_room': False, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None} ]