Add additional case coverage to the EPC cleaner

This commit is contained in:
Khalim Conn-Kowlessar 2024-09-24 18:57:10 +01:00
parent 3957f0fcf8
commit e942d6e700
4 changed files with 6 additions and 7 deletions

View file

@ -44,8 +44,7 @@ def app():
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Keep only records lodged on or after the date threshold
# For this cleaning dataset, let's try and use all EPCs
# data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[data["lodgement-date"] >= "2011-01-01"]
# Convert to list of dictionaries as returned by the api
data = data.to_dict("records")

View file

@ -96,6 +96,7 @@ class HotWaterAttributes(Definitions):
WELSH_TEXT = {
"ogçör brif system": "from main system",
"o r brif system": "from main system",
"ogçör brif system, adfer gwres nwyon ffliw": "from main system, flue gas heat recovery",
"bwyler/cylchredydd nwy": "gas boiler/circulator",
"ogçör brif system, dim thermostat ar y silindr": "from main system, no cylinder thermostat",

View file

@ -6,7 +6,7 @@ from etl.epc_clean.epc_attributes.attribute_utils import extract_component_types
class RoofAttributes(Definitions):
ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
DWELLING_ABOVE = ["another dwelling above", "other premises above"]
DWELLING_ABOVE = ["another dwelling above", "other premises above", "other dwelling above"]
WELSH_TEXT = {
"ar oleddf, dim inswleiddio": "pitched, no insulation",
@ -113,9 +113,8 @@ class RoofAttributes(Definitions):
# roof type
result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES)
result["has_dwelling_above"] = (
"another dwelling above" in description or "other premises above" in description
)
result["has_dwelling_above"] = any([x in description for x in self.DWELLING_ABOVE])
for dwelling_above in self.DWELLING_ABOVE:
description = description.replace(dwelling_above, "")

View file

@ -397,7 +397,7 @@ clean_roof_test_cases = [
'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': 'none'},
{'original_description': 'Average thermal transmittance 0.80 W/m+é-¦K', 'thermal_transmittance': 0.8,
'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False,
'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False,
'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}
]