From ad3ee6dde54dca2fac4672e6ecbf9c30a8459ea7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 11 Sep 2023 15:28:47 +0100 Subject: [PATCH] attributes welsh translations and covering some missing cases --- model_data/app.py | 2 +- .../epc_attributes/HotWaterAttributes.py | 4 ++- .../epc_attributes/MainheatAttributes.py | 23 +++++++++++++++++ model_data/epc_attributes/RoofAttributes.py | 19 +++++++++----- model_data/epc_attributes/WindowAttributes.py | 3 +++ .../test_hot_water_attributes_cases.py | 9 +++++++ .../test_mainheat_attributes_cases.py | 25 +++++++++++++++++++ .../test_data/test_window_attributes_cases.py | 6 +++++ 8 files changed, 83 insertions(+), 8 deletions(-) diff --git a/model_data/app.py b/model_data/app.py index e0ca5cc7..5106a0e4 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -36,7 +36,7 @@ def app(): cleaner = EpcClean([]) epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()] - for directory in tqdm(epc_directories[140:]): + for directory in tqdm(epc_directories): data = pd.read_csv(directory / "certificates.csv", low_memory=False) # Rename the columns to the same format as the api returns data.columns = [c.replace("_", "-").lower() for c in data.columns] diff --git a/model_data/epc_attributes/HotWaterAttributes.py b/model_data/epc_attributes/HotWaterAttributes.py index 0442b65d..d8669f9d 100644 --- a/model_data/epc_attributes/HotWaterAttributes.py +++ b/model_data/epc_attributes/HotWaterAttributes.py @@ -108,7 +108,9 @@ class HotWaterAttributes(Definitions): "popty estynedig olew, dim thermostat ar y silindr": "oil range cooker, no cylinder thermostat", "cynllun cymunedol": "community scheme", "nwy wrth fwy nag un pwynt": "gas multipoint", - "popty estynedig olew": "oil range cooker" + "popty estynedig olew": "oil range cooker", + "dim system ar gael rhagdybir bod twymwr tanddwr trydan": "no system present electric immersion assumed", + "o'r brif system, dim thermostat ar y silindr": "from main 
system, no cylinder thermostat" } def __init__(self, description: str): diff --git a/model_data/epc_attributes/MainheatAttributes.py b/model_data/epc_attributes/MainheatAttributes.py index 74028ab1..569935b8 100644 --- a/model_data/epc_attributes/MainheatAttributes.py +++ b/model_data/epc_attributes/MainheatAttributes.py @@ -43,6 +43,7 @@ class MainHeatAttributes(Definitions): "dim system ar gael, rhagdybir bod gwresogyddion trydan": "no system present, electric heaters assumed", "gwresogyddion ystafell, glo carreg": "room heaters, coal", "pwmp gwres sygçön tarddu yn yr awyr, rheiddiaduron, trydan": "air source heat pump, radiators, electric", + "gwresogyddion ystafell, nwy prif gyflenwad": "room heaters, mains gas", } REMAP = { @@ -81,6 +82,22 @@ class MainHeatAttributes(Definitions): ): raise ValueError('Invalid description') + def process_edge_cases(self, result) -> (dict, bool): + """ + We handle some edge cases that will cause issues, for example descriptions that are missing a + heating system + :return: tuple containing dictionary result, and boolean is_edge_case + """ + + edge_cases = [", underfloor, electric"] + if self.description not in edge_cases: + return result, False + + if self.description == ", underfloor, electric": + result["has_electric"] = True + result['has_underfloor_heating'] = True + return result, True + def process(self) -> Dict[str, Union[str, bool]]: result: Dict[str, Union[str, bool]] = {f'has_{ds.replace(" ", "_")}': False for ds in self.DISTRIBUTION_SYSTEMS} @@ -92,11 +109,17 @@ class MainHeatAttributes(Definitions): if self.nodata: return result + result, is_edge_case = self.process_edge_cases(result) + if is_edge_case: + return result + description = self.description.split(',') # Process each part separately for part in description: part = part.strip() # remove leading/trailing white spaces + if not part: + continue # Heating Systems process_part(result, part, self.HEAT_SYSTEMS, 'has_') diff --git 
a/model_data/epc_attributes/RoofAttributes.py b/model_data/epc_attributes/RoofAttributes.py index d6ef217f..faaba32d 100644 --- a/model_data/epc_attributes/RoofAttributes.py +++ b/model_data/epc_attributes/RoofAttributes.py @@ -53,17 +53,24 @@ class RoofAttributes(Definitions): search for regular expressions and translate """ - insulation_thickness_match = re.search(r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft", self.description) - insulation_thickness_match2 = re.search(r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", self.description) + loft_insulation_thickness_match = re.search(r"ar oleddf, (\d+ mm) o inswleiddio yn y llofft", self.description) + loft_insulation_thickness_match2 = re.search(r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", self.description) + loft_insulation_thickness_match3 = re.search(r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof", + self.description) uvalue_search = re.search(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", self.description) # Step 2: Generalized translation with placeholder - if (insulation_thickness_match is not None) | (insulation_thickness_match2 is not None): - if insulation_thickness_match is not None: - insulation_thickness = insulation_thickness_match.group(1) + if (loft_insulation_thickness_match is not None) | \ + (loft_insulation_thickness_match2 is not None) | \ + (loft_insulation_thickness_match3 is not None): + if loft_insulation_thickness_match is not None: + insulation_thickness = loft_insulation_thickness_match.group(1) + elif loft_insulation_thickness_match2 is not None: + insulation_thickness = loft_insulation_thickness_match2.group(1) else: - insulation_thickness = insulation_thickness_match2.group(1) + insulation_thickness = loft_insulation_thickness_match3.group(1) + self.description = f"pitched, {insulation_thickness} loft insulation" elif uvalue_search: uvalue = uvalue_search.group(1) diff --git a/model_data/epc_attributes/WindowAttributes.py 
b/model_data/epc_attributes/WindowAttributes.py index 2f433eb0..9b794491 100644 --- a/model_data/epc_attributes/WindowAttributes.py +++ b/model_data/epc_attributes/WindowAttributes.py @@ -24,6 +24,9 @@ class WindowAttributes(Definitions): "rhai gwydrau dwbl": "some double glazing", "gwydrau sengl": "single glazed", "ffenestri perfformiad uchel": "high performance glazing", + "gwydrau triphlyg llawn": "fully triple glazed", + "gwydrau triphlyg rhannol": "partial triple glazed", + "gwydrau triphlyg mwyaf": "mostly triple glazed", } def __init__(self, description: str): diff --git a/model_data/tests/test_data/test_hot_water_attributes_cases.py b/model_data/tests/test_data/test_hot_water_attributes_cases.py index aa78d709..5c48f57e 100644 --- a/model_data/tests/test_data/test_hot_water_attributes_cases.py +++ b/model_data/tests/test_data/test_hot_water_attributes_cases.py @@ -206,4 +206,13 @@ hotwater_cases = [ 'tariff_type': None, 'extra_features': None, 'chp_systems': None, 'distribution_system': None, 'no_system_present': None, 'assumed': False, "appliance": "oil range cooker"}, + {'original_description': 'Dim system ar gael: rhagdybir bod twymwr tanddwr trydan', + 'heater_type': 'electric immersion', + 'system_type': None, 'thermostat_characteristics': None, 'heating_scope': None, 'energy_recovery': None, + 'tariff_type': None, 'extra_features': None, 'chp_systems': None, 'distribution_system': None, + 'no_system_present': 'no system present', 'assumed': True, "appliance": None}, + {'original_description': "O'r brif system, dim thermostat ar y silindr", 'heater_type': None, + 'system_type': 'from main system', 'thermostat_characteristics': 'no cylinder thermostat', 'heating_scope': None, + 'energy_recovery': None, 'tariff_type': None, 'extra_features': None, 'chp_systems': None, + 'distribution_system': None, 'no_system_present': None, 'assumed': False, "appliance": None}, ] diff --git a/model_data/tests/test_data/test_mainheat_attributes_cases.py 
b/model_data/tests/test_data/test_mainheat_attributes_cases.py index 597fa8e8..2b919baa 100644 --- a/model_data/tests/test_data/test_mainheat_attributes_cases.py +++ b/model_data/tests/test_data/test_mainheat_attributes_cases.py @@ -1407,4 +1407,29 @@ mainheat_cases = [ 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, "has_electric_heat_pumps": False, "has_micro-cogeneration": False}, + {'original_description': ', underfloor, electric', 'has_radiators': False, 'has_fan_coil_units': False, + 'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False, + 'has_pipes_in_concrete_slab': False, 'has_boiler': False, 'has_air_source_heat_pump': False, + 'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False, + 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, 'has_community_scheme': False, + 'has_ground_source_heat_pump': False, 'has_no_system_present': False, 'has_portable_electric_heaters': False, + 'has_water_source_heat_pump': False, 'has_electric': True, 'has_mains_gas': False, 'has_wood_logs': False, + 'has_LPG': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': True, + "has_electric_heat_pumps": False, + "has_micro-cogeneration": False}, + {'original_description': 'Gwresogyddion ystafell, nwy prif gyflenwad', 'has_radiators': False, + 'has_fan_coil_units': False, + 'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False, + 'has_pipes_in_concrete_slab': False, 'has_boiler': False, 'has_air_source_heat_pump': False, + 'has_room_heaters': True, 'has_electric_storage_heaters': False, 'has_warm_air': False, + 'has_electric_underfloor_heating': 
False, 'has_electric_ceiling_heating': False, 'has_community_scheme': False, + 'has_ground_source_heat_pump': False, 'has_no_system_present': False, 'has_portable_electric_heaters': False, + 'has_water_source_heat_pump': False, 'has_electric': False, 'has_mains_gas': True, 'has_wood_logs': False, + 'has_LPG': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, + "has_electric_heat_pumps": False, + "has_micro-cogeneration": False}, ] diff --git a/model_data/tests/test_data/test_window_attributes_cases.py b/model_data/tests/test_data/test_window_attributes_cases.py index 150eb757..218019d8 100644 --- a/model_data/tests/test_data/test_window_attributes_cases.py +++ b/model_data/tests/test_data/test_window_attributes_cases.py @@ -16,6 +16,8 @@ windows_cases = [ 'glazing_type': 'secondary', 'no_data': False}, {'original_description': 'Mostly triple glazing', 'has_glazing': True, 'glazing_coverage': 'most', 'glazing_type': 'triple', 'no_data': False}, + {'original_description': 'Gwydrau triphlyg mwyaf', 'has_glazing': True, 'glazing_coverage': 'most', + 'glazing_type': 'triple', 'no_data': False}, {'original_description': 'Multiple glazing throughout', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'multiple', 'no_data': False}, {'original_description': 'Partial double glazing', 'has_glazing': True, 'glazing_coverage': 'partial', @@ -26,6 +28,8 @@ windows_cases = [ 'glazing_type': 'secondary', 'no_data': False}, {'original_description': 'Partial triple glazing', 'has_glazing': True, 'glazing_coverage': 'partial', 'glazing_type': 'triple', 'no_data': False}, + {'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial', + 'glazing_type': 'triple', 'no_data': 
False}, {'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single', 'no_data': False}, {'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial', @@ -48,4 +52,6 @@ windows_cases = [ 'glazing_type': 'high performance', 'no_data': False}, {'original_description': 'Rhai gwydrau dwbl', 'has_glazing': True, 'glazing_coverage': 'partial', 'glazing_type': 'double', 'no_data': False}, + {'original_description': 'Gwydrau triphlyg llawn', 'has_glazing': True, 'glazing_coverage': 'full', + 'glazing_type': 'triple', 'no_data': False}, ]