From 77c20d981aa120532aed843c1c395f8326aee862 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 8 Jun 2023 17:12:52 +0100 Subject: [PATCH] created test_cleaning_utils and moved out reusable function --- epc_data/EpcClean.py | 19 ++++++--------- epc_data/app.py | 7 ++++-- epc_data/cleaning/Floor.py | 28 ++++++++++++++++++++++ epc_data/cleaning/Roof.py | 34 ++++----------------------- epc_data/cleaning/cleaning_utils.py | 29 +++++++++++++++++++++++ epc_data/tests/test_clean_roof.py | 4 ---- epc_data/tests/test_cleaning_utils.py | 6 +++++ 7 files changed, 79 insertions(+), 48 deletions(-) create mode 100644 epc_data/cleaning/Floor.py create mode 100644 epc_data/cleaning/cleaning_utils.py create mode 100644 epc_data/tests/test_cleaning_utils.py diff --git a/epc_data/EpcClean.py b/epc_data/EpcClean.py index a70d3d25..7a520264 100644 --- a/epc_data/EpcClean.py +++ b/epc_data/EpcClean.py @@ -2,6 +2,7 @@ from typing import List, Dict, Any from collections import Counter from epc_data.cleaning.Roof import CleanRoof +from epc_data.cleaning.Floor import CleanFloor class EpcClean: @@ -35,15 +36,9 @@ class EpcClean: for field in self.CLEANING_FIELDS: self.unique_vals[field] = Counter([v[field] for v in self.data]) - self.clean_roof() + self.clean_wrapper(field="roof-description", cleaning_cls=CleanRoof) - # for description in self.unique_vals["floor-description"].keys(): - # self.cleaned["floor-description"].append( - # { - # "original_description": description, - # **self.clean_floor(description) - # } - # ) + # self.clean_wrapper(field="floor-description", cleaning_cls=CleanFloor) def _init_empty_cleaned_obj(self) -> None: """ @@ -51,11 +46,11 @@ class EpcClean: """ self.cleaned = {field: [] for field in self.CLEANING_FIELDS} - def clean_roof(self): - for description in self.unique_vals["roof-description"].keys(): - self.cleaned["roof-description"].append( + def clean_wrapper(self, field, cleaning_cls): + for description in self.unique_vals[field].keys(): + self.cleaned[field].append( { "original_description": description, - **CleanRoof(description).clean() + **cleaning_cls(description).clean() } ) diff --git a/epc_data/app.py b/epc_data/app.py index 6c3b48d1..2006213f 100644 --- a/epc_data/app.py +++ b/epc_data/app.py @@ -38,5 +38,8 @@ def handler(): cleaner.clean() - - + # For testing: + from epc_data.cleaning.Floor import CleanFloor + descriptions = {x["floor-description"] for x in data} + for description in descriptions: + res = CleanFloor(description).clean() diff --git a/epc_data/cleaning/Floor.py b/epc_data/cleaning/Floor.py new file mode 100644 index 00000000..e05892fa --- /dev/null +++ b/epc_data/cleaning/Floor.py @@ -0,0 +1,28 @@ +from typing import Dict, Union + + +class CleanFloor: + + def __init__(self, description): + self.description: str = description + + def clean(self) -> Dict[str, Union[str, bool, int, None]]: + """ + + :return: + """ + + description_lower = self.description.lower().strip() + + if "another dwelling below" in description_lower: + return self._make_clean_output( + has_dwelling_below=True, + ) + + raise Exception("EKJH") + + @staticmethod + def _make_clean_output(has_dwelling_below): + return { + "has_dwelling_below": has_dwelling_below, + } diff --git a/epc_data/cleaning/Roof.py b/epc_data/cleaning/Roof.py index 7793d4e8..8f2144c9 100644 --- a/epc_data/cleaning/Roof.py +++ b/epc_data/cleaning/Roof.py @@ -1,10 +1,8 @@ -import re -from typing import Dict, Union, Tuple, Optional +from typing import Dict, Union, Optional +from epc_data.cleaning.cleaning_utils import extract_thermal_transmittence class CleanRoof: - U_VALUE_REGEX = re.compile(r"(\d+\.\d+)") - UNIT_REGEX = re.compile(r"(w/m-¦k)") def __init__(self, description): """ @@ -52,10 +50,10 @@ class CleanRoof: if "insulation" in description_lower or "insulated" in description_lower: insulation_thickness = self._find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat) elif "thermal transmittance" in description_lower: - thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower) + thermal_transmittence, thermal_transmittence_unit = extract_thermal_transmittence(description_lower) elif is_thatched: # Search for these features: - thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower) + thermal_transmittence, thermal_transmittence_unit = extract_thermal_transmittence(description_lower) insulation_thickness = self._find_insulation_thickness( description_lower, is_pitched, is_roof_room, is_flat ) @@ -192,27 +190,3 @@ class CleanRoof: return cls._search_split_description(desc) return None - - @classmethod - def _extract_thermal_transmittence(cls, description_lower: str) -> Tuple[Union[float, None], Union[str, None]]: - """ - Extracts thermal transmittance from the description. - - :param description_lower: Lowercase description. - :return: Tuple containing U-value and unit. - """ - # Find U-value - u_value = re.search(cls.U_VALUE_REGEX, description_lower) - if u_value is not None: - u_value = float(u_value.group(1)) - else: - u_value = None - - # Find unit - unit = re.search(cls.UNIT_REGEX, description_lower) - if unit is not None: - unit = unit.group(1) - else: - unit = None - - return u_value, unit diff --git a/epc_data/cleaning/cleaning_utils.py b/epc_data/cleaning/cleaning_utils.py new file mode 100644 index 00000000..d7196836 --- /dev/null +++ b/epc_data/cleaning/cleaning_utils.py @@ -0,0 +1,29 @@ +import re +from typing import Tuple, Union + +THERMAL_TRANSMITTANCE_U_VALUE_REGEX = re.compile(r"(\d+\.\d+)") +THERMAL_TRANSMITTANCE_UNIT_REGEX = re.compile(r"(w/m-¦k)") + + +def extract_thermal_transmittence(description_lower: str) -> Tuple[Union[float, None], Union[str, None]]: + """ + Extracts thermal transmittance from the description. + + :param description_lower: Lowercase description. + :return: Tuple containing U-value and unit. + """ + # Find U-value + u_value = re.search(THERMAL_TRANSMITTANCE_U_VALUE_REGEX, description_lower) + if u_value is not None: + u_value = float(u_value.group(1)) + else: + u_value = None + + # Find unit + unit = re.search(THERMAL_TRANSMITTANCE_UNIT_REGEX, description_lower) + if unit is not None: + unit = unit.group(1) + else: + unit = None + + return u_value, unit diff --git a/epc_data/tests/test_clean_roof.py b/epc_data/tests/test_clean_roof.py index a93c0564..83be314d 100644 --- a/epc_data/tests/test_clean_roof.py +++ b/epc_data/tests/test_clean_roof.py @@ -41,10 +41,6 @@ class TestEpcClean: def test__find_insulation_thickness(self): assert CleanRoof._find_insulation_thickness("no insulation", False, False, False) == 0 - def test__extract_thermal_transmittence(self): - description = "U-value of 2.3 w/m-¦k" - assert CleanRoof._extract_thermal_transmittence(description) == (2.3, "w/m-¦k") - def test_clean_roof(self): result = CleanRoof('Pitched, 270 mm loft insulation').clean() diff --git a/epc_data/tests/test_cleaning_utils.py b/epc_data/tests/test_cleaning_utils.py new file mode 100644 index 00000000..d5b77916 --- /dev/null +++ b/epc_data/tests/test_cleaning_utils.py @@ -0,0 +1,6 @@ +from epc_data.cleaning.cleaning_utils import extract_thermal_transmittence + + +def test__extract_thermal_transmittence(): + description = "U-value of 2.3 w/m-¦k" + assert extract_thermal_transmittence(description) == (2.3, "w/m-¦k")