diff --git a/.idea/Model.iml b/.idea/Model.iml index c66b4576..a7ea3cf1 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 00000000..a55e7a17 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 46ea892f..242c02bb 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,7 @@ - + + + \ No newline at end of file diff --git a/epc_data/EpcClean.py b/epc_data/EpcClean.py index cdd29c14..a10edb84 100644 --- a/epc_data/EpcClean.py +++ b/epc_data/EpcClean.py @@ -2,6 +2,8 @@ import re from typing import List, Dict, Any, Union, Tuple from collections import Counter +import pandas as pd + class EpcClean: """ @@ -39,8 +41,12 @@ class EpcClean: for description in self.unique_vals["roof-description"].keys(): self.cleaned["roof-description"].append( - {"original": description, "cleaned": self.clean_roof(description)} + { + "original_description": description, + **self.clean_roof(description) + } ) + df = pd.DataFrame(self.cleaned["roof-description"]) def _init_empty_cleaned_obj(self) -> None: """ @@ -181,8 +187,10 @@ class EpcClean: insulation_thickness = self._find_insulation_thickness( description_lower, is_pitched, is_roof_room, is_flat ) + elif description_lower == "pitched": + thermal_transmittence, thermal_transmittence_unit, insulation_thickness = None, None, None else: - raise NotImplementedError("Not handles this") + raise NotImplementedError("Not handled this") attributes = { "is_pitched": is_pitched, diff --git a/epc_data/README.md b/epc_data/README.md index 70ffa21e..17f1227f 100644 --- a/epc_data/README.md +++ b/epc_data/README.md @@ -33,3 +33,17 @@ To install project dependencies navigate to /epc_data and run ```commandline pip install -r requirements.txt ``` + +### Running Tests + +If you are not in a virtual environment, activate it with + +```commandline +conda activate envName +``` + +Then run + +```commandline +python -m pytest +``` diff --git a/epc_data/app.py b/epc_data/app.py index be99ff2b..0ff21400 100644 --- a/epc_data/app.py +++ b/epc_data/app.py @@ -1,3 +1,5 @@ +import pickle + from tqdm import tqdm from epc_data.temp_inputs import input_data diff --git a/epc_data/requirements.txt b/epc_data/requirements.txt index dbad3478..908ce76c 100644 --- a/epc_data/requirements.txt +++ b/epc_data/requirements.txt @@ -2,4 +2,5 @@ epc-api-python python-dotenv tqdm pandas -mypy \ No newline at end of file +mypy +pytest \ No newline at end of file diff --git a/epc_data/tests/__init__.py b/epc_data/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/epc_data/tests/test_EpcClean.py b/epc_data/tests/test_EpcClean.py new file mode 100644 index 00000000..5a2086cd --- /dev/null +++ b/epc_data/tests/test_EpcClean.py @@ -0,0 +1,132 @@ +import pytest +import pickle +from epc_data.EpcClean import EpcClean +from pathlib import Path + +# For local testing +if __file__ == "": + input_data_path = Path("./epc_data/tests/test_data/EpcClean_inputs.obj") +else: + current_file_path = Path(__file__) + input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj' + + +# +# @pytest.fixture +# def data(): +# print("WOW") +# print(input_data_path) +# with open(input_data_path, 'rb') as f: +# data = pickle.load(f) +# return data +# +# +# def test_clean(data): +# epc = EpcClean(data) +# epc.clean() +# assert len(epc.cleaned["roof-description"]) == len(epc.unique_vals["roof-description"]) +# +# +# def test_clean_roof(data): +# epc = EpcClean(data) +# result = epc.clean_roof('Pitched, 270 mm loft insulation') +# +# # change the expected output based on your requirement +# expected_output = { +# "is_pitched": True, +# "is_roof_room": False, +# "has_loft": True, +# "insulation_thickness": 270, +# "has_dwelling_above": False, +# "assumed": False, +# "is_flat": False, +# "thermal_transmittence": None, +# "thermal_transmittence_unit": None +# } +# +# assert result == expected_output +# +# +# def test_clean_roof_with_dwelling_above(data): +# epc = EpcClean(data) +# result = epc.clean_roof('(another dwelling above)') +# +# expected_output = { +# "is_pitched": False, +# "is_roof_room": False, +# "has_loft": False, +# "insulation_thickness": 0, +# "has_dwelling_above": True, +# "assumed": False, +# "is_flat": False, +# "is_thatched": False, +# "thermal_transmittence": None, +# "thermal_transmittence_unit": None, +# } +# +# assert result == expected_output + + +class TestEpcClean: + @pytest.fixture(autouse=True) + def load_data(self): + with open(input_data_path, "rb") as file: + self.data = pickle.load(file) + self.ec = EpcClean(self.data) + + def test_clean(self): + self.ec.clean() + assert len(self.ec.cleaned["roof-description"]) == len(self.ec.unique_vals["roof-description"]) + + def test__init_empty_cleaned_obj(self): + self.ec._init_empty_cleaned_obj() + assert all([len(values) == 0 for values in self.ec.cleaned.values()]) + + def test__search_split_roof_description(self): + assert self.ec._search_split_roof_description("insulated") == "average" + assert self.ec._search_split_roof_description("limited") == "below average" + with pytest.raises(NotImplementedError): + self.ec._search_split_roof_description("unknown") + + def test__find_insulation_thickness(self): + assert self.ec._find_insulation_thickness("no insulation", False, False, False) == 0 + + def test__extract_thermal_transmittence(self): + description = "U-value of 2.3 w/m-¦k" + assert self.ec._extract_thermal_transmittence(description) == (2.3, "w/m-¦k") + + def test_clean_roof(self): + result = self.ec.clean_roof('Pitched, 270 mm loft insulation') + + # change the expected output based on your requirement + expected_output = { + "is_pitched": True, + "is_roof_room": False, + "has_loft": True, + "insulation_thickness": 270, + "has_dwelling_above": False, + "assumed": False, + "is_flat": False, + "thermal_transmittence": None, + "thermal_transmittence_unit": None + } + + assert result == expected_output + + def test_clean_roof_with_dwelling_above(self): + result = self.ec.clean_roof('(another dwelling above)') + + expected_output = { + "is_pitched": False, + "is_roof_room": False, + "has_loft": False, + "insulation_thickness": 0, + "has_dwelling_above": True, + "assumed": False, + "is_flat": False, + "is_thatched": False, + "thermal_transmittence": None, + "thermal_transmittence_unit": None, + } + + assert result == expected_output diff --git a/epc_data/tests/test_data/EpcClean_inputs.obj b/epc_data/tests/test_data/EpcClean_inputs.obj new file mode 100644 index 00000000..817cb6f4 Binary files /dev/null and b/epc_data/tests/test_data/EpcClean_inputs.obj differ