mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Broken up the cleaning class
This commit is contained in:
parent
872591505f
commit
e927e6d41f
4 changed files with 248 additions and 224 deletions
|
|
@ -1,16 +1,14 @@
|
||||||
import re
|
from typing import List, Dict, Any
|
||||||
from typing import List, Dict, Any, Union, Tuple, Optional
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
|
from epc_data.cleaning.Roof import CleanRoof
|
||||||
|
|
||||||
|
|
||||||
class EpcClean:
|
class EpcClean:
|
||||||
"""
|
"""
|
||||||
Container for methods which we utilise for cleaning EPC data
|
Container for methods which we utilise for cleaning EPC data
|
||||||
"""
|
"""
|
||||||
|
|
||||||
U_VALUE_REGEX = re.compile(r"(\d+\.\d+)")
|
|
||||||
UNIT_REGEX = re.compile(r"(w/m-¦k)")
|
|
||||||
|
|
||||||
CLEANING_FIELDS: List[str] = [
|
CLEANING_FIELDS: List[str] = [
|
||||||
"roof-description",
|
"roof-description",
|
||||||
"floor-description",
|
"floor-description",
|
||||||
|
|
@ -37,13 +35,15 @@ class EpcClean:
|
||||||
for field in self.CLEANING_FIELDS:
|
for field in self.CLEANING_FIELDS:
|
||||||
self.unique_vals[field] = Counter([v[field] for v in self.data])
|
self.unique_vals[field] = Counter([v[field] for v in self.data])
|
||||||
|
|
||||||
for description in self.unique_vals["roof-description"].keys():
|
self.clean_roof()
|
||||||
self.cleaned["roof-description"].append(
|
|
||||||
{
|
# for description in self.unique_vals["floor-description"].keys():
|
||||||
"original_description": description,
|
# self.cleaned["floor-description"].append(
|
||||||
**self.clean_roof(description)
|
# {
|
||||||
}
|
# "original_description": description,
|
||||||
)
|
# **self.clean_floor(description)
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
def _init_empty_cleaned_obj(self) -> None:
|
def _init_empty_cleaned_obj(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
@ -51,206 +51,11 @@ class EpcClean:
|
||||||
"""
|
"""
|
||||||
self.cleaned = {field: [] for field in self.CLEANING_FIELDS}
|
self.cleaned = {field: [] for field in self.CLEANING_FIELDS}
|
||||||
|
|
||||||
@staticmethod
|
def clean_roof(self):
|
||||||
def _search_split_roof_description(desc: str) -> str:
|
for description in self.unique_vals["roof-description"].keys():
|
||||||
"""
|
self.cleaned["roof-description"].append(
|
||||||
Searches roof descriptions and looks for key words, determining a description about the roof's insulation.
|
{
|
||||||
|
"original_description": description,
|
||||||
:param desc: Description to be searched.
|
**CleanRoof(description).clean()
|
||||||
:return: Result of the search.
|
}
|
||||||
"""
|
|
||||||
if desc == "insulated":
|
|
||||||
return "average"
|
|
||||||
if desc == "limited":
|
|
||||||
return "below average"
|
|
||||||
raise NotImplementedError("Handle me")
|
|
||||||
|
|
||||||
def _find_insulation_thickness(
|
|
||||||
self, description_lower: str, is_pitched: bool, is_roof_room: bool, is_flat: bool
|
|
||||||
) -> Union[int, str, None]:
|
|
||||||
"""
|
|
||||||
Finds insulation thickness in the description.
|
|
||||||
|
|
||||||
:param description_lower: Lowercase description.
|
|
||||||
:param is_pitched: Whether the roof is pitched.
|
|
||||||
:param is_roof_room: Whether there is a room in the roof.
|
|
||||||
:param is_flat: Whether the roof is flat.
|
|
||||||
:return: Insulation thickness if found, else None.
|
|
||||||
"""
|
|
||||||
if "no insulation" in description_lower:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if is_pitched:
|
|
||||||
try:
|
|
||||||
thickness = description_lower.split("pitched,")[-1].split("mm")[0].strip()
|
|
||||||
if "+" in thickness:
|
|
||||||
return thickness
|
|
||||||
try:
|
|
||||||
return int(thickness)
|
|
||||||
except ValueError as int_error:
|
|
||||||
raise ValueError(int_error)
|
|
||||||
except ValueError as _:
|
|
||||||
if "invalid input" in description_lower:
|
|
||||||
return None
|
|
||||||
desc = description_lower.split("pitched,")[-1].strip().split(" ")[0]
|
|
||||||
return self._search_split_roof_description(desc)
|
|
||||||
|
|
||||||
if is_roof_room:
|
|
||||||
desc_split_lookup = {
|
|
||||||
"ceiling insulated": "average",
|
|
||||||
"thatched": "average",
|
|
||||||
}
|
|
||||||
# Just search for specific phrases
|
|
||||||
desc_split = description_lower.split("roof room(s),")[-1].strip()
|
|
||||||
res = desc_split_lookup.get(desc_split)
|
|
||||||
if res:
|
|
||||||
return res
|
|
||||||
|
|
||||||
desc = desc_split.split(" ")[0]
|
|
||||||
return self._search_split_roof_description(desc)
|
|
||||||
|
|
||||||
if is_flat:
|
|
||||||
# Just search for specific phrases
|
|
||||||
desc = description_lower.split("flat,")[-1].lstrip().split(" ")[0]
|
|
||||||
return self._search_split_roof_description(desc)
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _extract_thermal_transmittence(self, description_lower: str) -> Tuple[Union[float, None], Union[str, None]]:
|
|
||||||
"""
|
|
||||||
Extracts thermal transmittance from the description.
|
|
||||||
|
|
||||||
:param description_lower: Lowercase description.
|
|
||||||
:return: Tuple containing U-value and unit.
|
|
||||||
"""
|
|
||||||
# Find U-value
|
|
||||||
u_value = re.search(self.U_VALUE_REGEX, description_lower)
|
|
||||||
if u_value is not None:
|
|
||||||
u_value = float(u_value.group(1))
|
|
||||||
else:
|
|
||||||
u_value = None
|
|
||||||
|
|
||||||
# Find unit
|
|
||||||
unit = re.search(self.UNIT_REGEX, description_lower)
|
|
||||||
if unit is not None:
|
|
||||||
unit = unit.group(1)
|
|
||||||
else:
|
|
||||||
unit = None
|
|
||||||
|
|
||||||
return u_value, unit
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _make_clean_roof_output(
|
|
||||||
is_valid: bool,
|
|
||||||
at_rafters: bool,
|
|
||||||
is_pitched: bool,
|
|
||||||
is_roof_room: bool,
|
|
||||||
has_loft: bool,
|
|
||||||
insulation_thickness: str | int | None,
|
|
||||||
has_dwelling_above: bool,
|
|
||||||
assumed: bool,
|
|
||||||
is_flat: bool,
|
|
||||||
is_thatched: bool,
|
|
||||||
thermal_transmittence: Optional[float],
|
|
||||||
thermal_transmittence_unit: Optional[str]
|
|
||||||
) -> Dict[str, Union[bool, str, None]]:
|
|
||||||
"""
|
|
||||||
Utility function to ensure all the keys are present in the output.
|
|
||||||
|
|
||||||
:param is_valid: True if the roof descrption is valid, False otherwise
|
|
||||||
:param at_rafters: True if the insulation is at the rafters, False otherwise
|
|
||||||
:param is_pitched: True if the roof is pitched, False otherwise
|
|
||||||
:param is_roof_room: True if there is a room in the roof, False otherwise
|
|
||||||
:param has_loft: True if there is a loft, False otherwise
|
|
||||||
:param insulation_thickness: The thickness of the insulation
|
|
||||||
:param has_dwelling_above: True if there is a dwelling above, False otherwise
|
|
||||||
:param assumed: True if the roof type was assumed based on property age, False otherwise
|
|
||||||
:param is_flat: True if the roof is flat, False otherwise
|
|
||||||
:param is_thatched: True if the roof is thatched, False otherwise
|
|
||||||
:param thermal_transmittence: The thermal transmittence value of the roof, if known
|
|
||||||
:param thermal_transmittence_unit: The unit of thermal transmittence, if known
|
|
||||||
:return: A dictionary containing all the information about the roof.
|
|
||||||
"""
|
|
||||||
|
|
||||||
return {
|
|
||||||
"is_valid": is_valid,
|
|
||||||
"at_rafters": at_rafters,
|
|
||||||
"is_pitched": is_pitched,
|
|
||||||
"is_roof_room": is_roof_room,
|
|
||||||
"has_loft": has_loft,
|
|
||||||
"insulation_thickness": insulation_thickness,
|
|
||||||
"has_dwelling_above": has_dwelling_above,
|
|
||||||
"assumed": assumed,
|
|
||||||
"is_flat": is_flat,
|
|
||||||
"is_thatched": is_thatched,
|
|
||||||
"thermal_transmittence": thermal_transmittence,
|
|
||||||
"thermal_transmittence_unit": thermal_transmittence_unit
|
|
||||||
}
|
|
||||||
|
|
||||||
def clean_roof(self, description: str) -> Dict[str, Union[str, bool, int, None]]:
|
|
||||||
"""
|
|
||||||
We aim to extract features about the roof, so we can characterise it. We will check:
|
|
||||||
- If the roof is pitched
|
|
||||||
- If there is a room roof
|
|
||||||
- if there is a loft
|
|
||||||
- If it has insulation
|
|
||||||
- if so, what degree of insulation
|
|
||||||
|
|
||||||
:param description: Description of the roof.
|
|
||||||
:return: Dictionary of attributes of the roof.
|
|
||||||
"""
|
|
||||||
description_lower = description.lower().strip()
|
|
||||||
|
|
||||||
if "another dwelling above" in description_lower or "other premises above" in description_lower:
|
|
||||||
return self._make_clean_roof_output(
|
|
||||||
is_valid="invalid" not in description_lower,
|
|
||||||
at_rafters="at rafters" in description_lower,
|
|
||||||
is_pitched=False,
|
|
||||||
is_roof_room=False,
|
|
||||||
has_loft=False,
|
|
||||||
insulation_thickness=0,
|
|
||||||
has_dwelling_above=True,
|
|
||||||
assumed="assumed" in description_lower,
|
|
||||||
is_flat="flat" in description_lower,
|
|
||||||
is_thatched=False,
|
|
||||||
thermal_transmittence=None,
|
|
||||||
thermal_transmittence_unit=None
|
|
||||||
)
|
)
|
||||||
|
|
||||||
is_pitched = "pitched" in description_lower
|
|
||||||
is_roof_room = "roof room" in description_lower
|
|
||||||
has_loft = "loft" in description_lower
|
|
||||||
is_flat = "flat" in description_lower
|
|
||||||
is_thatched = "thatched" in description_lower
|
|
||||||
at_rafters = "at rafters" in description_lower
|
|
||||||
|
|
||||||
thermal_transmittence, thermal_transmittence_unit, insulation_thickness = None, None, None
|
|
||||||
if "insulation" in description_lower or "insulated" in description_lower:
|
|
||||||
insulation_thickness = self._find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat)
|
|
||||||
elif "thermal transmittance" in description_lower:
|
|
||||||
thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower)
|
|
||||||
elif is_thatched:
|
|
||||||
# Search for these features:
|
|
||||||
thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower)
|
|
||||||
insulation_thickness = self._find_insulation_thickness(
|
|
||||||
description_lower, is_pitched, is_roof_room, is_flat
|
|
||||||
)
|
|
||||||
elif description_lower == "pitched":
|
|
||||||
thermal_transmittence, thermal_transmittence_unit, insulation_thickness = None, None, None
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not handled this")
|
|
||||||
|
|
||||||
return self._make_clean_roof_output(
|
|
||||||
is_valid="invalid" not in description_lower,
|
|
||||||
at_rafters=at_rafters,
|
|
||||||
is_pitched=is_pitched,
|
|
||||||
is_roof_room=is_roof_room,
|
|
||||||
has_loft=has_loft,
|
|
||||||
insulation_thickness=insulation_thickness,
|
|
||||||
has_dwelling_above=False,
|
|
||||||
assumed="assumed" in description_lower,
|
|
||||||
is_flat=is_flat,
|
|
||||||
is_thatched=is_thatched,
|
|
||||||
thermal_transmittence=thermal_transmittence,
|
|
||||||
thermal_transmittence_unit=thermal_transmittence_unit
|
|
||||||
)
|
|
||||||
|
|
|
||||||
|
|
@ -38,5 +38,5 @@ def handler():
|
||||||
|
|
||||||
cleaner.clean()
|
cleaner.clean()
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
df = pd.DataFrame(cleaner.cleaned["roof-description"])
|
|
||||||
|
|
|
||||||
218
epc_data/cleaning/Roof.py
Normal file
218
epc_data/cleaning/Roof.py
Normal file
|
|
@ -0,0 +1,218 @@
|
||||||
|
import re
|
||||||
|
from typing import Dict, Union, Tuple, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class CleanRoof:
|
||||||
|
U_VALUE_REGEX = re.compile(r"(\d+\.\d+)")
|
||||||
|
UNIT_REGEX = re.compile(r"(w/m-¦k)")
|
||||||
|
|
||||||
|
def __init__(self, description):
|
||||||
|
"""
|
||||||
|
:param description: Description of the roof.
|
||||||
|
"""
|
||||||
|
self.description: str = description
|
||||||
|
|
||||||
|
def clean(self) -> Dict[str, Union[str, bool, int, None]]:
|
||||||
|
"""
|
||||||
|
We aim to extract features about the roof, so we can characterise it. We will check:
|
||||||
|
- If the roof is pitched
|
||||||
|
- If there is a room roof
|
||||||
|
- if there is a loft
|
||||||
|
- If it has insulation
|
||||||
|
- if so, what degree of insulation
|
||||||
|
|
||||||
|
:return: Dictionary of attributes of the roof.
|
||||||
|
"""
|
||||||
|
description_lower = self.description.lower().strip()
|
||||||
|
|
||||||
|
if "another dwelling above" in description_lower or "other premises above" in description_lower:
|
||||||
|
return self._make_clean_output(
|
||||||
|
is_valid="invalid" not in description_lower,
|
||||||
|
at_rafters="at rafters" in description_lower,
|
||||||
|
is_pitched=False,
|
||||||
|
is_roof_room=False,
|
||||||
|
has_loft=False,
|
||||||
|
insulation_thickness=0,
|
||||||
|
has_dwelling_above=True,
|
||||||
|
assumed="assumed" in description_lower,
|
||||||
|
is_flat="flat" in description_lower,
|
||||||
|
is_thatched=False,
|
||||||
|
thermal_transmittence=None,
|
||||||
|
thermal_transmittence_unit=None
|
||||||
|
)
|
||||||
|
|
||||||
|
is_pitched = "pitched" in description_lower
|
||||||
|
is_roof_room = "roof room" in description_lower
|
||||||
|
has_loft = "loft" in description_lower
|
||||||
|
is_flat = "flat" in description_lower
|
||||||
|
is_thatched = "thatched" in description_lower
|
||||||
|
at_rafters = "at rafters" in description_lower
|
||||||
|
|
||||||
|
thermal_transmittence, thermal_transmittence_unit, insulation_thickness = None, None, None
|
||||||
|
if "insulation" in description_lower or "insulated" in description_lower:
|
||||||
|
insulation_thickness = self._find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat)
|
||||||
|
elif "thermal transmittance" in description_lower:
|
||||||
|
thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower)
|
||||||
|
elif is_thatched:
|
||||||
|
# Search for these features:
|
||||||
|
thermal_transmittence, thermal_transmittence_unit = self._extract_thermal_transmittence(description_lower)
|
||||||
|
insulation_thickness = self._find_insulation_thickness(
|
||||||
|
description_lower, is_pitched, is_roof_room, is_flat
|
||||||
|
)
|
||||||
|
elif description_lower == "pitched":
|
||||||
|
thermal_transmittence, thermal_transmittence_unit, insulation_thickness = None, None, None
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Not handled this")
|
||||||
|
|
||||||
|
return self._make_clean_output(
|
||||||
|
is_valid="invalid" not in description_lower,
|
||||||
|
at_rafters=at_rafters,
|
||||||
|
is_pitched=is_pitched,
|
||||||
|
is_roof_room=is_roof_room,
|
||||||
|
has_loft=has_loft,
|
||||||
|
insulation_thickness=insulation_thickness,
|
||||||
|
has_dwelling_above=False,
|
||||||
|
assumed="assumed" in description_lower,
|
||||||
|
is_flat=is_flat,
|
||||||
|
is_thatched=is_thatched,
|
||||||
|
thermal_transmittence=thermal_transmittence,
|
||||||
|
thermal_transmittence_unit=thermal_transmittence_unit
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _make_clean_output(
|
||||||
|
is_valid: bool,
|
||||||
|
at_rafters: bool,
|
||||||
|
is_pitched: bool,
|
||||||
|
is_roof_room: bool,
|
||||||
|
has_loft: bool,
|
||||||
|
insulation_thickness: str | int | None,
|
||||||
|
has_dwelling_above: bool,
|
||||||
|
assumed: bool,
|
||||||
|
is_flat: bool,
|
||||||
|
is_thatched: bool,
|
||||||
|
thermal_transmittence: Optional[float],
|
||||||
|
thermal_transmittence_unit: Optional[str]
|
||||||
|
) -> Dict[str, Union[bool, str, None]]:
|
||||||
|
"""
|
||||||
|
Utility function to ensure all the keys are present in the output.
|
||||||
|
|
||||||
|
:param is_valid: True if the roof descrption is valid, False otherwise
|
||||||
|
:param at_rafters: True if the insulation is at the rafters, False otherwise
|
||||||
|
:param is_pitched: True if the roof is pitched, False otherwise
|
||||||
|
:param is_roof_room: True if there is a room in the roof, False otherwise
|
||||||
|
:param has_loft: True if there is a loft, False otherwise
|
||||||
|
:param insulation_thickness: The thickness of the insulation
|
||||||
|
:param has_dwelling_above: True if there is a dwelling above, False otherwise
|
||||||
|
:param assumed: True if the roof type was assumed based on property age, False otherwise
|
||||||
|
:param is_flat: True if the roof is flat, False otherwise
|
||||||
|
:param is_thatched: True if the roof is thatched, False otherwise
|
||||||
|
:param thermal_transmittence: The thermal transmittence value of the roof, if known
|
||||||
|
:param thermal_transmittence_unit: The unit of thermal transmittence, if known
|
||||||
|
:return: A dictionary containing all the information about the roof.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return {
|
||||||
|
"is_valid": is_valid,
|
||||||
|
"at_rafters": at_rafters,
|
||||||
|
"is_pitched": is_pitched,
|
||||||
|
"is_roof_room": is_roof_room,
|
||||||
|
"has_loft": has_loft,
|
||||||
|
"insulation_thickness": insulation_thickness,
|
||||||
|
"has_dwelling_above": has_dwelling_above,
|
||||||
|
"assumed": assumed,
|
||||||
|
"is_flat": is_flat,
|
||||||
|
"is_thatched": is_thatched,
|
||||||
|
"thermal_transmittence": thermal_transmittence,
|
||||||
|
"thermal_transmittence_unit": thermal_transmittence_unit
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _search_split_description(desc: str) -> str:
|
||||||
|
"""
|
||||||
|
Searches roof descriptions and looks for key words, determining a description about the roof's insulation.
|
||||||
|
|
||||||
|
:param desc: Description to be searched.
|
||||||
|
:return: Result of the search.
|
||||||
|
"""
|
||||||
|
if desc == "insulated":
|
||||||
|
return "average"
|
||||||
|
if desc == "limited":
|
||||||
|
return "below average"
|
||||||
|
raise NotImplementedError("Handle me")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _find_insulation_thickness(
|
||||||
|
cls, description_lower: str, is_pitched: bool, is_roof_room: bool, is_flat: bool
|
||||||
|
) -> Union[int, str, None]:
|
||||||
|
"""
|
||||||
|
Finds insulation thickness in the description.
|
||||||
|
|
||||||
|
:param description_lower: Lowercase description.
|
||||||
|
:param is_pitched: Whether the roof is pitched.
|
||||||
|
:param is_roof_room: Whether there is a room in the roof.
|
||||||
|
:param is_flat: Whether the roof is flat.
|
||||||
|
:return: Insulation thickness if found, else None.
|
||||||
|
"""
|
||||||
|
if "no insulation" in description_lower:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if is_pitched:
|
||||||
|
try:
|
||||||
|
thickness = description_lower.split("pitched,")[-1].split("mm")[0].strip()
|
||||||
|
if "+" in thickness:
|
||||||
|
return thickness
|
||||||
|
try:
|
||||||
|
return int(thickness)
|
||||||
|
except ValueError as int_error:
|
||||||
|
raise ValueError(int_error)
|
||||||
|
except ValueError as _:
|
||||||
|
if "invalid input" in description_lower:
|
||||||
|
return None
|
||||||
|
desc = description_lower.split("pitched,")[-1].strip().split(" ")[0]
|
||||||
|
return cls._search_split_description(desc)
|
||||||
|
|
||||||
|
if is_roof_room:
|
||||||
|
desc_split_lookup = {
|
||||||
|
"ceiling insulated": "average",
|
||||||
|
"thatched": "average",
|
||||||
|
}
|
||||||
|
# Just search for specific phrases
|
||||||
|
desc_split = description_lower.split("roof room(s),")[-1].strip()
|
||||||
|
res = desc_split_lookup.get(desc_split)
|
||||||
|
if res:
|
||||||
|
return res
|
||||||
|
|
||||||
|
desc = desc_split.split(" ")[0]
|
||||||
|
return cls._search_split_description(desc)
|
||||||
|
|
||||||
|
if is_flat:
|
||||||
|
# Just search for specific phrases
|
||||||
|
desc = description_lower.split("flat,")[-1].lstrip().split(" ")[0]
|
||||||
|
return cls._search_split_description(desc)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_thermal_transmittence(cls, description_lower: str) -> Tuple[Union[float, None], Union[str, None]]:
|
||||||
|
"""
|
||||||
|
Extracts thermal transmittance from the description.
|
||||||
|
|
||||||
|
:param description_lower: Lowercase description.
|
||||||
|
:return: Tuple containing U-value and unit.
|
||||||
|
"""
|
||||||
|
# Find U-value
|
||||||
|
u_value = re.search(cls.U_VALUE_REGEX, description_lower)
|
||||||
|
if u_value is not None:
|
||||||
|
u_value = float(u_value.group(1))
|
||||||
|
else:
|
||||||
|
u_value = None
|
||||||
|
|
||||||
|
# Find unit
|
||||||
|
unit = re.search(cls.UNIT_REGEX, description_lower)
|
||||||
|
if unit is not None:
|
||||||
|
unit = unit.group(1)
|
||||||
|
else:
|
||||||
|
unit = None
|
||||||
|
|
||||||
|
return u_value, unit
|
||||||
|
|
@ -3,6 +3,7 @@ import pickle
|
||||||
from epc_data.EpcClean import EpcClean
|
from epc_data.EpcClean import EpcClean
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from epc_data.tests.test_data.EpcClean_test_roof_cases import clean_roof_test_cases
|
from epc_data.tests.test_data.EpcClean_test_roof_cases import clean_roof_test_cases
|
||||||
|
from epc_data.cleaning.Roof import CleanRoof
|
||||||
|
|
||||||
# For local testing
|
# For local testing
|
||||||
if __file__ == "<input>":
|
if __file__ == "<input>":
|
||||||
|
|
@ -32,20 +33,20 @@ class TestEpcClean:
|
||||||
assert all([len(values) == 0 for values in self.cleaner.cleaned.values()])
|
assert all([len(values) == 0 for values in self.cleaner.cleaned.values()])
|
||||||
|
|
||||||
def test__search_split_roof_description(self):
|
def test__search_split_roof_description(self):
|
||||||
assert self.cleaner._search_split_roof_description("insulated") == "average"
|
assert CleanRoof._search_split_description("insulated") == "average"
|
||||||
assert self.cleaner._search_split_roof_description("limited") == "below average"
|
assert CleanRoof._search_split_description("limited") == "below average"
|
||||||
with pytest.raises(NotImplementedError):
|
with pytest.raises(NotImplementedError):
|
||||||
self.cleaner._search_split_roof_description("unknown")
|
CleanRoof._search_split_description("unknown")
|
||||||
|
|
||||||
def test__find_insulation_thickness(self):
|
def test__find_insulation_thickness(self):
|
||||||
assert self.cleaner._find_insulation_thickness("no insulation", False, False, False) == 0
|
assert CleanRoof._find_insulation_thickness("no insulation", False, False, False) == 0
|
||||||
|
|
||||||
def test__extract_thermal_transmittence(self):
|
def test__extract_thermal_transmittence(self):
|
||||||
description = "U-value of 2.3 w/m-¦k"
|
description = "U-value of 2.3 w/m-¦k"
|
||||||
assert self.cleaner._extract_thermal_transmittence(description) == (2.3, "w/m-¦k")
|
assert CleanRoof._extract_thermal_transmittence(description) == (2.3, "w/m-¦k")
|
||||||
|
|
||||||
def test_clean_roof(self):
|
def test_clean_roof(self):
|
||||||
result = self.cleaner.clean_roof('Pitched, 270 mm loft insulation')
|
result = CleanRoof('Pitched, 270 mm loft insulation').clean()
|
||||||
|
|
||||||
# change the expected output based on your requirement
|
# change the expected output based on your requirement
|
||||||
expected_output = {
|
expected_output = {
|
||||||
|
|
@ -66,7 +67,7 @@ class TestEpcClean:
|
||||||
assert result == expected_output
|
assert result == expected_output
|
||||||
|
|
||||||
for test_case in clean_roof_test_cases:
|
for test_case in clean_roof_test_cases:
|
||||||
result = self.cleaner.clean_roof(test_case['original_description'])
|
result = CleanRoof(test_case['original_description']).clean()
|
||||||
# Ensure the output ordering is correct
|
# Ensure the output ordering is correct
|
||||||
expected_result = {key: test_case[key] for key in result.keys()}
|
expected_result = {key: test_case[key] for key in result.keys()}
|
||||||
expected_result["desc"] = test_case["original_description"]
|
expected_result["desc"] = test_case["original_description"]
|
||||||
|
|
@ -74,7 +75,7 @@ class TestEpcClean:
|
||||||
assert result == expected_result
|
assert result == expected_result
|
||||||
|
|
||||||
def test_clean_roof_with_dwelling_above(self):
|
def test_clean_roof_with_dwelling_above(self):
|
||||||
result = self.cleaner.clean_roof('(another dwelling above)')
|
result = CleanRoof('(another dwelling above)').clean()
|
||||||
|
|
||||||
expected_output = {
|
expected_output = {
|
||||||
"is_valid": True,
|
"is_valid": True,
|
||||||
Loading…
Add table
Reference in a new issue