refactored WallAttributes

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-13 10:26:10 +01:00
parent 40372c70d5
commit 7c4d115f5f
2 changed files with 41 additions and 174 deletions

View file

@ -44,7 +44,7 @@ def handler():
descriptions = {x["walls-description"] for x in data}
out = []
for description in descriptions:
res = WallAttributes(description).clean()
res = WallAttributes(description).process_description()
out.append(
{
"original_description": description,

View file

@ -1,11 +1,8 @@
from epc_data.attributes.attribute_utils import extract_thermal_transmittence
from typing import Optional
import nltk
from nltk.corpus import stopwords
import re
from typing import Dict, Union
class WallAttributes:
IGNORE_STOP_WORDS = ["no"]
def __init__(self, description: str):
"""
@ -13,180 +10,50 @@ class WallAttributes:
"""
self.description: str = description
# TODO: Move this out of here
nltk.download('stopwords', quiet=True)
self.stop_words = stopwords.words('english')
self.stop_words = [word for word in self.stop_words if word not in self.IGNORE_STOP_WORDS]
def process(self) -> Dict[str, Union[float, str, bool, None]]:
result: Dict[str, Union[float, str, bool, None]] = {}
description = self.description.lower()
def clean(self):
"""
:return:
"""
description_lower = self.description.lower().strip()
thermal_transmittence: Optional[float] = None
thermal_transmittence_unit: Optional[str] = None
is_cavity_wall: bool = "cavity wall" in description_lower
has_filled_cavity: bool = "filled cavity" in description_lower
is_solid_brick: bool = "solid brick" in description_lower
# TODO: Find out what this means - is_system_built
is_system_built: bool = "system built" in description_lower
is_timber_frame: bool = "timber frame" in description_lower
is_granite_or_whinstone: bool = "granite or whinstone" in description_lower
# The "as built" description indicates that these factors are based on the original construction
# specifications and materials.
as_built = "as built" in description_lower
assumed = "assumed" in description_lower
insulation_thickness = None
if "thermal transmittance" in description_lower:
thermal_transmittence, thermal_transmittence_unit = extract_thermal_transmittence(description_lower)
elif "insulation" in description_lower or "insulated" in description_lower:
insulation_thickness = self._characterise_insulation(
description_lower,
is_cavity_wall,
has_filled_cavity,
is_solid_brick,
as_built,
assumed,
is_system_built,
is_timber_frame,
is_granite_or_whinstone
)
elif is_cavity_wall and has_filled_cavity:
# We can likely remove this branch of the if statement
thermal_transmittence, thermal_transmittence_unit, = None, None
# thermal transmittance - it can be negative, which is erroneous; however, we'll still pull it out
match = re.search(r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)", description)
if match:
result['thermal_transmittance'] = float(match.group(1))
result['thermal_transmittance_unit'] = match.group(2)
else:
result['thermal_transmittance'] = None
result['thermal_transmittance_unit'] = None
raise Exception("H")
return self._make_output(
thermal_transmittence=thermal_transmittence,
thermal_transmittence_unit=thermal_transmittence_unit,
is_solid_brick=is_solid_brick,
insulation_thickness=insulation_thickness
)
def _characterise_insulation(
self,
description_lower,
is_cavity_wall,
has_filled_cavity,
is_solid_brick,
as_built,
assumed,
is_system_built,
is_timber_frame,
is_granite_or_whinstone
):
search_description = description_lower.replace("(assumed)", "").strip() if assumed else description_lower
search_description = search_description.replace("as built,", "").strip() if as_built else search_description
search_description = search_description.replace("system built,", "").strip() if \
is_system_built else search_description
search_description = search_description.replace("timber frame,", "").strip() if \
is_timber_frame else search_description
search_description = search_description.replace("granite or whinstone,", "").strip() if \
is_granite_or_whinstone else search_description
search_description = search_description.replace("cavity wall,", "").strip() if \
is_cavity_wall else search_description
search_description = search_description.replace("filled cavity", "").strip() if \
has_filled_cavity else search_description
characterisation_map = {
"external": "external",
"internal": "internal"
}
# wall type
result['is_cavity_wall'] = 'cavity wall' in description
result['has_filled_cavity'] = 'filled cavity' in description
result['is_solid_brick'] = 'solid brick' in description
result['is_system_built'] = 'system built' in description
result['is_timber_frame'] = 'timber frame' in description
result['is_granite_or_whinstone'] = 'granite' in description or 'whinstone' in description
result['as_built'] = 'as built' in description
result['is_cob'] = 'cob' in description
result['assumed'] = 'assumed' in description
result['is_sandstone_or_limestone'] = 'sandstone or limestone' in description
# insulation thickness - this is far from a perfect approach and we'd likely need to use NLP to do this
# in general; however, this is sufficient for the MVP
thickness_map = {
"external": "average",
"internal": "average",
"partial": "below average",
"no": "none",
# TODO: CHECK IF ADDITIONAL = ABOVE AVERAGE
"additional": "above average"
"external insulation": "average",
"internal insulation": "average",
"partial insulation": "below average",
"no insulation": "none",
"additional insulation": "above average",
"insulated": "average"
}
insulation_term = "insulation" if "insulation" in search_description else "insulated"
search_description = search_description.replace(insulation_term, "").strip()
# TODO: We might not need all these if statements..
if is_cavity_wall:
if search_description == "":
insulation_thickness = "average"
insulation_characteristic = None
else:
insulation_characteristic = characterisation_map.get(
search_description.split(" ")[-1]
)
insulation_thickness = [k for k in thickness_map if k in search_description]
if not insulation_thickness or len(insulation_thickness) > 1:
raise Exception("Check me out")
insulation_thickness = thickness_map.get(insulation_thickness[0])
if not insulation_thickness:
raise NotImplementedError("Implement me! - insulation_thickness")
elif is_solid_brick:
desc_split = search_description.split("solid brick,")[-1].strip().split("as built,")[-1]
if desc_split == "":
insulation_thickness = "average"
insulation_characteristic = None
else:
insulation_thickness = thickness_map.get(desc_split.split(insulation_term)[0].strip())
if not insulation_thickness:
insulation_thickness = "average"
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
elif is_system_built:
if search_description == "":
insulation_thickness = "average"
insulation_characteristic = None
else:
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
insulation_thickness = [k for k in thickness_map if k in search_description]
if not insulation_thickness or len(insulation_thickness) > 1:
raise Exception("Check me out")
insulation_thickness = thickness_map.get(insulation_thickness[0])
elif is_timber_frame:
if search_description == "":
insulation_thickness = "average"
insulation_characteristic = None
else:
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
insulation_thickness = [k for k in thickness_map if k in search_description]
if not insulation_thickness or len(insulation_thickness) > 1:
raise Exception("Check me out")
insulation_thickness = thickness_map.get(insulation_thickness[0])
for key, value in thickness_map.items():
if key in description:
result['insulation_thickness'] = value
break
else:
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
result['insulation_thickness'] = None
insulation_thickness = [k for k in thickness_map if k in search_description]
if not insulation_thickness or len(insulation_thickness) > 1:
raise Exception("Check me out")
# insulation type
result['external_insulation'] = 'external insulation' in description
result['internal_insulation'] = 'internal insulation' in description
insulation_thickness = thickness_map.get(insulation_thickness[0])
return insulation_thickness, insulation_characteristic
@staticmethod
def _make_output(thermal_transmittence, thermal_transmittence_unit, is_solid_brick, insulation_thickness):
return {
"thermal_transmittence": thermal_transmittence,
"thermal_transmittence_unit": thermal_transmittence_unit,
"is_solid_brick": is_solid_brick,
"insulation_thickness": insulation_thickness
}
return result