mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
refactored WallAttributes
This commit is contained in:
parent
40372c70d5
commit
7c4d115f5f
2 changed files with 41 additions and 174 deletions
|
|
@ -44,7 +44,7 @@ def handler():
|
|||
descriptions = {x["walls-description"] for x in data}
|
||||
out = []
|
||||
for description in descriptions:
|
||||
res = WallAttributes(description).clean()
|
||||
res = WallAttributes(description).process_description()
|
||||
out.append(
|
||||
{
|
||||
"original_description": description,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
from epc_data.attributes.attribute_utils import extract_thermal_transmittence
|
||||
from typing import Optional
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
import re
|
||||
from typing import Dict, Union
|
||||
|
||||
|
||||
class WallAttributes:
|
||||
IGNORE_STOP_WORDS = ["no"]
|
||||
|
||||
def __init__(self, description: str):
|
||||
"""
|
||||
|
|
@ -13,180 +10,50 @@ class WallAttributes:
|
|||
"""
|
||||
self.description: str = description
|
||||
|
||||
# TODO: Move this out of here
|
||||
nltk.download('stopwords', quiet=True)
|
||||
self.stop_words = stopwords.words('english')
|
||||
self.stop_words = [word for word in self.stop_words if word not in self.IGNORE_STOP_WORDS]
|
||||
def process(self) -> Dict[str, Union[float, str, bool, None]]:
|
||||
result: Dict[str, Union[float, str, bool, None]] = {}
|
||||
description = self.description.lower()
|
||||
|
||||
def clean(self):
|
||||
"""
|
||||
|
||||
:return:
|
||||
"""
|
||||
|
||||
description_lower = self.description.lower().strip()
|
||||
|
||||
thermal_transmittence: Optional[float] = None
|
||||
thermal_transmittence_unit: Optional[str] = None
|
||||
|
||||
is_cavity_wall: bool = "cavity wall" in description_lower
|
||||
has_filled_cavity: bool = "filled cavity" in description_lower
|
||||
is_solid_brick: bool = "solid brick" in description_lower
|
||||
# TODO: Find out what this means - is_system_built
|
||||
is_system_built: bool = "system built" in description_lower
|
||||
is_timber_frame: bool = "timber frame" in description_lower
|
||||
is_granite_or_whinstone: bool = "granite or whinstone" in description_lower
|
||||
# The "as built" description indicates that these factors are based on the original construction
|
||||
# specifications and materials.
|
||||
as_built = "as built" in description_lower
|
||||
assumed = "assumed" in description_lower
|
||||
|
||||
insulation_thickness = None
|
||||
if "thermal transmittance" in description_lower:
|
||||
thermal_transmittence, thermal_transmittence_unit = extract_thermal_transmittence(description_lower)
|
||||
|
||||
elif "insulation" in description_lower or "insulated" in description_lower:
|
||||
insulation_thickness = self._characterise_insulation(
|
||||
description_lower,
|
||||
is_cavity_wall,
|
||||
has_filled_cavity,
|
||||
is_solid_brick,
|
||||
as_built,
|
||||
assumed,
|
||||
is_system_built,
|
||||
is_timber_frame,
|
||||
is_granite_or_whinstone
|
||||
)
|
||||
elif is_cavity_wall and has_filled_cavity:
|
||||
# We can likely remove this branch of the if statement
|
||||
thermal_transmittence, thermal_transmittence_unit, = None, None
|
||||
# thermal transmittance - it can be negative, which is erroneous; however, we'll still pull it out
|
||||
match = re.search(r"average thermal transmittance (-?\d+\.\d+)\s(w/m-¦k)", description)
|
||||
if match:
|
||||
result['thermal_transmittance'] = float(match.group(1))
|
||||
result['thermal_transmittance_unit'] = match.group(2)
|
||||
else:
|
||||
result['thermal_transmittance'] = None
|
||||
result['thermal_transmittance_unit'] = None
|
||||
|
||||
raise Exception("H")
|
||||
|
||||
return self._make_output(
|
||||
thermal_transmittence=thermal_transmittence,
|
||||
thermal_transmittence_unit=thermal_transmittence_unit,
|
||||
is_solid_brick=is_solid_brick,
|
||||
insulation_thickness=insulation_thickness
|
||||
)
|
||||
|
||||
def _characterise_insulation(
|
||||
self,
|
||||
description_lower,
|
||||
is_cavity_wall,
|
||||
has_filled_cavity,
|
||||
is_solid_brick,
|
||||
as_built,
|
||||
assumed,
|
||||
is_system_built,
|
||||
is_timber_frame,
|
||||
is_granite_or_whinstone
|
||||
):
|
||||
|
||||
search_description = description_lower.replace("(assumed)", "").strip() if assumed else description_lower
|
||||
search_description = search_description.replace("as built,", "").strip() if as_built else search_description
|
||||
search_description = search_description.replace("system built,", "").strip() if \
|
||||
is_system_built else search_description
|
||||
search_description = search_description.replace("timber frame,", "").strip() if \
|
||||
is_timber_frame else search_description
|
||||
search_description = search_description.replace("granite or whinstone,", "").strip() if \
|
||||
is_granite_or_whinstone else search_description
|
||||
search_description = search_description.replace("cavity wall,", "").strip() if \
|
||||
is_cavity_wall else search_description
|
||||
search_description = search_description.replace("filled cavity", "").strip() if \
|
||||
has_filled_cavity else search_description
|
||||
|
||||
characterisation_map = {
|
||||
"external": "external",
|
||||
"internal": "internal"
|
||||
}
|
||||
# wall type
|
||||
result['is_cavity_wall'] = 'cavity wall' in description
|
||||
result['has_filled_cavity'] = 'filled cavity' in description
|
||||
result['is_solid_brick'] = 'solid brick' in description
|
||||
result['is_system_built'] = 'system built' in description
|
||||
result['is_timber_frame'] = 'timber frame' in description
|
||||
result['is_granite_or_whinstone'] = 'granite' in description or 'whinstone' in description
|
||||
result['as_built'] = 'as built' in description
|
||||
result['is_cob'] = 'cob' in description
|
||||
result['assumed'] = 'assumed' in description
|
||||
result['is_sandstone_or_limestone'] = 'sandstone or limestone' in description
|
||||
|
||||
# insulation thickness - this is far from a perfect approach and we'd likely need to use NLP to do this
|
||||
# generally; however, this is sufficient for an MVP
|
||||
thickness_map = {
|
||||
"external": "average",
|
||||
"internal": "average",
|
||||
"partial": "below average",
|
||||
"no": "none",
|
||||
# TODO: CHECK IF ADDITIONAL = ABOVE AVERAGE
|
||||
"additional": "above average"
|
||||
"external insulation": "average",
|
||||
"internal insulation": "average",
|
||||
"partial insulation": "below average",
|
||||
"no insulation": "none",
|
||||
"additional insulation": "above average",
|
||||
"insulated": "average"
|
||||
}
|
||||
|
||||
insulation_term = "insulation" if "insulation" in search_description else "insulated"
|
||||
|
||||
search_description = search_description.replace(insulation_term, "").strip()
|
||||
|
||||
# TODO: We might not need all these if statements..
|
||||
if is_cavity_wall:
|
||||
|
||||
if search_description == "":
|
||||
insulation_thickness = "average"
|
||||
insulation_characteristic = None
|
||||
else:
|
||||
insulation_characteristic = characterisation_map.get(
|
||||
search_description.split(" ")[-1]
|
||||
)
|
||||
|
||||
insulation_thickness = [k for k in thickness_map if k in search_description]
|
||||
if not insulation_thickness or len(insulation_thickness) > 1:
|
||||
raise Exception("Check me out")
|
||||
|
||||
insulation_thickness = thickness_map.get(insulation_thickness[0])
|
||||
|
||||
if not insulation_thickness:
|
||||
raise NotImplementedError("Implement me! - insulation_thickness")
|
||||
elif is_solid_brick:
|
||||
desc_split = search_description.split("solid brick,")[-1].strip().split("as built,")[-1]
|
||||
|
||||
if desc_split == "":
|
||||
insulation_thickness = "average"
|
||||
insulation_characteristic = None
|
||||
else:
|
||||
insulation_thickness = thickness_map.get(desc_split.split(insulation_term)[0].strip())
|
||||
if not insulation_thickness:
|
||||
insulation_thickness = "average"
|
||||
|
||||
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
|
||||
|
||||
elif is_system_built:
|
||||
if search_description == "":
|
||||
insulation_thickness = "average"
|
||||
insulation_characteristic = None
|
||||
else:
|
||||
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
|
||||
insulation_thickness = [k for k in thickness_map if k in search_description]
|
||||
if not insulation_thickness or len(insulation_thickness) > 1:
|
||||
raise Exception("Check me out")
|
||||
|
||||
insulation_thickness = thickness_map.get(insulation_thickness[0])
|
||||
elif is_timber_frame:
|
||||
if search_description == "":
|
||||
insulation_thickness = "average"
|
||||
insulation_characteristic = None
|
||||
else:
|
||||
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
|
||||
|
||||
insulation_thickness = [k for k in thickness_map if k in search_description]
|
||||
if not insulation_thickness or len(insulation_thickness) > 1:
|
||||
raise Exception("Check me out")
|
||||
|
||||
insulation_thickness = thickness_map.get(insulation_thickness[0])
|
||||
|
||||
for key, value in thickness_map.items():
|
||||
if key in description:
|
||||
result['insulation_thickness'] = value
|
||||
break
|
||||
else:
|
||||
insulation_characteristic = characterisation_map.get(search_description.split(" ")[-1])
|
||||
result['insulation_thickness'] = None
|
||||
|
||||
insulation_thickness = [k for k in thickness_map if k in search_description]
|
||||
if not insulation_thickness or len(insulation_thickness) > 1:
|
||||
raise Exception("Check me out")
|
||||
# insulation type
|
||||
result['external_insulation'] = 'external insulation' in description
|
||||
result['internal_insulation'] = 'internal insulation' in description
|
||||
|
||||
insulation_thickness = thickness_map.get(insulation_thickness[0])
|
||||
|
||||
return insulation_thickness, insulation_characteristic
|
||||
|
||||
@staticmethod
|
||||
def _make_output(thermal_transmittence, thermal_transmittence_unit, is_solid_brick, insulation_thickness):
|
||||
return {
|
||||
"thermal_transmittence": thermal_transmittence,
|
||||
"thermal_transmittence_unit": thermal_transmittence_unit,
|
||||
"is_solid_brick": is_solid_brick,
|
||||
"insulation_thickness": insulation_thickness
|
||||
}
|
||||
return result
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue